tech-hub-skills 1.5.1 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/LICENSE +21 -21
- package/.claude/README.md +291 -291
- package/.claude/bin/cli.js +266 -266
- package/.claude/bin/copilot.js +182 -182
- package/.claude/bin/postinstall.js +42 -42
- package/.claude/commands/README.md +336 -336
- package/.claude/commands/ai-engineer.md +104 -104
- package/.claude/commands/aws.md +143 -143
- package/.claude/commands/azure.md +149 -149
- package/.claude/commands/backend-developer.md +108 -108
- package/.claude/commands/code-review.md +399 -399
- package/.claude/commands/compliance-automation.md +747 -747
- package/.claude/commands/compliance-officer.md +108 -108
- package/.claude/commands/data-engineer.md +113 -113
- package/.claude/commands/data-governance.md +102 -102
- package/.claude/commands/data-scientist.md +123 -123
- package/.claude/commands/database-admin.md +109 -109
- package/.claude/commands/devops.md +160 -160
- package/.claude/commands/docker.md +160 -160
- package/.claude/commands/enterprise-dashboard.md +613 -613
- package/.claude/commands/finops.md +184 -184
- package/.claude/commands/frontend-developer.md +108 -108
- package/.claude/commands/gcp.md +143 -143
- package/.claude/commands/ml-engineer.md +115 -115
- package/.claude/commands/mlops.md +187 -187
- package/.claude/commands/network-engineer.md +109 -109
- package/.claude/commands/optimization-advisor.md +329 -329
- package/.claude/commands/orchestrator.md +623 -623
- package/.claude/commands/platform-engineer.md +102 -102
- package/.claude/commands/process-automation.md +226 -226
- package/.claude/commands/process-changelog.md +184 -184
- package/.claude/commands/process-documentation.md +484 -484
- package/.claude/commands/process-kanban.md +324 -324
- package/.claude/commands/process-versioning.md +214 -214
- package/.claude/commands/product-designer.md +104 -104
- package/.claude/commands/project-starter.md +443 -443
- package/.claude/commands/qa-engineer.md +109 -109
- package/.claude/commands/security-architect.md +135 -135
- package/.claude/commands/sre.md +109 -109
- package/.claude/commands/system-design.md +126 -126
- package/.claude/commands/technical-writer.md +101 -101
- package/.claude/package.json +46 -46
- package/.claude/roles/ai-engineer/skills/01-prompt-engineering/README.md +252 -252
- package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_ab_tester.py +356 -356
- package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_template_manager.py +274 -274
- package/.claude/roles/ai-engineer/skills/01-prompt-engineering/token_cost_estimator.py +324 -324
- package/.claude/roles/ai-engineer/skills/02-rag-pipeline/README.md +448 -448
- package/.claude/roles/ai-engineer/skills/02-rag-pipeline/document_chunker.py +336 -336
- package/.claude/roles/ai-engineer/skills/02-rag-pipeline/rag_pipeline.sql +213 -213
- package/.claude/roles/ai-engineer/skills/03-agent-orchestration/README.md +599 -599
- package/.claude/roles/ai-engineer/skills/04-llm-guardrails/README.md +735 -735
- package/.claude/roles/ai-engineer/skills/05-vector-embeddings/README.md +711 -711
- package/.claude/roles/ai-engineer/skills/06-llm-evaluation/README.md +777 -777
- package/.claude/roles/azure/skills/01-infrastructure-fundamentals/README.md +264 -264
- package/.claude/roles/azure/skills/02-data-factory/README.md +264 -264
- package/.claude/roles/azure/skills/03-synapse-analytics/README.md +264 -264
- package/.claude/roles/azure/skills/04-databricks/README.md +264 -264
- package/.claude/roles/azure/skills/05-functions/README.md +264 -264
- package/.claude/roles/azure/skills/06-kubernetes-service/README.md +264 -264
- package/.claude/roles/azure/skills/07-openai-service/README.md +264 -264
- package/.claude/roles/azure/skills/08-machine-learning/README.md +264 -264
- package/.claude/roles/azure/skills/09-storage-adls/README.md +264 -264
- package/.claude/roles/azure/skills/10-networking/README.md +264 -264
- package/.claude/roles/azure/skills/11-sql-cosmos/README.md +264 -264
- package/.claude/roles/azure/skills/12-event-hubs/README.md +264 -264
- package/.claude/roles/code-review/skills/01-automated-code-review/README.md +394 -394
- package/.claude/roles/code-review/skills/02-pr-review-workflow/README.md +427 -427
- package/.claude/roles/code-review/skills/03-code-quality-gates/README.md +518 -518
- package/.claude/roles/code-review/skills/04-reviewer-assignment/README.md +504 -504
- package/.claude/roles/code-review/skills/05-review-analytics/README.md +540 -540
- package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/README.md +550 -550
- package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/bronze_ingestion.py +337 -337
- package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/medallion_queries.sql +300 -300
- package/.claude/roles/data-engineer/skills/02-etl-pipeline/README.md +580 -580
- package/.claude/roles/data-engineer/skills/03-data-quality/README.md +579 -579
- package/.claude/roles/data-engineer/skills/04-streaming-pipelines/README.md +608 -608
- package/.claude/roles/data-engineer/skills/05-performance-optimization/README.md +547 -547
- package/.claude/roles/data-governance/skills/01-data-catalog/README.md +112 -112
- package/.claude/roles/data-governance/skills/02-data-lineage/README.md +129 -129
- package/.claude/roles/data-governance/skills/03-data-quality-framework/README.md +182 -182
- package/.claude/roles/data-governance/skills/04-access-control/README.md +39 -39
- package/.claude/roles/data-governance/skills/05-master-data-management/README.md +40 -40
- package/.claude/roles/data-governance/skills/06-compliance-privacy/README.md +46 -46
- package/.claude/roles/data-scientist/skills/01-eda-automation/README.md +230 -230
- package/.claude/roles/data-scientist/skills/01-eda-automation/eda_generator.py +446 -446
- package/.claude/roles/data-scientist/skills/02-statistical-modeling/README.md +264 -264
- package/.claude/roles/data-scientist/skills/03-feature-engineering/README.md +264 -264
- package/.claude/roles/data-scientist/skills/04-predictive-modeling/README.md +264 -264
- package/.claude/roles/data-scientist/skills/05-customer-analytics/README.md +264 -264
- package/.claude/roles/data-scientist/skills/06-campaign-analysis/README.md +264 -264
- package/.claude/roles/data-scientist/skills/07-experimentation/README.md +264 -264
- package/.claude/roles/data-scientist/skills/08-data-visualization/README.md +264 -264
- package/.claude/roles/devops/skills/01-cicd-pipeline/README.md +264 -264
- package/.claude/roles/devops/skills/02-container-orchestration/README.md +264 -264
- package/.claude/roles/devops/skills/03-infrastructure-as-code/README.md +264 -264
- package/.claude/roles/devops/skills/04-gitops/README.md +264 -264
- package/.claude/roles/devops/skills/05-environment-management/README.md +264 -264
- package/.claude/roles/devops/skills/06-automated-testing/README.md +264 -264
- package/.claude/roles/devops/skills/07-release-management/README.md +264 -264
- package/.claude/roles/devops/skills/08-monitoring-alerting/README.md +264 -264
- package/.claude/roles/devops/skills/09-devsecops/README.md +265 -265
- package/.claude/roles/finops/skills/01-cost-visibility/README.md +264 -264
- package/.claude/roles/finops/skills/02-resource-tagging/README.md +264 -264
- package/.claude/roles/finops/skills/03-budget-management/README.md +264 -264
- package/.claude/roles/finops/skills/04-reserved-instances/README.md +264 -264
- package/.claude/roles/finops/skills/05-spot-optimization/README.md +264 -264
- package/.claude/roles/finops/skills/06-storage-tiering/README.md +264 -264
- package/.claude/roles/finops/skills/07-compute-rightsizing/README.md +264 -264
- package/.claude/roles/finops/skills/08-chargeback/README.md +264 -264
- package/.claude/roles/ml-engineer/skills/01-mlops-pipeline/README.md +566 -566
- package/.claude/roles/ml-engineer/skills/02-feature-engineering/README.md +655 -655
- package/.claude/roles/ml-engineer/skills/03-model-training/README.md +704 -704
- package/.claude/roles/ml-engineer/skills/04-model-serving/README.md +845 -845
- package/.claude/roles/ml-engineer/skills/05-model-monitoring/README.md +874 -874
- package/.claude/roles/mlops/skills/01-ml-pipeline-orchestration/README.md +264 -264
- package/.claude/roles/mlops/skills/02-experiment-tracking/README.md +264 -264
- package/.claude/roles/mlops/skills/03-model-registry/README.md +264 -264
- package/.claude/roles/mlops/skills/04-feature-store/README.md +264 -264
- package/.claude/roles/mlops/skills/05-model-deployment/README.md +264 -264
- package/.claude/roles/mlops/skills/06-model-observability/README.md +264 -264
- package/.claude/roles/mlops/skills/07-data-versioning/README.md +264 -264
- package/.claude/roles/mlops/skills/08-ab-testing/README.md +264 -264
- package/.claude/roles/mlops/skills/09-automated-retraining/README.md +264 -264
- package/.claude/roles/platform-engineer/skills/01-internal-developer-platform/README.md +153 -153
- package/.claude/roles/platform-engineer/skills/02-self-service-infrastructure/README.md +57 -57
- package/.claude/roles/platform-engineer/skills/03-slo-sli-management/README.md +59 -59
- package/.claude/roles/platform-engineer/skills/04-developer-experience/README.md +57 -57
- package/.claude/roles/platform-engineer/skills/05-incident-management/README.md +73 -73
- package/.claude/roles/platform-engineer/skills/06-capacity-management/README.md +59 -59
- package/.claude/roles/product-designer/skills/01-requirements-discovery/README.md +407 -407
- package/.claude/roles/product-designer/skills/02-user-research/README.md +382 -382
- package/.claude/roles/product-designer/skills/03-brainstorming-ideation/README.md +437 -437
- package/.claude/roles/product-designer/skills/04-ux-design/README.md +496 -496
- package/.claude/roles/product-designer/skills/05-product-market-fit/README.md +376 -376
- package/.claude/roles/product-designer/skills/06-stakeholder-management/README.md +412 -412
- package/.claude/roles/security-architect/skills/01-pii-detection/README.md +319 -319
- package/.claude/roles/security-architect/skills/02-threat-modeling/README.md +264 -264
- package/.claude/roles/security-architect/skills/03-infrastructure-security/README.md +264 -264
- package/.claude/roles/security-architect/skills/04-iam/README.md +264 -264
- package/.claude/roles/security-architect/skills/05-application-security/README.md +264 -264
- package/.claude/roles/security-architect/skills/06-secrets-management/README.md +264 -264
- package/.claude/roles/security-architect/skills/07-security-monitoring/README.md +264 -264
- package/.claude/roles/system-design/skills/01-architecture-patterns/README.md +337 -337
- package/.claude/roles/system-design/skills/02-requirements-engineering/README.md +264 -264
- package/.claude/roles/system-design/skills/03-scalability/README.md +264 -264
- package/.claude/roles/system-design/skills/04-high-availability/README.md +264 -264
- package/.claude/roles/system-design/skills/05-cost-optimization-design/README.md +264 -264
- package/.claude/roles/system-design/skills/06-api-design/README.md +264 -264
- package/.claude/roles/system-design/skills/07-observability-architecture/README.md +264 -264
- package/.claude/roles/system-design/skills/08-process-automation/PROCESS_TEMPLATE.md +336 -336
- package/.claude/roles/system-design/skills/08-process-automation/README.md +521 -521
- package/.claude/roles/system-design/skills/08-process-automation/ai_prompt_generator.py +744 -744
- package/.claude/roles/system-design/skills/08-process-automation/automation_recommender.py +688 -688
- package/.claude/roles/system-design/skills/08-process-automation/plan_generator.py +679 -679
- package/.claude/roles/system-design/skills/08-process-automation/process_analyzer.py +528 -528
- package/.claude/roles/system-design/skills/08-process-automation/process_parser.py +684 -684
- package/.claude/roles/system-design/skills/08-process-automation/role_matcher.py +615 -615
- package/.claude/skills/README.md +336 -336
- package/.claude/skills/ai-engineer.md +104 -104
- package/.claude/skills/aws.md +143 -143
- package/.claude/skills/azure.md +149 -149
- package/.claude/skills/backend-developer.md +108 -108
- package/.claude/skills/code-review.md +399 -399
- package/.claude/skills/compliance-automation.md +747 -747
- package/.claude/skills/compliance-officer.md +108 -108
- package/.claude/skills/data-engineer.md +113 -113
- package/.claude/skills/data-governance.md +102 -102
- package/.claude/skills/data-scientist.md +123 -123
- package/.claude/skills/database-admin.md +109 -109
- package/.claude/skills/devops.md +160 -160
- package/.claude/skills/docker.md +160 -160
- package/.claude/skills/enterprise-dashboard.md +613 -613
- package/.claude/skills/finops.md +184 -184
- package/.claude/skills/frontend-developer.md +108 -108
- package/.claude/skills/gcp.md +143 -143
- package/.claude/skills/ml-engineer.md +115 -115
- package/.claude/skills/mlops.md +187 -187
- package/.claude/skills/network-engineer.md +109 -109
- package/.claude/skills/optimization-advisor.md +329 -329
- package/.claude/skills/orchestrator.md +623 -623
- package/.claude/skills/platform-engineer.md +102 -102
- package/.claude/skills/process-automation.md +226 -226
- package/.claude/skills/process-changelog.md +184 -184
- package/.claude/skills/process-documentation.md +484 -484
- package/.claude/skills/process-kanban.md +324 -324
- package/.claude/skills/process-versioning.md +214 -214
- package/.claude/skills/product-designer.md +104 -104
- package/.claude/skills/project-starter.md +443 -443
- package/.claude/skills/qa-engineer.md +109 -109
- package/.claude/skills/security-architect.md +135 -135
- package/.claude/skills/sre.md +109 -109
- package/.claude/skills/system-design.md +126 -126
- package/.claude/skills/technical-writer.md +101 -101
- package/.gitattributes +2 -2
- package/GITHUB_COPILOT.md +106 -106
- package/README.md +192 -184
- package/package.json +16 -8
|
@@ -1,213 +1,213 @@
|
|
|
1
|
-
-- RAG Pipeline Analytics Queries
|
|
2
|
-
-- Track knowledge base usage, query patterns, and performance
|
|
3
|
-
|
|
4
|
-
-- ================================================================
|
|
5
|
-
-- 1. KNOWLEDGE BASE INVENTORY
|
|
6
|
-
-- ================================================================
|
|
7
|
-
|
|
8
|
-
-- Knowledge base inventory, one row per source_type:
--   document_count     rows in knowledge_base_documents for that source
--   total_chunks       sum of chunk_count
--   avg_chunks_per_doc mean of chunk_count
--   latest_update      most recent last_updated value
-- Sources with the most documents are listed first.
SELECT
    source_type,
    COUNT(*)          AS document_count,
    SUM(chunk_count)  AS total_chunks,
    AVG(chunk_count)  AS avg_chunks_per_doc,
    MAX(last_updated) AS latest_update
FROM knowledge_base_documents
GROUP BY source_type
ORDER BY document_count DESC;
|
|
18
|
-
|
|
19
|
-
-- ================================================================
|
|
20
|
-
-- 2. QUERY ANALYTICS
|
|
21
|
-
-- ================================================================
|
|
22
|
-
|
|
23
|
-
-- Top 20 most frequent query texts over the last 30 days
-- (window measured back from CURRENT_DATE).
-- Only texts logged more than 5 times in the window are reported.
-- NOTE(review): grouping is on the raw query_text, so variants that differ
-- only in case or whitespace are counted separately — confirm this is intended.
SELECT
    query_text,
    COUNT(*)                AS query_count,
    AVG(latency_ms)         AS avg_latency_ms,
    AVG(relevance_score)    AS avg_relevance,
    COUNT(DISTINCT user_id) AS unique_users
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY query_text
HAVING COUNT(*) > 5
-- query_text is the grouping key, hence unique per output row; using it as a
-- tie-breaker makes the rows that survive LIMIT deterministic.
ORDER BY query_count DESC, query_text
LIMIT 20;
|
|
36
|
-
|
|
37
|
-
-- ================================================================
|
|
38
|
-
-- 3. RETRIEVAL PERFORMANCE
|
|
39
|
-
-- ================================================================
|
|
40
|
-
|
|
41
|
-
-- Latency and relevance profile per top_k setting for the last 7 days
-- (window measured back from CURRENT_DATE).
-- Reports the mean latency plus the interpolated 50th, 95th and 99th
-- latency percentiles, ordered by ascending top_k.
SELECT
    top_k,
    COUNT(*)             AS query_count,
    AVG(latency_ms)      AS avg_latency_ms,
    PERCENTILE_CONT(0.5)
        WITHIN GROUP (ORDER BY latency_ms) AS p50_latency,
    PERCENTILE_CONT(0.95)
        WITHIN GROUP (ORDER BY latency_ms) AS p95_latency,
    PERCENTILE_CONT(0.99)
        WITHIN GROUP (ORDER BY latency_ms) AS p99_latency,
    AVG(relevance_score) AS avg_relevance_score
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '7 days'
GROUP BY top_k
ORDER BY top_k;
|
|
54
|
-
|
|
55
|
-
-- ================================================================
|
|
56
|
-
-- 4. SOURCE ATTRIBUTION
|
|
57
|
-
-- ================================================================
|
|
58
|
-
|
|
59
|
-
-- The 50 documents retrieved most often in the last 30 days
-- (window measured back from CURRENT_DATE).
-- Each rag_retrievals row that matches a document counts as one retrieval;
-- retrievals whose document_id has no match in knowledge_base_documents are
-- dropped by the inner join.
SELECT
    d.document_id,
    d.title,
    d.source_type,
    COUNT(*)                 AS retrieval_count,
    AVG(r.relevance_score)   AS avg_relevance,
    MAX(r.query_timestamp)   AS last_retrieved
FROM rag_retrievals AS r
INNER JOIN knowledge_base_documents AS d
    ON r.document_id = d.document_id
WHERE r.query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY d.document_id, d.title, d.source_type
-- document_id breaks ties on retrieval_count so the rows kept by LIMIT are
-- stable from run to run.
ORDER BY retrieval_count DESC, d.document_id
LIMIT 50;
|
|
73
|
-
|
|
74
|
-
-- ================================================================
|
|
75
|
-
-- 5. USER ENGAGEMENT
|
|
76
|
-
-- ================================================================
|
|
77
|
-
|
|
78
|
-
-- Daily user engagement for the last 90 days
-- (window measured back from CURRENT_DATE), newest day first.
--   unique_users     distinct non-NULL user_id values that day
--   total_queries    rows logged that day
--   queries_per_user total_queries / unique_users, as a fractional value
--   avg_relevance    mean relevance_score that day
SELECT
    DATE_TRUNC('day', query_timestamp) AS query_date,
    COUNT(DISTINCT user_id)            AS unique_users,
    COUNT(*)                           AS total_queries,
    -- Both counts are integers, so a plain "/" would truncate (e.g. 7 / 2 = 3).
    -- Cast to NUMERIC for a true ratio, and use NULLIF so a day whose rows all
    -- have a NULL user_id yields NULL instead of a division-by-zero error.
    CAST(COUNT(*) AS NUMERIC)
        / NULLIF(COUNT(DISTINCT user_id), 0) AS queries_per_user,
    AVG(relevance_score)               AS avg_relevance
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '90 days'
GROUP BY DATE_TRUNC('day', query_timestamp)
ORDER BY query_date DESC;
|
|
89
|
-
|
|
90
|
-
-- ================================================================
|
|
91
|
-
-- 6. CHUNK PERFORMANCE
|
|
92
|
-
-- ================================================================
|
|
93
|
-
|
|
94
|
-
-- Compares retrieval quality across chunk_size_range buckets for the last
-- 30 days (window measured back from CURRENT_DATE).
--   query_count   distinct query_id values that retrieved a chunk in the bucket
--   avg_relevance mean relevance_score of those retrievals
--   avg_rank      mean rank_position of those retrievals
-- Buckets with the highest mean relevance come first.
SELECT
    c.chunk_size_range,
    COUNT(DISTINCT r.query_id) AS query_count,
    AVG(r.relevance_score)     AS avg_relevance,
    AVG(r.rank_position)       AS avg_rank
FROM rag_retrievals AS r
INNER JOIN knowledge_base_chunks AS c
    ON r.chunk_id = c.chunk_id
WHERE r.query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY c.chunk_size_range
ORDER BY avg_relevance DESC;
|
|
105
|
-
|
|
106
|
-
-- ================================================================
|
|
107
|
-
-- 7. FAILED QUERIES
|
|
108
|
-
-- ================================================================
|
|
109
|
-
|
|
110
|
-
-- Query texts that repeatedly scored poorly in the last 7 days
-- (window measured back from CURRENT_DATE).
-- A log row counts as a failure when relevance_score < 0.5; only texts with
-- more than 2 such rows are reported, worst offenders first (max 30 rows).
-- Rows with a NULL relevance_score are excluded by the comparison.
SELECT
    query_text,
    COUNT(*)             AS failure_count,
    AVG(relevance_score) AS avg_relevance,
    MIN(relevance_score) AS min_relevance,
    MAX(query_timestamp) AS last_failed
FROM rag_query_log
WHERE relevance_score < 0.5
    AND query_timestamp >= CURRENT_DATE - INTERVAL '7 days'
GROUP BY query_text
HAVING COUNT(*) > 2
-- query_text is unique per output row; it breaks ties on failure_count so the
-- rows kept by LIMIT are deterministic.
ORDER BY failure_count DESC, query_text
LIMIT 30;
|
|
124
|
-
|
|
125
|
-
-- ================================================================
|
|
126
|
-
-- 8. EMBEDDING MODEL PERFORMANCE
|
|
127
|
-
-- ================================================================
|
|
128
|
-
|
|
129
|
-
-- Side-by-side comparison of embedding models over the last 30 days
-- (window measured back from CURRENT_DATE): query volume, mean embedding
-- latency, mean retrieval latency and mean relevance per embedding_model.
-- Models with the highest mean relevance are listed first.
SELECT
    embedding_model,
    COUNT(*)                  AS query_count,
    AVG(embedding_latency_ms) AS avg_embedding_latency,
    AVG(retrieval_latency_ms) AS avg_retrieval_latency,
    AVG(relevance_score)      AS avg_relevance
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY embedding_model
ORDER BY avg_relevance DESC;
|
|
140
|
-
|
|
141
|
-
-- ================================================================
|
|
142
|
-
-- 9. KNOWLEDGE GAPS
|
|
143
|
-
-- ================================================================
|
|
144
|
-
|
|
145
|
-
-- Knowledge gaps: query texts asked more than 3 times in the last 30 days
-- (window measured back from CURRENT_DATE) whose mean relevance_score is
-- below 0.6. Every row carries the fixed recommendation 'Add documentation';
-- the most frequently asked gaps come first.
WITH low_relevance_topics AS (
    -- Per-query_text frequency and mean relevance, filtered to weak topics.
    SELECT
        query_text,
        COUNT(*)             AS frequency,
        AVG(relevance_score) AS avg_relevance
    FROM rag_query_log
    WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY query_text
    HAVING AVG(relevance_score) < 0.6
        AND COUNT(*) > 3
)
SELECT
    query_text,
    frequency,
    avg_relevance,
    'Add documentation' AS recommendation
FROM low_relevance_topics
ORDER BY frequency DESC;
|
|
163
|
-
|
|
164
|
-
-- ================================================================
|
|
165
|
-
-- 10. RAG PIPELINE HEALTH
|
|
166
|
-
-- ================================================================
|
|
167
|
-
|
|
168
|
-
-- Daily health summary of the RAG pipeline for the last 14 days
-- (window measured back from CURRENT_DATE), newest day first.
--   slow_queries          rows with total_latency_ms > 1000
--   low_relevance_queries rows with relevance_score < 0.5
-- The CASE expressions yield NULL when the condition is not met, and COUNT
-- skips NULLs, so each counter tallies only the matching rows.
SELECT
    DATE(query_timestamp)     AS date,
    COUNT(*)                  AS total_queries,
    AVG(total_latency_ms)     AS avg_latency_ms,
    AVG(relevance_score)      AS avg_relevance,
    PERCENTILE_CONT(0.95)
        WITHIN GROUP (ORDER BY total_latency_ms) AS p95_latency,
    COUNT(
        CASE WHEN total_latency_ms > 1000 THEN 1 END
    )                         AS slow_queries,
    COUNT(
        CASE WHEN relevance_score < 0.5 THEN 1 END
    )                         AS low_relevance_queries
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '14 days'
GROUP BY DATE(query_timestamp)
ORDER BY date DESC;
|
|
181
|
-
|
|
182
|
-
-- ================================================================
|
|
183
|
-
-- 11. VECTOR DATABASE STATISTICS
|
|
184
|
-
-- ================================================================
|
|
185
|
-
|
|
186
|
-
-- Per-collection statistics from vector_database_collections:
-- distinct vector count, mean vector_dimension, most recent last_updated
-- and total storage in MiB. Largest collections (by vector count) first.
SELECT
    collection_name,
    COUNT(DISTINCT vector_id) AS total_vectors,
    AVG(vector_dimension)     AS avg_dimension,
    MAX(last_updated)         AS last_updated,
    -- Decimal divisor forces non-integer division: with an all-integer
    -- expression the result can be truncated to whole megabytes, so a
    -- collection smaller than 1 MiB would be reported as 0.
    -- NOTE(review): whether truncation actually occurred depends on the type
    -- of storage_bytes, which is not visible here.
    SUM(storage_bytes) / (1024.0 * 1024.0) AS storage_mb
FROM vector_database_collections
GROUP BY collection_name
ORDER BY total_vectors DESC;
|
|
196
|
-
|
|
197
|
-
-- ================================================================
|
|
198
|
-
-- 12. COST TRACKING
|
|
199
|
-
-- ================================================================
|
|
200
|
-
|
|
201
|
-
-- Estimated spend per calendar day and llm_provider for the last 30 days
-- (window measured back from CURRENT_DATE): query volume, summed input and
-- output tokens, total estimated_cost and mean estimated_cost per query.
-- Newest day first; within a day, the most expensive provider first.
-- NOTE(review): the currency/unit of estimated_cost is not stated — confirm.
SELECT
    DATE(query_timestamp) AS date,
    llm_provider,
    COUNT(*)              AS query_count,
    SUM(input_tokens)     AS total_input_tokens,
    SUM(output_tokens)    AS total_output_tokens,
    SUM(estimated_cost)   AS total_cost,
    AVG(estimated_cost)   AS avg_cost_per_query
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY
    DATE(query_timestamp),
    llm_provider
ORDER BY
    date DESC,
    total_cost DESC;
|
|
1
|
+
-- RAG Pipeline Analytics Queries
|
|
2
|
+
-- Track knowledge base usage, query patterns, and performance
|
|
3
|
+
|
|
4
|
+
-- ================================================================
|
|
5
|
+
-- 1. KNOWLEDGE BASE INVENTORY
|
|
6
|
+
-- ================================================================
|
|
7
|
+
|
|
8
|
+
-- Knowledge base inventory, one row per source_type:
--   document_count     rows in knowledge_base_documents for that source
--   total_chunks       sum of chunk_count
--   avg_chunks_per_doc mean of chunk_count
--   latest_update      most recent last_updated value
-- Sources with the most documents are listed first.
SELECT
    source_type,
    COUNT(*)          AS document_count,
    SUM(chunk_count)  AS total_chunks,
    AVG(chunk_count)  AS avg_chunks_per_doc,
    MAX(last_updated) AS latest_update
FROM knowledge_base_documents
GROUP BY source_type
ORDER BY document_count DESC;
|
|
18
|
+
|
|
19
|
+
-- ================================================================
|
|
20
|
+
-- 2. QUERY ANALYTICS
|
|
21
|
+
-- ================================================================
|
|
22
|
+
|
|
23
|
+
-- Top 20 most frequent query texts over the last 30 days
-- (window measured back from CURRENT_DATE).
-- Only texts logged more than 5 times in the window are reported.
-- NOTE(review): grouping is on the raw query_text, so variants that differ
-- only in case or whitespace are counted separately — confirm this is intended.
SELECT
    query_text,
    COUNT(*)                AS query_count,
    AVG(latency_ms)         AS avg_latency_ms,
    AVG(relevance_score)    AS avg_relevance,
    COUNT(DISTINCT user_id) AS unique_users
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY query_text
HAVING COUNT(*) > 5
-- query_text is the grouping key, hence unique per output row; using it as a
-- tie-breaker makes the rows that survive LIMIT deterministic.
ORDER BY query_count DESC, query_text
LIMIT 20;
|
|
36
|
+
|
|
37
|
+
-- ================================================================
|
|
38
|
+
-- 3. RETRIEVAL PERFORMANCE
|
|
39
|
+
-- ================================================================
|
|
40
|
+
|
|
41
|
+
-- Latency and relevance profile per top_k setting for the last 7 days
-- (window measured back from CURRENT_DATE).
-- Reports the mean latency plus the interpolated 50th, 95th and 99th
-- latency percentiles, ordered by ascending top_k.
SELECT
    top_k,
    COUNT(*)             AS query_count,
    AVG(latency_ms)      AS avg_latency_ms,
    PERCENTILE_CONT(0.5)
        WITHIN GROUP (ORDER BY latency_ms) AS p50_latency,
    PERCENTILE_CONT(0.95)
        WITHIN GROUP (ORDER BY latency_ms) AS p95_latency,
    PERCENTILE_CONT(0.99)
        WITHIN GROUP (ORDER BY latency_ms) AS p99_latency,
    AVG(relevance_score) AS avg_relevance_score
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '7 days'
GROUP BY top_k
ORDER BY top_k;
|
|
54
|
+
|
|
55
|
+
-- ================================================================
|
|
56
|
+
-- 4. SOURCE ATTRIBUTION
|
|
57
|
+
-- ================================================================
|
|
58
|
+
|
|
59
|
+
-- The 50 documents retrieved most often in the last 30 days
-- (window measured back from CURRENT_DATE).
-- Each rag_retrievals row that matches a document counts as one retrieval;
-- retrievals whose document_id has no match in knowledge_base_documents are
-- dropped by the inner join.
SELECT
    d.document_id,
    d.title,
    d.source_type,
    COUNT(*)                 AS retrieval_count,
    AVG(r.relevance_score)   AS avg_relevance,
    MAX(r.query_timestamp)   AS last_retrieved
FROM rag_retrievals AS r
INNER JOIN knowledge_base_documents AS d
    ON r.document_id = d.document_id
WHERE r.query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY d.document_id, d.title, d.source_type
-- document_id breaks ties on retrieval_count so the rows kept by LIMIT are
-- stable from run to run.
ORDER BY retrieval_count DESC, d.document_id
LIMIT 50;
|
|
73
|
+
|
|
74
|
+
-- ================================================================
|
|
75
|
+
-- 5. USER ENGAGEMENT
|
|
76
|
+
-- ================================================================
|
|
77
|
+
|
|
78
|
+
-- Daily user engagement for the last 90 days
-- (window measured back from CURRENT_DATE), newest day first.
--   unique_users     distinct non-NULL user_id values that day
--   total_queries    rows logged that day
--   queries_per_user total_queries / unique_users, as a fractional value
--   avg_relevance    mean relevance_score that day
SELECT
    DATE_TRUNC('day', query_timestamp) AS query_date,
    COUNT(DISTINCT user_id)            AS unique_users,
    COUNT(*)                           AS total_queries,
    -- Both counts are integers, so a plain "/" would truncate (e.g. 7 / 2 = 3).
    -- Cast to NUMERIC for a true ratio, and use NULLIF so a day whose rows all
    -- have a NULL user_id yields NULL instead of a division-by-zero error.
    CAST(COUNT(*) AS NUMERIC)
        / NULLIF(COUNT(DISTINCT user_id), 0) AS queries_per_user,
    AVG(relevance_score)               AS avg_relevance
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '90 days'
GROUP BY DATE_TRUNC('day', query_timestamp)
ORDER BY query_date DESC;
|
|
89
|
+
|
|
90
|
+
-- ================================================================
|
|
91
|
+
-- 6. CHUNK PERFORMANCE
|
|
92
|
+
-- ================================================================
|
|
93
|
+
|
|
94
|
+
-- Compares retrieval quality across chunk_size_range buckets for the last
-- 30 days (window measured back from CURRENT_DATE).
--   query_count   distinct query_id values that retrieved a chunk in the bucket
--   avg_relevance mean relevance_score of those retrievals
--   avg_rank      mean rank_position of those retrievals
-- Buckets with the highest mean relevance come first.
SELECT
    c.chunk_size_range,
    COUNT(DISTINCT r.query_id) AS query_count,
    AVG(r.relevance_score)     AS avg_relevance,
    AVG(r.rank_position)       AS avg_rank
FROM rag_retrievals AS r
INNER JOIN knowledge_base_chunks AS c
    ON r.chunk_id = c.chunk_id
WHERE r.query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY c.chunk_size_range
ORDER BY avg_relevance DESC;
|
|
105
|
+
|
|
106
|
+
-- ================================================================
|
|
107
|
+
-- 7. FAILED QUERIES
|
|
108
|
+
-- ================================================================
|
|
109
|
+
|
|
110
|
+
-- Query texts that repeatedly scored poorly in the last 7 days
-- (window measured back from CURRENT_DATE).
-- A log row counts as a failure when relevance_score < 0.5; only texts with
-- more than 2 such rows are reported, worst offenders first (max 30 rows).
-- Rows with a NULL relevance_score are excluded by the comparison.
SELECT
    query_text,
    COUNT(*)             AS failure_count,
    AVG(relevance_score) AS avg_relevance,
    MIN(relevance_score) AS min_relevance,
    MAX(query_timestamp) AS last_failed
FROM rag_query_log
WHERE relevance_score < 0.5
    AND query_timestamp >= CURRENT_DATE - INTERVAL '7 days'
GROUP BY query_text
HAVING COUNT(*) > 2
-- query_text is unique per output row; it breaks ties on failure_count so the
-- rows kept by LIMIT are deterministic.
ORDER BY failure_count DESC, query_text
LIMIT 30;
|
|
124
|
+
|
|
125
|
+
-- ================================================================
|
|
126
|
+
-- 8. EMBEDDING MODEL PERFORMANCE
|
|
127
|
+
-- ================================================================
|
|
128
|
+
|
|
129
|
+
-- Side-by-side comparison of embedding models over the last 30 days
-- (window measured back from CURRENT_DATE): query volume, mean embedding
-- latency, mean retrieval latency and mean relevance per embedding_model.
-- Models with the highest mean relevance are listed first.
SELECT
    embedding_model,
    COUNT(*)                  AS query_count,
    AVG(embedding_latency_ms) AS avg_embedding_latency,
    AVG(retrieval_latency_ms) AS avg_retrieval_latency,
    AVG(relevance_score)      AS avg_relevance
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY embedding_model
ORDER BY avg_relevance DESC;
|
|
140
|
+
|
|
141
|
+
-- ================================================================
|
|
142
|
+
-- 9. KNOWLEDGE GAPS
|
|
143
|
+
-- ================================================================
|
|
144
|
+
|
|
145
|
+
-- Knowledge gaps: query texts asked more than 3 times in the last 30 days
-- (window measured back from CURRENT_DATE) whose mean relevance_score is
-- below 0.6. Every row carries the fixed recommendation 'Add documentation';
-- the most frequently asked gaps come first.
WITH low_relevance_topics AS (
    -- Per-query_text frequency and mean relevance, filtered to weak topics.
    SELECT
        query_text,
        COUNT(*)             AS frequency,
        AVG(relevance_score) AS avg_relevance
    FROM rag_query_log
    WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY query_text
    HAVING AVG(relevance_score) < 0.6
        AND COUNT(*) > 3
)
SELECT
    query_text,
    frequency,
    avg_relevance,
    'Add documentation' AS recommendation
FROM low_relevance_topics
ORDER BY frequency DESC;
|
|
163
|
+
|
|
164
|
+
-- ================================================================
|
|
165
|
+
-- 10. RAG PIPELINE HEALTH
|
|
166
|
+
-- ================================================================
|
|
167
|
+
|
|
168
|
+
-- Daily health summary of the RAG pipeline for the last 14 days
-- (window measured back from CURRENT_DATE), newest day first.
--   slow_queries          rows with total_latency_ms > 1000
--   low_relevance_queries rows with relevance_score < 0.5
-- The CASE expressions yield NULL when the condition is not met, and COUNT
-- skips NULLs, so each counter tallies only the matching rows.
SELECT
    DATE(query_timestamp)     AS date,
    COUNT(*)                  AS total_queries,
    AVG(total_latency_ms)     AS avg_latency_ms,
    AVG(relevance_score)      AS avg_relevance,
    PERCENTILE_CONT(0.95)
        WITHIN GROUP (ORDER BY total_latency_ms) AS p95_latency,
    COUNT(
        CASE WHEN total_latency_ms > 1000 THEN 1 END
    )                         AS slow_queries,
    COUNT(
        CASE WHEN relevance_score < 0.5 THEN 1 END
    )                         AS low_relevance_queries
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '14 days'
GROUP BY DATE(query_timestamp)
ORDER BY date DESC;
|
|
181
|
+
|
|
182
|
+
-- ================================================================
|
|
183
|
+
-- 11. VECTOR DATABASE STATISTICS
|
|
184
|
+
-- ================================================================
|
|
185
|
+
|
|
186
|
+
-- Per-collection statistics from vector_database_collections:
-- distinct vector count, mean vector_dimension, most recent last_updated
-- and total storage in MiB. Largest collections (by vector count) first.
SELECT
    collection_name,
    COUNT(DISTINCT vector_id) AS total_vectors,
    AVG(vector_dimension)     AS avg_dimension,
    MAX(last_updated)         AS last_updated,
    -- Decimal divisor forces non-integer division: with an all-integer
    -- expression the result can be truncated to whole megabytes, so a
    -- collection smaller than 1 MiB would be reported as 0.
    -- NOTE(review): whether truncation actually occurred depends on the type
    -- of storage_bytes, which is not visible here.
    SUM(storage_bytes) / (1024.0 * 1024.0) AS storage_mb
FROM vector_database_collections
GROUP BY collection_name
ORDER BY total_vectors DESC;
|
|
196
|
+
|
|
197
|
+
-- ================================================================
|
|
198
|
+
-- 12. COST TRACKING
|
|
199
|
+
-- ================================================================
|
|
200
|
+
|
|
201
|
+
-- Estimated spend per calendar day and llm_provider for the last 30 days
-- (window measured back from CURRENT_DATE): query volume, summed input and
-- output tokens, total estimated_cost and mean estimated_cost per query.
-- Newest day first; within a day, the most expensive provider first.
-- NOTE(review): the currency/unit of estimated_cost is not stated — confirm.
SELECT
    DATE(query_timestamp) AS date,
    llm_provider,
    COUNT(*)              AS query_count,
    SUM(input_tokens)     AS total_input_tokens,
    SUM(output_tokens)    AS total_output_tokens,
    SUM(estimated_cost)   AS total_cost,
    AVG(estimated_cost)   AS avg_cost_per_query
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY
    DATE(query_timestamp),
    llm_provider
ORDER BY
    date DESC,
    total_cost DESC;
|