tech-hub-skills 1.5.1 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. package/.claude/LICENSE +21 -21
  2. package/.claude/README.md +291 -291
  3. package/.claude/bin/cli.js +266 -266
  4. package/.claude/bin/copilot.js +182 -182
  5. package/.claude/bin/postinstall.js +42 -42
  6. package/.claude/commands/README.md +336 -336
  7. package/.claude/commands/ai-engineer.md +104 -104
  8. package/.claude/commands/aws.md +143 -143
  9. package/.claude/commands/azure.md +149 -149
  10. package/.claude/commands/backend-developer.md +108 -108
  11. package/.claude/commands/code-review.md +399 -399
  12. package/.claude/commands/compliance-automation.md +747 -747
  13. package/.claude/commands/compliance-officer.md +108 -108
  14. package/.claude/commands/data-engineer.md +113 -113
  15. package/.claude/commands/data-governance.md +102 -102
  16. package/.claude/commands/data-scientist.md +123 -123
  17. package/.claude/commands/database-admin.md +109 -109
  18. package/.claude/commands/devops.md +160 -160
  19. package/.claude/commands/docker.md +160 -160
  20. package/.claude/commands/enterprise-dashboard.md +613 -613
  21. package/.claude/commands/finops.md +184 -184
  22. package/.claude/commands/frontend-developer.md +108 -108
  23. package/.claude/commands/gcp.md +143 -143
  24. package/.claude/commands/ml-engineer.md +115 -115
  25. package/.claude/commands/mlops.md +187 -187
  26. package/.claude/commands/network-engineer.md +109 -109
  27. package/.claude/commands/optimization-advisor.md +329 -329
  28. package/.claude/commands/orchestrator.md +623 -623
  29. package/.claude/commands/platform-engineer.md +102 -102
  30. package/.claude/commands/process-automation.md +226 -226
  31. package/.claude/commands/process-changelog.md +184 -184
  32. package/.claude/commands/process-documentation.md +484 -484
  33. package/.claude/commands/process-kanban.md +324 -324
  34. package/.claude/commands/process-versioning.md +214 -214
  35. package/.claude/commands/product-designer.md +104 -104
  36. package/.claude/commands/project-starter.md +443 -443
  37. package/.claude/commands/qa-engineer.md +109 -109
  38. package/.claude/commands/security-architect.md +135 -135
  39. package/.claude/commands/sre.md +109 -109
  40. package/.claude/commands/system-design.md +126 -126
  41. package/.claude/commands/technical-writer.md +101 -101
  42. package/.claude/package.json +46 -46
  43. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/README.md +252 -252
  44. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_ab_tester.py +356 -356
  45. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_template_manager.py +274 -274
  46. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/token_cost_estimator.py +324 -324
  47. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/README.md +448 -448
  48. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/document_chunker.py +336 -336
  49. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/rag_pipeline.sql +213 -213
  50. package/.claude/roles/ai-engineer/skills/03-agent-orchestration/README.md +599 -599
  51. package/.claude/roles/ai-engineer/skills/04-llm-guardrails/README.md +735 -735
  52. package/.claude/roles/ai-engineer/skills/05-vector-embeddings/README.md +711 -711
  53. package/.claude/roles/ai-engineer/skills/06-llm-evaluation/README.md +777 -777
  54. package/.claude/roles/azure/skills/01-infrastructure-fundamentals/README.md +264 -264
  55. package/.claude/roles/azure/skills/02-data-factory/README.md +264 -264
  56. package/.claude/roles/azure/skills/03-synapse-analytics/README.md +264 -264
  57. package/.claude/roles/azure/skills/04-databricks/README.md +264 -264
  58. package/.claude/roles/azure/skills/05-functions/README.md +264 -264
  59. package/.claude/roles/azure/skills/06-kubernetes-service/README.md +264 -264
  60. package/.claude/roles/azure/skills/07-openai-service/README.md +264 -264
  61. package/.claude/roles/azure/skills/08-machine-learning/README.md +264 -264
  62. package/.claude/roles/azure/skills/09-storage-adls/README.md +264 -264
  63. package/.claude/roles/azure/skills/10-networking/README.md +264 -264
  64. package/.claude/roles/azure/skills/11-sql-cosmos/README.md +264 -264
  65. package/.claude/roles/azure/skills/12-event-hubs/README.md +264 -264
  66. package/.claude/roles/code-review/skills/01-automated-code-review/README.md +394 -394
  67. package/.claude/roles/code-review/skills/02-pr-review-workflow/README.md +427 -427
  68. package/.claude/roles/code-review/skills/03-code-quality-gates/README.md +518 -518
  69. package/.claude/roles/code-review/skills/04-reviewer-assignment/README.md +504 -504
  70. package/.claude/roles/code-review/skills/05-review-analytics/README.md +540 -540
  71. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/README.md +550 -550
  72. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/bronze_ingestion.py +337 -337
  73. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/medallion_queries.sql +300 -300
  74. package/.claude/roles/data-engineer/skills/02-etl-pipeline/README.md +580 -580
  75. package/.claude/roles/data-engineer/skills/03-data-quality/README.md +579 -579
  76. package/.claude/roles/data-engineer/skills/04-streaming-pipelines/README.md +608 -608
  77. package/.claude/roles/data-engineer/skills/05-performance-optimization/README.md +547 -547
  78. package/.claude/roles/data-governance/skills/01-data-catalog/README.md +112 -112
  79. package/.claude/roles/data-governance/skills/02-data-lineage/README.md +129 -129
  80. package/.claude/roles/data-governance/skills/03-data-quality-framework/README.md +182 -182
  81. package/.claude/roles/data-governance/skills/04-access-control/README.md +39 -39
  82. package/.claude/roles/data-governance/skills/05-master-data-management/README.md +40 -40
  83. package/.claude/roles/data-governance/skills/06-compliance-privacy/README.md +46 -46
  84. package/.claude/roles/data-scientist/skills/01-eda-automation/README.md +230 -230
  85. package/.claude/roles/data-scientist/skills/01-eda-automation/eda_generator.py +446 -446
  86. package/.claude/roles/data-scientist/skills/02-statistical-modeling/README.md +264 -264
  87. package/.claude/roles/data-scientist/skills/03-feature-engineering/README.md +264 -264
  88. package/.claude/roles/data-scientist/skills/04-predictive-modeling/README.md +264 -264
  89. package/.claude/roles/data-scientist/skills/05-customer-analytics/README.md +264 -264
  90. package/.claude/roles/data-scientist/skills/06-campaign-analysis/README.md +264 -264
  91. package/.claude/roles/data-scientist/skills/07-experimentation/README.md +264 -264
  92. package/.claude/roles/data-scientist/skills/08-data-visualization/README.md +264 -264
  93. package/.claude/roles/devops/skills/01-cicd-pipeline/README.md +264 -264
  94. package/.claude/roles/devops/skills/02-container-orchestration/README.md +264 -264
  95. package/.claude/roles/devops/skills/03-infrastructure-as-code/README.md +264 -264
  96. package/.claude/roles/devops/skills/04-gitops/README.md +264 -264
  97. package/.claude/roles/devops/skills/05-environment-management/README.md +264 -264
  98. package/.claude/roles/devops/skills/06-automated-testing/README.md +264 -264
  99. package/.claude/roles/devops/skills/07-release-management/README.md +264 -264
  100. package/.claude/roles/devops/skills/08-monitoring-alerting/README.md +264 -264
  101. package/.claude/roles/devops/skills/09-devsecops/README.md +265 -265
  102. package/.claude/roles/finops/skills/01-cost-visibility/README.md +264 -264
  103. package/.claude/roles/finops/skills/02-resource-tagging/README.md +264 -264
  104. package/.claude/roles/finops/skills/03-budget-management/README.md +264 -264
  105. package/.claude/roles/finops/skills/04-reserved-instances/README.md +264 -264
  106. package/.claude/roles/finops/skills/05-spot-optimization/README.md +264 -264
  107. package/.claude/roles/finops/skills/06-storage-tiering/README.md +264 -264
  108. package/.claude/roles/finops/skills/07-compute-rightsizing/README.md +264 -264
  109. package/.claude/roles/finops/skills/08-chargeback/README.md +264 -264
  110. package/.claude/roles/ml-engineer/skills/01-mlops-pipeline/README.md +566 -566
  111. package/.claude/roles/ml-engineer/skills/02-feature-engineering/README.md +655 -655
  112. package/.claude/roles/ml-engineer/skills/03-model-training/README.md +704 -704
  113. package/.claude/roles/ml-engineer/skills/04-model-serving/README.md +845 -845
  114. package/.claude/roles/ml-engineer/skills/05-model-monitoring/README.md +874 -874
  115. package/.claude/roles/mlops/skills/01-ml-pipeline-orchestration/README.md +264 -264
  116. package/.claude/roles/mlops/skills/02-experiment-tracking/README.md +264 -264
  117. package/.claude/roles/mlops/skills/03-model-registry/README.md +264 -264
  118. package/.claude/roles/mlops/skills/04-feature-store/README.md +264 -264
  119. package/.claude/roles/mlops/skills/05-model-deployment/README.md +264 -264
  120. package/.claude/roles/mlops/skills/06-model-observability/README.md +264 -264
  121. package/.claude/roles/mlops/skills/07-data-versioning/README.md +264 -264
  122. package/.claude/roles/mlops/skills/08-ab-testing/README.md +264 -264
  123. package/.claude/roles/mlops/skills/09-automated-retraining/README.md +264 -264
  124. package/.claude/roles/platform-engineer/skills/01-internal-developer-platform/README.md +153 -153
  125. package/.claude/roles/platform-engineer/skills/02-self-service-infrastructure/README.md +57 -57
  126. package/.claude/roles/platform-engineer/skills/03-slo-sli-management/README.md +59 -59
  127. package/.claude/roles/platform-engineer/skills/04-developer-experience/README.md +57 -57
  128. package/.claude/roles/platform-engineer/skills/05-incident-management/README.md +73 -73
  129. package/.claude/roles/platform-engineer/skills/06-capacity-management/README.md +59 -59
  130. package/.claude/roles/product-designer/skills/01-requirements-discovery/README.md +407 -407
  131. package/.claude/roles/product-designer/skills/02-user-research/README.md +382 -382
  132. package/.claude/roles/product-designer/skills/03-brainstorming-ideation/README.md +437 -437
  133. package/.claude/roles/product-designer/skills/04-ux-design/README.md +496 -496
  134. package/.claude/roles/product-designer/skills/05-product-market-fit/README.md +376 -376
  135. package/.claude/roles/product-designer/skills/06-stakeholder-management/README.md +412 -412
  136. package/.claude/roles/security-architect/skills/01-pii-detection/README.md +319 -319
  137. package/.claude/roles/security-architect/skills/02-threat-modeling/README.md +264 -264
  138. package/.claude/roles/security-architect/skills/03-infrastructure-security/README.md +264 -264
  139. package/.claude/roles/security-architect/skills/04-iam/README.md +264 -264
  140. package/.claude/roles/security-architect/skills/05-application-security/README.md +264 -264
  141. package/.claude/roles/security-architect/skills/06-secrets-management/README.md +264 -264
  142. package/.claude/roles/security-architect/skills/07-security-monitoring/README.md +264 -264
  143. package/.claude/roles/system-design/skills/01-architecture-patterns/README.md +337 -337
  144. package/.claude/roles/system-design/skills/02-requirements-engineering/README.md +264 -264
  145. package/.claude/roles/system-design/skills/03-scalability/README.md +264 -264
  146. package/.claude/roles/system-design/skills/04-high-availability/README.md +264 -264
  147. package/.claude/roles/system-design/skills/05-cost-optimization-design/README.md +264 -264
  148. package/.claude/roles/system-design/skills/06-api-design/README.md +264 -264
  149. package/.claude/roles/system-design/skills/07-observability-architecture/README.md +264 -264
  150. package/.claude/roles/system-design/skills/08-process-automation/PROCESS_TEMPLATE.md +336 -336
  151. package/.claude/roles/system-design/skills/08-process-automation/README.md +521 -521
  152. package/.claude/roles/system-design/skills/08-process-automation/ai_prompt_generator.py +744 -744
  153. package/.claude/roles/system-design/skills/08-process-automation/automation_recommender.py +688 -688
  154. package/.claude/roles/system-design/skills/08-process-automation/plan_generator.py +679 -679
  155. package/.claude/roles/system-design/skills/08-process-automation/process_analyzer.py +528 -528
  156. package/.claude/roles/system-design/skills/08-process-automation/process_parser.py +684 -684
  157. package/.claude/roles/system-design/skills/08-process-automation/role_matcher.py +615 -615
  158. package/.claude/skills/README.md +336 -336
  159. package/.claude/skills/ai-engineer.md +104 -104
  160. package/.claude/skills/aws.md +143 -143
  161. package/.claude/skills/azure.md +149 -149
  162. package/.claude/skills/backend-developer.md +108 -108
  163. package/.claude/skills/code-review.md +399 -399
  164. package/.claude/skills/compliance-automation.md +747 -747
  165. package/.claude/skills/compliance-officer.md +108 -108
  166. package/.claude/skills/data-engineer.md +113 -113
  167. package/.claude/skills/data-governance.md +102 -102
  168. package/.claude/skills/data-scientist.md +123 -123
  169. package/.claude/skills/database-admin.md +109 -109
  170. package/.claude/skills/devops.md +160 -160
  171. package/.claude/skills/docker.md +160 -160
  172. package/.claude/skills/enterprise-dashboard.md +613 -613
  173. package/.claude/skills/finops.md +184 -184
  174. package/.claude/skills/frontend-developer.md +108 -108
  175. package/.claude/skills/gcp.md +143 -143
  176. package/.claude/skills/ml-engineer.md +115 -115
  177. package/.claude/skills/mlops.md +187 -187
  178. package/.claude/skills/network-engineer.md +109 -109
  179. package/.claude/skills/optimization-advisor.md +329 -329
  180. package/.claude/skills/orchestrator.md +623 -623
  181. package/.claude/skills/platform-engineer.md +102 -102
  182. package/.claude/skills/process-automation.md +226 -226
  183. package/.claude/skills/process-changelog.md +184 -184
  184. package/.claude/skills/process-documentation.md +484 -484
  185. package/.claude/skills/process-kanban.md +324 -324
  186. package/.claude/skills/process-versioning.md +214 -214
  187. package/.claude/skills/product-designer.md +104 -104
  188. package/.claude/skills/project-starter.md +443 -443
  189. package/.claude/skills/qa-engineer.md +109 -109
  190. package/.claude/skills/security-architect.md +135 -135
  191. package/.claude/skills/sre.md +109 -109
  192. package/.claude/skills/system-design.md +126 -126
  193. package/.claude/skills/technical-writer.md +101 -101
  194. package/.gitattributes +2 -2
  195. package/GITHUB_COPILOT.md +106 -106
  196. package/README.md +192 -184
  197. package/package.json +16 -8
@@ -1,213 +1,213 @@
1
- -- RAG Pipeline Analytics Queries
2
- -- Track knowledge base usage, query patterns, and performance
3
-
4
- -- ================================================================
5
- -- 1. KNOWLEDGE BASE INVENTORY
6
- -- ================================================================
7
-
8
- -- Count documents by source
9
- SELECT
10
- source_type,
11
- COUNT(*) as document_count,
12
- SUM(chunk_count) as total_chunks,
13
- AVG(chunk_count) as avg_chunks_per_doc,
14
- MAX(last_updated) as latest_update
15
- FROM knowledge_base_documents
16
- GROUP BY source_type
17
- ORDER BY document_count DESC;
18
-
19
- -- ================================================================
20
- -- 2. QUERY ANALYTICS
21
- -- ================================================================
22
-
23
- -- Top queries by frequency (last 30 days)
24
- SELECT
25
- query_text,
26
- COUNT(*) as query_count,
27
- AVG(latency_ms) as avg_latency_ms,
28
- AVG(relevance_score) as avg_relevance,
29
- COUNT(DISTINCT user_id) as unique_users
30
- FROM rag_query_log
31
- WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
32
- GROUP BY query_text
33
- HAVING COUNT(*) > 5
34
- ORDER BY query_count DESC
35
- LIMIT 20;
36
-
37
- -- ================================================================
38
- -- 3. RETRIEVAL PERFORMANCE
39
- -- ================================================================
40
-
41
- -- Retrieval performance by top_k setting
42
- SELECT
43
- top_k,
44
- COUNT(*) as query_count,
45
- AVG(latency_ms) as avg_latency_ms,
46
- PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY latency_ms) as p50_latency,
47
- PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_ms) as p95_latency,
48
- PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY latency_ms) as p99_latency,
49
- AVG(relevance_score) as avg_relevance_score
50
- FROM rag_query_log
51
- WHERE query_timestamp >= CURRENT_DATE - INTERVAL '7 days'
52
- GROUP BY top_k
53
- ORDER BY top_k;
54
-
55
- -- ================================================================
56
- -- 4. SOURCE ATTRIBUTION
57
- -- ================================================================
58
-
59
- -- Which documents are most frequently retrieved?
60
- SELECT
61
- d.document_id,
62
- d.title,
63
- d.source_type,
64
- COUNT(*) as retrieval_count,
65
- AVG(r.relevance_score) as avg_relevance,
66
- MAX(r.query_timestamp) as last_retrieved
67
- FROM rag_retrievals r
68
- JOIN knowledge_base_documents d ON r.document_id = d.document_id
69
- WHERE r.query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
70
- GROUP BY d.document_id, d.title, d.source_type
71
- ORDER BY retrieval_count DESC
72
- LIMIT 50;
73
-
74
- -- ================================================================
75
- -- 5. USER ENGAGEMENT
76
- -- ================================================================
77
-
78
- -- User engagement with RAG system
79
- SELECT
80
- DATE_TRUNC('day', query_timestamp) as query_date,
81
- COUNT(DISTINCT user_id) as unique_users,
82
- COUNT(*) as total_queries,
83
- COUNT(*) / COUNT(DISTINCT user_id) as queries_per_user,
84
- AVG(relevance_score) as avg_relevance
85
- FROM rag_query_log
86
- WHERE query_timestamp >= CURRENT_DATE - INTERVAL '90 days'
87
- GROUP BY DATE_TRUNC('day', query_timestamp)
88
- ORDER BY query_date DESC;
89
-
90
- -- ================================================================
91
- -- 6. CHUNK PERFORMANCE
92
- -- ================================================================
93
-
94
- -- Which chunk size performs best?
95
- SELECT
96
- c.chunk_size_range,
97
- COUNT(DISTINCT r.query_id) as query_count,
98
- AVG(r.relevance_score) as avg_relevance,
99
- AVG(r.rank_position) as avg_rank
100
- FROM rag_retrievals r
101
- JOIN knowledge_base_chunks c ON r.chunk_id = c.chunk_id
102
- WHERE r.query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
103
- GROUP BY c.chunk_size_range
104
- ORDER BY avg_relevance DESC;
105
-
106
- -- ================================================================
107
- -- 7. FAILED QUERIES
108
- -- ================================================================
109
-
110
- -- Queries with low relevance (need improvement)
111
- SELECT
112
- query_text,
113
- COUNT(*) as failure_count,
114
- AVG(relevance_score) as avg_relevance,
115
- MIN(relevance_score) as min_relevance,
116
- MAX(query_timestamp) as last_failed
117
- FROM rag_query_log
118
- WHERE relevance_score < 0.5
119
- AND query_timestamp >= CURRENT_DATE - INTERVAL '7 days'
120
- GROUP BY query_text
121
- HAVING COUNT(*) > 2
122
- ORDER BY failure_count DESC
123
- LIMIT 30;
124
-
125
- -- ================================================================
126
- -- 8. EMBEDDING MODEL PERFORMANCE
127
- -- ================================================================
128
-
129
- -- Compare performance across embedding models
130
- SELECT
131
- embedding_model,
132
- COUNT(*) as query_count,
133
- AVG(embedding_latency_ms) as avg_embedding_latency,
134
- AVG(retrieval_latency_ms) as avg_retrieval_latency,
135
- AVG(relevance_score) as avg_relevance
136
- FROM rag_query_log
137
- WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
138
- GROUP BY embedding_model
139
- ORDER BY avg_relevance DESC;
140
-
141
- -- ================================================================
142
- -- 9. KNOWLEDGE GAPS
143
- -- ================================================================
144
-
145
- -- Identify topics with no good answers
146
- WITH poor_coverage AS (
147
- SELECT
148
- query_text,
149
- COUNT(*) as frequency,
150
- AVG(relevance_score) as avg_relevance
151
- FROM rag_query_log
152
- WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
153
- GROUP BY query_text
154
- HAVING AVG(relevance_score) < 0.6 AND COUNT(*) > 3
155
- )
156
- SELECT
157
- query_text,
158
- frequency,
159
- avg_relevance,
160
- 'Add documentation' as recommendation
161
- FROM poor_coverage
162
- ORDER BY frequency DESC;
163
-
164
- -- ================================================================
165
- -- 10. RAG PIPELINE HEALTH
166
- -- ================================================================
167
-
168
- -- Daily RAG pipeline health metrics
169
- SELECT
170
- DATE(query_timestamp) as date,
171
- COUNT(*) as total_queries,
172
- AVG(total_latency_ms) as avg_latency_ms,
173
- AVG(relevance_score) as avg_relevance,
174
- PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_latency_ms) as p95_latency,
175
- COUNT(CASE WHEN total_latency_ms > 1000 THEN 1 END) as slow_queries,
176
- COUNT(CASE WHEN relevance_score < 0.5 THEN 1 END) as low_relevance_queries
177
- FROM rag_query_log
178
- WHERE query_timestamp >= CURRENT_DATE - INTERVAL '14 days'
179
- GROUP BY DATE(query_timestamp)
180
- ORDER BY date DESC;
181
-
182
- -- ================================================================
183
- -- 11. VECTOR DATABASE STATISTICS
184
- -- ================================================================
185
-
186
- -- Vector database usage statistics
187
- SELECT
188
- collection_name,
189
- COUNT(DISTINCT vector_id) as total_vectors,
190
- AVG(vector_dimension) as avg_dimension,
191
- MAX(last_updated) as last_updated,
192
- SUM(storage_bytes) / (1024*1024) as storage_mb
193
- FROM vector_database_collections
194
- GROUP BY collection_name
195
- ORDER BY total_vectors DESC;
196
-
197
- -- ================================================================
198
- -- 12. COST TRACKING
199
- -- ================================================================
200
-
201
- -- Estimated costs by provider
202
- SELECT
203
- DATE(query_timestamp) as date,
204
- llm_provider,
205
- COUNT(*) as query_count,
206
- SUM(input_tokens) as total_input_tokens,
207
- SUM(output_tokens) as total_output_tokens,
208
- SUM(estimated_cost) as total_cost,
209
- AVG(estimated_cost) as avg_cost_per_query
210
- FROM rag_query_log
211
- WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
212
- GROUP BY DATE(query_timestamp), llm_provider
213
- ORDER BY date DESC, total_cost DESC;
1
+ -- RAG Pipeline Analytics Queries
2
+ -- Track knowledge base usage, query patterns, and performance
3
+
4
+ -- ================================================================
5
+ -- 1. KNOWLEDGE BASE INVENTORY
6
+ -- ================================================================
7
+
8
-- Knowledge base inventory, one row per source_type: how many documents it
-- holds, the total and average chunk_count, and the newest last_updated
-- value. Sources with the most documents come first.
SELECT
    source_type,
    COUNT(*)          AS document_count,
    SUM(chunk_count)  AS total_chunks,
    AVG(chunk_count)  AS avg_chunks_per_doc,
    MAX(last_updated) AS latest_update
FROM knowledge_base_documents
GROUP BY
    source_type
ORDER BY
    document_count DESC;
18
+
19
+ -- ================================================================
20
+ -- 2. QUERY ANALYTICS
21
+ -- ================================================================
22
+
23
-- Top queries by frequency (last 30 days).
-- Groups the query log by exact query_text, keeps only texts logged more
-- than 5 times, and returns the 20 most frequent together with their average
-- latency, average relevance and number of distinct users.
-- query_text is a secondary sort key: without it, rows that tie on
-- query_count at the LIMIT cutoff could be included or dropped arbitrarily
-- from one run to the next.
SELECT
    query_text,
    COUNT(*)                AS query_count,
    AVG(latency_ms)         AS avg_latency_ms,
    AVG(relevance_score)    AS avg_relevance,
    COUNT(DISTINCT user_id) AS unique_users
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY query_text
HAVING COUNT(*) > 5
ORDER BY
    query_count DESC,
    query_text
LIMIT 20;
36
+
37
+ -- ================================================================
38
+ -- 3. RETRIEVAL PERFORMANCE
39
+ -- ================================================================
40
+
41
-- Latency and relevance profile for each top_k setting over the last 7 days:
-- query volume, mean latency, interpolated 50th/95th/99th latency
-- percentiles, and mean relevance score. Rows are listed by ascending top_k.
SELECT
    top_k,
    COUNT(*)                                               AS query_count,
    AVG(latency_ms)                                        AS avg_latency_ms,
    PERCENTILE_CONT(0.5)  WITHIN GROUP (ORDER BY latency_ms) AS p50_latency,
    PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_ms) AS p95_latency,
    PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY latency_ms) AS p99_latency,
    AVG(relevance_score)                                   AS avg_relevance_score
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '7 days'
GROUP BY
    top_k
ORDER BY
    top_k;
54
+
55
+ -- ================================================================
56
+ -- 4. SOURCE ATTRIBUTION
57
+ -- ================================================================
58
+
59
-- Which documents are most frequently retrieved?
-- Counts retrieval rows per document over the last 30 days, with the average
-- relevance of those retrievals and the most recent retrieval timestamp, and
-- returns the 50 most-retrieved documents.
-- The join is an inner join, so retrievals whose document_id has no matching
-- row in knowledge_base_documents are excluded.
-- d.document_id is a secondary sort key so that documents tying on
-- retrieval_count at the LIMIT cutoff are chosen the same way on every run.
SELECT
    d.document_id,
    d.title,
    d.source_type,
    COUNT(*)               AS retrieval_count,
    AVG(r.relevance_score) AS avg_relevance,
    MAX(r.query_timestamp) AS last_retrieved
FROM rag_retrievals AS r
INNER JOIN knowledge_base_documents AS d
    ON r.document_id = d.document_id
WHERE r.query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY d.document_id, d.title, d.source_type
ORDER BY
    retrieval_count DESC,
    d.document_id
LIMIT 50;
73
+
74
+ -- ================================================================
75
+ -- 5. USER ENGAGEMENT
76
+ -- ================================================================
77
+
78
-- User engagement with the RAG system, one row per day for the last 90 days:
-- distinct users, total queries, queries per user and average relevance.
-- Newest day first.
SELECT
    DATE_TRUNC('day', query_timestamp) AS query_date,
    COUNT(DISTINCT user_id)            AS unique_users,
    COUNT(*)                           AS total_queries,
    -- Both counts are integers, so a plain division would truncate
    -- (e.g. 7 queries / 2 users = 3). Cast to NUMERIC to keep the fraction.
    -- COUNT(DISTINCT user_id) ignores NULLs and is 0 on a day where every
    -- row has a NULL user_id; NULLIF turns that into a NULL result instead
    -- of a division-by-zero error.
    CAST(COUNT(*) AS NUMERIC)
        / NULLIF(COUNT(DISTINCT user_id), 0) AS queries_per_user,
    AVG(relevance_score)               AS avg_relevance
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '90 days'
GROUP BY DATE_TRUNC('day', query_timestamp)
ORDER BY query_date DESC;
89
+
90
+ -- ================================================================
91
+ -- 6. CHUNK PERFORMANCE
92
+ -- ================================================================
93
+
94
-- Compares chunk size buckets using retrievals from the last 30 days.
-- For each chunk_size_range: the number of distinct queries that retrieved a
-- chunk in that bucket, the mean relevance score and the mean rank position.
-- Buckets with the highest mean relevance are listed first.
SELECT
    c.chunk_size_range,
    COUNT(DISTINCT r.query_id) AS query_count,
    AVG(r.relevance_score)     AS avg_relevance,
    AVG(r.rank_position)       AS avg_rank
FROM rag_retrievals r
JOIN knowledge_base_chunks c
    ON r.chunk_id = c.chunk_id
WHERE r.query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY
    c.chunk_size_range
ORDER BY
    avg_relevance DESC;
105
+
106
+ -- ================================================================
107
+ -- 7. FAILED QUERIES
108
+ -- ================================================================
109
+
110
-- Queries with low relevance (need improvement).
-- Considers only log rows from the last 7 days whose relevance_score is
-- below 0.5, groups them by exact query_text, keeps texts that failed more
-- than twice, and returns the 30 with the most failures.
-- Because the relevance filter is in WHERE, avg_relevance and min_relevance
-- describe the failing rows only, not every run of that query text.
-- query_text is a secondary sort key so that ties on failure_count at the
-- LIMIT cutoff resolve the same way on every run.
SELECT
    query_text,
    COUNT(*)             AS failure_count,
    AVG(relevance_score) AS avg_relevance,
    MIN(relevance_score) AS min_relevance,
    MAX(query_timestamp) AS last_failed
FROM rag_query_log
WHERE relevance_score < 0.5
    AND query_timestamp >= CURRENT_DATE - INTERVAL '7 days'
GROUP BY query_text
HAVING COUNT(*) > 2
ORDER BY
    failure_count DESC,
    query_text
LIMIT 30;
124
+
125
+ -- ================================================================
126
+ -- 8. EMBEDDING MODEL PERFORMANCE
127
+ -- ================================================================
128
+
129
-- Side-by-side view of embedding models over the last 30 days: query volume,
-- mean embedding latency, mean retrieval latency and mean relevance score.
-- The model with the best mean relevance is listed first.
SELECT
    embedding_model,
    COUNT(*)                  AS query_count,
    AVG(embedding_latency_ms) AS avg_embedding_latency,
    AVG(retrieval_latency_ms) AS avg_retrieval_latency,
    AVG(relevance_score)      AS avg_relevance
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY
    embedding_model
ORDER BY
    avg_relevance DESC;
140
+
141
+ -- ================================================================
142
+ -- 9. KNOWLEDGE GAPS
143
+ -- ================================================================
144
+
145
-- Finds recurring questions the knowledge base answers poorly.
-- A query text qualifies when, over the last 30 days, it was logged more
-- than 3 times and its mean relevance score is below 0.6. Every qualifying
-- row is tagged with the fixed recommendation 'Add documentation', and the
-- most frequently asked texts are listed first.
WITH under_served_queries AS (
    -- Per-text frequency and mean relevance, restricted to weak, repeated texts.
    SELECT
        query_text,
        COUNT(*)             AS frequency,
        AVG(relevance_score) AS avg_relevance
    FROM rag_query_log
    WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY
        query_text
    HAVING AVG(relevance_score) < 0.6
       AND COUNT(*) > 3
)
SELECT
    query_text,
    frequency,
    avg_relevance,
    'Add documentation' AS recommendation
FROM under_served_queries
ORDER BY
    frequency DESC;
163
+
164
+ -- ================================================================
165
+ -- 10. RAG PIPELINE HEALTH
166
+ -- ================================================================
167
+
168
-- Day-by-day health summary of the RAG pipeline for the last 14 days:
-- query volume, mean and 95th-percentile total latency, mean relevance,
-- and counts of slow and low-relevance queries. Newest day first.
SELECT
    DATE(query_timestamp) AS date,
    COUNT(*)              AS total_queries,
    AVG(total_latency_ms) AS avg_latency_ms,
    AVG(relevance_score)  AS avg_relevance,
    PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_latency_ms) AS p95_latency,
    -- The CASE yields NULL for non-matching rows, which COUNT skips, so each
    -- expression counts only the rows that satisfy its condition.
    COUNT(CASE WHEN total_latency_ms > 1000 THEN 1 END) AS slow_queries,
    COUNT(CASE WHEN relevance_score < 0.5 THEN 1 END)   AS low_relevance_queries
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '14 days'
GROUP BY
    DATE(query_timestamp)
ORDER BY
    date DESC;
181
+
182
+ -- ================================================================
183
+ -- 11. VECTOR DATABASE STATISTICS
184
+ -- ================================================================
185
+
186
-- Vector database usage statistics, one row per collection: distinct vector
-- count, average vector dimension, most recent last_updated value and total
-- storage. Collections with the most vectors come first.
SELECT
    collection_name,
    COUNT(DISTINCT vector_id) AS total_vectors,
    AVG(vector_dimension)     AS avg_dimension,
    MAX(last_updated)         AS last_updated,
    -- Bytes divided by 1024*1024. The divisor is written as a decimal so the
    -- division cannot fall back to integer arithmetic and truncate to whole
    -- megabytes (a collection under 1 MB would otherwise report 0).
    -- NOTE(review): the type of storage_bytes is not visible here - confirm.
    SUM(storage_bytes) / (1024.0 * 1024.0) AS storage_mb
FROM vector_database_collections
GROUP BY collection_name
ORDER BY total_vectors DESC;
196
+
197
+ -- ================================================================
198
+ -- 12. COST TRACKING
199
+ -- ================================================================
200
+
201
-- Daily spend estimate per LLM provider for the last 30 days: query volume,
-- summed input and output tokens, total estimated cost and mean estimated
-- cost per query. Sorted newest day first, then by highest total cost
-- within each day.
SELECT
    DATE(query_timestamp) AS date,
    llm_provider,
    COUNT(*)              AS query_count,
    SUM(input_tokens)     AS total_input_tokens,
    SUM(output_tokens)    AS total_output_tokens,
    SUM(estimated_cost)   AS total_cost,
    AVG(estimated_cost)   AS avg_cost_per_query
FROM rag_query_log
WHERE query_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY
    DATE(query_timestamp),
    llm_provider
ORDER BY
    date DESC,
    total_cost DESC;