tech-hub-skills 1.2.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/{LICENSE → .claude/LICENSE} +21 -21
  2. package/.claude/README.md +291 -0
  3. package/.claude/bin/cli.js +266 -0
  4. package/{bin → .claude/bin}/copilot.js +182 -182
  5. package/{bin → .claude/bin}/postinstall.js +42 -42
  6. package/{tech_hub_skills/skills → .claude/commands}/README.md +336 -336
  7. package/{tech_hub_skills/skills → .claude/commands}/ai-engineer.md +104 -104
  8. package/{tech_hub_skills/skills → .claude/commands}/aws.md +143 -143
  9. package/{tech_hub_skills/skills → .claude/commands}/azure.md +149 -149
  10. package/{tech_hub_skills/skills → .claude/commands}/backend-developer.md +108 -108
  11. package/{tech_hub_skills/skills → .claude/commands}/code-review.md +399 -399
  12. package/{tech_hub_skills/skills → .claude/commands}/compliance-automation.md +747 -747
  13. package/{tech_hub_skills/skills → .claude/commands}/compliance-officer.md +108 -108
  14. package/{tech_hub_skills/skills → .claude/commands}/data-engineer.md +113 -113
  15. package/{tech_hub_skills/skills → .claude/commands}/data-governance.md +102 -102
  16. package/{tech_hub_skills/skills → .claude/commands}/data-scientist.md +123 -123
  17. package/{tech_hub_skills/skills → .claude/commands}/database-admin.md +109 -109
  18. package/{tech_hub_skills/skills → .claude/commands}/devops.md +160 -160
  19. package/{tech_hub_skills/skills → .claude/commands}/docker.md +160 -160
  20. package/{tech_hub_skills/skills → .claude/commands}/enterprise-dashboard.md +613 -613
  21. package/{tech_hub_skills/skills → .claude/commands}/finops.md +184 -184
  22. package/{tech_hub_skills/skills → .claude/commands}/frontend-developer.md +108 -108
  23. package/{tech_hub_skills/skills → .claude/commands}/gcp.md +143 -143
  24. package/{tech_hub_skills/skills → .claude/commands}/ml-engineer.md +115 -115
  25. package/{tech_hub_skills/skills → .claude/commands}/mlops.md +187 -187
  26. package/{tech_hub_skills/skills → .claude/commands}/network-engineer.md +109 -109
  27. package/{tech_hub_skills/skills → .claude/commands}/optimization-advisor.md +329 -329
  28. package/{tech_hub_skills/skills → .claude/commands}/orchestrator.md +623 -623
  29. package/{tech_hub_skills/skills → .claude/commands}/platform-engineer.md +102 -102
  30. package/{tech_hub_skills/skills → .claude/commands}/process-automation.md +226 -226
  31. package/{tech_hub_skills/skills → .claude/commands}/process-changelog.md +184 -184
  32. package/{tech_hub_skills/skills → .claude/commands}/process-documentation.md +484 -484
  33. package/{tech_hub_skills/skills → .claude/commands}/process-kanban.md +324 -324
  34. package/{tech_hub_skills/skills → .claude/commands}/process-versioning.md +214 -214
  35. package/{tech_hub_skills/skills → .claude/commands}/product-designer.md +104 -104
  36. package/{tech_hub_skills/skills → .claude/commands}/project-starter.md +443 -443
  37. package/{tech_hub_skills/skills → .claude/commands}/qa-engineer.md +109 -109
  38. package/{tech_hub_skills/skills → .claude/commands}/security-architect.md +135 -135
  39. package/{tech_hub_skills/skills → .claude/commands}/sre.md +109 -109
  40. package/{tech_hub_skills/skills → .claude/commands}/system-design.md +126 -126
  41. package/{tech_hub_skills/skills → .claude/commands}/technical-writer.md +101 -101
  42. package/.claude/package.json +46 -0
  43. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/01-prompt-engineering/README.md +252 -252
  44. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_ab_tester.py +356 -0
  45. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_template_manager.py +274 -0
  46. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/token_cost_estimator.py +324 -0
  47. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/02-rag-pipeline/README.md +448 -448
  48. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/document_chunker.py +336 -0
  49. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/rag_pipeline.sql +213 -0
  50. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/03-agent-orchestration/README.md +599 -599
  51. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/04-llm-guardrails/README.md +735 -735
  52. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/05-vector-embeddings/README.md +711 -711
  53. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/06-llm-evaluation/README.md +777 -777
  54. package/{tech_hub_skills → .claude}/roles/azure/skills/01-infrastructure-fundamentals/README.md +264 -264
  55. package/{tech_hub_skills → .claude}/roles/azure/skills/02-data-factory/README.md +264 -264
  56. package/{tech_hub_skills → .claude}/roles/azure/skills/03-synapse-analytics/README.md +264 -264
  57. package/{tech_hub_skills → .claude}/roles/azure/skills/04-databricks/README.md +264 -264
  58. package/{tech_hub_skills → .claude}/roles/azure/skills/05-functions/README.md +264 -264
  59. package/{tech_hub_skills → .claude}/roles/azure/skills/06-kubernetes-service/README.md +264 -264
  60. package/{tech_hub_skills → .claude}/roles/azure/skills/07-openai-service/README.md +264 -264
  61. package/{tech_hub_skills → .claude}/roles/azure/skills/08-machine-learning/README.md +264 -264
  62. package/{tech_hub_skills → .claude}/roles/azure/skills/09-storage-adls/README.md +264 -264
  63. package/{tech_hub_skills → .claude}/roles/azure/skills/10-networking/README.md +264 -264
  64. package/{tech_hub_skills → .claude}/roles/azure/skills/11-sql-cosmos/README.md +264 -264
  65. package/{tech_hub_skills → .claude}/roles/azure/skills/12-event-hubs/README.md +264 -264
  66. package/{tech_hub_skills → .claude}/roles/code-review/skills/01-automated-code-review/README.md +394 -394
  67. package/{tech_hub_skills → .claude}/roles/code-review/skills/02-pr-review-workflow/README.md +427 -427
  68. package/{tech_hub_skills → .claude}/roles/code-review/skills/03-code-quality-gates/README.md +518 -518
  69. package/{tech_hub_skills → .claude}/roles/code-review/skills/04-reviewer-assignment/README.md +504 -504
  70. package/{tech_hub_skills → .claude}/roles/code-review/skills/05-review-analytics/README.md +540 -540
  71. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/01-lakehouse-architecture/README.md +550 -550
  72. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/bronze_ingestion.py +337 -0
  73. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/medallion_queries.sql +300 -0
  74. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/02-etl-pipeline/README.md +580 -580
  75. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/03-data-quality/README.md +579 -579
  76. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/04-streaming-pipelines/README.md +608 -608
  77. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/05-performance-optimization/README.md +547 -547
  78. package/{tech_hub_skills → .claude}/roles/data-governance/skills/01-data-catalog/README.md +112 -112
  79. package/{tech_hub_skills → .claude}/roles/data-governance/skills/02-data-lineage/README.md +129 -129
  80. package/{tech_hub_skills → .claude}/roles/data-governance/skills/03-data-quality-framework/README.md +182 -182
  81. package/{tech_hub_skills → .claude}/roles/data-governance/skills/04-access-control/README.md +39 -39
  82. package/{tech_hub_skills → .claude}/roles/data-governance/skills/05-master-data-management/README.md +40 -40
  83. package/{tech_hub_skills → .claude}/roles/data-governance/skills/06-compliance-privacy/README.md +46 -46
  84. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/01-eda-automation/README.md +230 -230
  85. package/.claude/roles/data-scientist/skills/01-eda-automation/eda_generator.py +446 -0
  86. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/02-statistical-modeling/README.md +264 -264
  87. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/03-feature-engineering/README.md +264 -264
  88. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/04-predictive-modeling/README.md +264 -264
  89. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/05-customer-analytics/README.md +264 -264
  90. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/06-campaign-analysis/README.md +264 -264
  91. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/07-experimentation/README.md +264 -264
  92. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/08-data-visualization/README.md +264 -264
  93. package/{tech_hub_skills → .claude}/roles/devops/skills/01-cicd-pipeline/README.md +264 -264
  94. package/{tech_hub_skills → .claude}/roles/devops/skills/02-container-orchestration/README.md +264 -264
  95. package/{tech_hub_skills → .claude}/roles/devops/skills/03-infrastructure-as-code/README.md +264 -264
  96. package/{tech_hub_skills → .claude}/roles/devops/skills/04-gitops/README.md +264 -264
  97. package/{tech_hub_skills → .claude}/roles/devops/skills/05-environment-management/README.md +264 -264
  98. package/{tech_hub_skills → .claude}/roles/devops/skills/06-automated-testing/README.md +264 -264
  99. package/{tech_hub_skills → .claude}/roles/devops/skills/07-release-management/README.md +264 -264
  100. package/{tech_hub_skills → .claude}/roles/devops/skills/08-monitoring-alerting/README.md +264 -264
  101. package/{tech_hub_skills → .claude}/roles/devops/skills/09-devsecops/README.md +265 -265
  102. package/{tech_hub_skills → .claude}/roles/finops/skills/01-cost-visibility/README.md +264 -264
  103. package/{tech_hub_skills → .claude}/roles/finops/skills/02-resource-tagging/README.md +264 -264
  104. package/{tech_hub_skills → .claude}/roles/finops/skills/03-budget-management/README.md +264 -264
  105. package/{tech_hub_skills → .claude}/roles/finops/skills/04-reserved-instances/README.md +264 -264
  106. package/{tech_hub_skills → .claude}/roles/finops/skills/05-spot-optimization/README.md +264 -264
  107. package/{tech_hub_skills → .claude}/roles/finops/skills/06-storage-tiering/README.md +264 -264
  108. package/{tech_hub_skills → .claude}/roles/finops/skills/07-compute-rightsizing/README.md +264 -264
  109. package/{tech_hub_skills → .claude}/roles/finops/skills/08-chargeback/README.md +264 -264
  110. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/01-mlops-pipeline/README.md +566 -566
  111. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/02-feature-engineering/README.md +655 -655
  112. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/03-model-training/README.md +704 -704
  113. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/04-model-serving/README.md +845 -845
  114. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/05-model-monitoring/README.md +874 -874
  115. package/{tech_hub_skills → .claude}/roles/mlops/skills/01-ml-pipeline-orchestration/README.md +264 -264
  116. package/{tech_hub_skills → .claude}/roles/mlops/skills/02-experiment-tracking/README.md +264 -264
  117. package/{tech_hub_skills → .claude}/roles/mlops/skills/03-model-registry/README.md +264 -264
  118. package/{tech_hub_skills → .claude}/roles/mlops/skills/04-feature-store/README.md +264 -264
  119. package/{tech_hub_skills → .claude}/roles/mlops/skills/05-model-deployment/README.md +264 -264
  120. package/{tech_hub_skills → .claude}/roles/mlops/skills/06-model-observability/README.md +264 -264
  121. package/{tech_hub_skills → .claude}/roles/mlops/skills/07-data-versioning/README.md +264 -264
  122. package/{tech_hub_skills → .claude}/roles/mlops/skills/08-ab-testing/README.md +264 -264
  123. package/{tech_hub_skills → .claude}/roles/mlops/skills/09-automated-retraining/README.md +264 -264
  124. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/01-internal-developer-platform/README.md +153 -153
  125. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/02-self-service-infrastructure/README.md +57 -57
  126. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/03-slo-sli-management/README.md +59 -59
  127. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/04-developer-experience/README.md +57 -57
  128. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/05-incident-management/README.md +73 -73
  129. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/06-capacity-management/README.md +59 -59
  130. package/{tech_hub_skills → .claude}/roles/product-designer/skills/01-requirements-discovery/README.md +407 -407
  131. package/{tech_hub_skills → .claude}/roles/product-designer/skills/02-user-research/README.md +382 -382
  132. package/{tech_hub_skills → .claude}/roles/product-designer/skills/03-brainstorming-ideation/README.md +437 -437
  133. package/{tech_hub_skills → .claude}/roles/product-designer/skills/04-ux-design/README.md +496 -496
  134. package/{tech_hub_skills → .claude}/roles/product-designer/skills/05-product-market-fit/README.md +376 -376
  135. package/{tech_hub_skills → .claude}/roles/product-designer/skills/06-stakeholder-management/README.md +412 -412
  136. package/{tech_hub_skills → .claude}/roles/security-architect/skills/01-pii-detection/README.md +319 -319
  137. package/{tech_hub_skills → .claude}/roles/security-architect/skills/02-threat-modeling/README.md +264 -264
  138. package/{tech_hub_skills → .claude}/roles/security-architect/skills/03-infrastructure-security/README.md +264 -264
  139. package/{tech_hub_skills → .claude}/roles/security-architect/skills/04-iam/README.md +264 -264
  140. package/{tech_hub_skills → .claude}/roles/security-architect/skills/05-application-security/README.md +264 -264
  141. package/{tech_hub_skills → .claude}/roles/security-architect/skills/06-secrets-management/README.md +264 -264
  142. package/{tech_hub_skills → .claude}/roles/security-architect/skills/07-security-monitoring/README.md +264 -264
  143. package/{tech_hub_skills → .claude}/roles/system-design/skills/01-architecture-patterns/README.md +337 -337
  144. package/{tech_hub_skills → .claude}/roles/system-design/skills/02-requirements-engineering/README.md +264 -264
  145. package/{tech_hub_skills → .claude}/roles/system-design/skills/03-scalability/README.md +264 -264
  146. package/{tech_hub_skills → .claude}/roles/system-design/skills/04-high-availability/README.md +264 -264
  147. package/{tech_hub_skills → .claude}/roles/system-design/skills/05-cost-optimization-design/README.md +264 -264
  148. package/{tech_hub_skills → .claude}/roles/system-design/skills/06-api-design/README.md +264 -264
  149. package/{tech_hub_skills → .claude}/roles/system-design/skills/07-observability-architecture/README.md +264 -264
  150. package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/PROCESS_TEMPLATE.md +336 -336
  151. package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/README.md +521 -521
  152. package/.claude/roles/system-design/skills/08-process-automation/ai_prompt_generator.py +744 -0
  153. package/.claude/roles/system-design/skills/08-process-automation/automation_recommender.py +688 -0
  154. package/.claude/roles/system-design/skills/08-process-automation/plan_generator.py +679 -0
  155. package/.claude/roles/system-design/skills/08-process-automation/process_analyzer.py +528 -0
  156. package/.claude/roles/system-design/skills/08-process-automation/process_parser.py +684 -0
  157. package/.claude/roles/system-design/skills/08-process-automation/role_matcher.py +615 -0
  158. package/.claude/skills/README.md +336 -0
  159. package/.claude/skills/ai-engineer.md +104 -0
  160. package/.claude/skills/aws.md +143 -0
  161. package/.claude/skills/azure.md +149 -0
  162. package/.claude/skills/backend-developer.md +108 -0
  163. package/.claude/skills/code-review.md +399 -0
  164. package/.claude/skills/compliance-automation.md +747 -0
  165. package/.claude/skills/compliance-officer.md +108 -0
  166. package/.claude/skills/data-engineer.md +113 -0
  167. package/.claude/skills/data-governance.md +102 -0
  168. package/.claude/skills/data-scientist.md +123 -0
  169. package/.claude/skills/database-admin.md +109 -0
  170. package/.claude/skills/devops.md +160 -0
  171. package/.claude/skills/docker.md +160 -0
  172. package/.claude/skills/enterprise-dashboard.md +613 -0
  173. package/.claude/skills/finops.md +184 -0
  174. package/.claude/skills/frontend-developer.md +108 -0
  175. package/.claude/skills/gcp.md +143 -0
  176. package/.claude/skills/ml-engineer.md +115 -0
  177. package/.claude/skills/mlops.md +187 -0
  178. package/.claude/skills/network-engineer.md +109 -0
  179. package/.claude/skills/optimization-advisor.md +329 -0
  180. package/.claude/skills/orchestrator.md +623 -0
  181. package/.claude/skills/platform-engineer.md +102 -0
  182. package/.claude/skills/process-automation.md +226 -0
  183. package/.claude/skills/process-changelog.md +184 -0
  184. package/.claude/skills/process-documentation.md +484 -0
  185. package/.claude/skills/process-kanban.md +324 -0
  186. package/.claude/skills/process-versioning.md +214 -0
  187. package/.claude/skills/product-designer.md +104 -0
  188. package/.claude/skills/project-starter.md +443 -0
  189. package/.claude/skills/qa-engineer.md +109 -0
  190. package/.claude/skills/security-architect.md +135 -0
  191. package/.claude/skills/sre.md +109 -0
  192. package/.claude/skills/system-design.md +126 -0
  193. package/.claude/skills/technical-writer.md +101 -0
  194. package/.gitattributes +2 -0
  195. package/GITHUB_COPILOT.md +106 -0
  196. package/README.md +192 -291
  197. package/package.json +16 -46
  198. package/bin/cli.js +0 -241
@@ -1,448 +1,448 @@
1
- # Skill 2: RAG Pipeline Builder
2
-
3
- ## 🎯 Overview
4
- Build production-grade Retrieval-Augmented Generation (RAG) systems with advanced chunking, vector search, and evaluation.
5
-
6
- ## 🔗 Connections
7
- - **Data Engineer**: Ingests documents from Gold layer, manages vector embeddings (de-01, de-03)
8
- - **Security Architect**: PII masking before indexing, access control for knowledge base (sa-01, sa-02)
9
- - **ML Engineer**: Embedding model serving and optimization (ml-03, ml-04)
10
- - **MLOps**: Embedding model versioning, RAG metrics monitoring (mo-01, mo-04)
11
- - **FinOps**: Embedding cost optimization, vector DB cost tracking (fo-01, fo-07)
12
- - **DevOps**: Containerization, CI/CD for knowledge base updates (do-01, do-03, do-08)
13
- - **Data Scientist**: RAG evaluation metrics and experimentation (ds-01, ds-08)
14
-
15
- ## 🛠️ Tools Included
16
-
17
- ### 1. `document_chunker.py`
18
- Advanced document chunking with semantic, recursive, and fixed-size strategies.
19
-
20
- ### 2. `vector_db_connector.py`
21
- Unified interface for multiple vector databases (Pinecone, Weaviate, Chroma, Qdrant).
22
-
23
- ### 3. `hybrid_search.py`
24
- Hybrid search combining semantic (vector) and keyword (BM25) retrieval.
25
-
26
- ### 4. `rag_evaluator.py`
27
- RAG evaluation metrics: faithfulness, answer relevance, context recall/precision.
28
-
29
- ### 5. `rag_pipeline.sql`
30
- SQL queries for knowledge base auditing and usage analytics.
31
-
32
- ## 📊 Key Metrics
33
- - Retrieval precision@k and recall@k
34
- - Answer faithfulness score
35
- - Context relevance score
36
- - Query latency (p50, p95, p99)
37
-
38
- ## 🚀 Quick Start
39
-
40
- ```python
41
- from rag_pipeline import RAGPipeline
42
-
43
- # Initialize pipeline
44
- rag = RAGPipeline(
45
- vector_db="chroma",
46
- embedding_model="text-embedding-3-large",
47
- chunk_strategy="semantic"
48
- )
49
-
50
- # Index documents
51
- rag.index_documents("./knowledge_base")
52
-
53
- # Query
54
- response = rag.query(
55
- "How do we handle customer churn?",
56
- top_k=5
57
- )
58
-
59
- print(response.answer)
60
- print(f"Sources: {response.sources}")
61
- ```
62
-
63
- ## 📚 Best Practices
64
-
65
- ### Cost Optimization (FinOps Integration)
66
-
67
- 1. **Optimize Embedding Costs**
68
- - Cache embeddings for frequently accessed documents
69
- - Use batch embedding APIs to reduce costs
70
- - Choose appropriate embedding models (balance cost vs quality)
71
- - Track embedding costs per document collection
72
- - Reference: FinOps fo-07 (AI/ML Cost Optimization)
73
-
74
- 2. **Vector Database Cost Management**
75
- - Right-size vector DB instances based on query patterns
76
- - Use compression for vector storage
77
- - Implement tiered storage (hot/warm/cold)
78
- - Monitor vector DB costs and query efficiency
79
- - Reference: FinOps fo-01 (Cost Monitoring), fo-05 (Storage Optimization)
80
-
81
- 3. **Optimize Retrieval Costs**
82
- - Cache frequent queries with semantic similarity
83
- - Implement query result caching
84
- - Use hybrid search (vector + keyword) strategically
85
- - Tune top_k parameter to balance cost and quality
86
- - Reference: FinOps fo-03 (Budget Management)
87
-
88
- 4. **LLM Generation Cost Optimization**
89
- - Cache system prompts and retrieved context
90
- - Use prompt caching for RAG responses (90% savings)
91
- - Right-size context window (don't include unnecessary documents)
92
- - Monitor end-to-end RAG costs per query
93
- - Reference: ai-01 (Prompt Caching), FinOps fo-07
94
-
95
- ### Security & Privacy (Security Architect Integration)
96
-
97
- 5. **PII Detection Before Indexing**
98
- - Scan documents for PII before embedding
99
- - Mask or redact sensitive information
100
- - Maintain PII inventory for compliance
101
- - Implement consent-based indexing
102
- - Reference: Security Architect sa-01 (PII Detection)
103
-
104
- 6. **Access Control for Knowledge Base**
105
- - Implement role-based access control (RBAC)
106
- - Filter retrieval results based on user permissions
107
- - Audit document access and queries
108
- - Encrypt vectors at rest and in transit
109
- - Reference: Security Architect sa-02 (IAM), sa-06 (Data Governance)
110
-
111
- 7. **Prevent Data Leakage**
112
- - Validate retrieved context before LLM generation
113
- - Implement content safety filters
114
- - Monitor for prompt injection attacks
115
- - Log all queries for security auditing
116
- - Reference: Security Architect sa-08 (LLM Security)
117
-
118
- ### Data Quality & Governance (Data Engineer Integration)
119
-
120
- 8. **Document Quality Checks**
121
- - Validate document schemas before ingestion
122
- - Implement data quality rules from Gold layer
123
- - Monitor document freshness and staleness
124
- - Track document lineage and provenance
125
- - Reference: Data Engineer de-03 (Data Quality)
126
-
127
- 9. **Chunking Strategy Optimization**
128
- - Choose chunking strategy based on document type
129
- - Test semantic vs fixed-size vs recursive chunking
130
- - Monitor chunk size distribution
131
- - Optimize chunk overlap for context preservation
132
- - Reference: Data Engineer de-02 (ETL Orchestration)
133
-
134
- ### Model Lifecycle Management (MLOps Integration)
135
-
136
- 10. **Embedding Model Versioning**
137
- - Version embedding models in registry
138
- - Track embedding model changes and impacts
139
- - Implement A/B testing for embedding models
140
- - Re-embed documents when models change
141
- - Reference: MLOps mo-01 (Model Registry), mo-03 (Versioning)
142
-
143
- 11. **RAG Metrics Monitoring**
144
- - Track retrieval precision, recall, MRR
145
- - Monitor answer faithfulness and relevance
146
- - Set up alerts for quality degradation
147
- - Implement continuous RAG evaluation
148
- - Reference: MLOps mo-04 (Monitoring), ML Engineer ml-05
149
-
150
- 12. **Drift Detection**
151
- - Monitor embedding distribution drift
152
- - Detect query pattern changes
153
- - Alert on retrieval quality degradation
154
- - Track answer quality over time
155
- - Reference: MLOps mo-05 (Drift Detection)
156
-
157
- ### Deployment & Operations (DevOps Integration)
158
-
159
- 13. **Containerize RAG Pipeline**
160
- - Package RAG components in containers
161
- - Use Docker for local development
162
- - Deploy to AKS for production
163
- - Implement health checks and readiness probes
164
- - Reference: DevOps do-03 (Containerization)
165
-
166
- 14. **CI/CD for Knowledge Base Updates**
167
- - Automate document ingestion pipelines
168
- - Implement quality gates for new documents
169
- - Use blue-green deployments for index updates
170
- - Rollback capability for bad document batches
171
- - Reference: DevOps do-01 (CI/CD), do-05 (GitOps)
172
-
173
- 15. **Observability & Monitoring**
174
- - Instrument RAG pipeline with OpenTelemetry
175
- - Track end-to-end latency (retrieval + generation)
176
- - Monitor vector DB query performance
177
- - Set up Application Insights dashboards
178
- - Reference: DevOps do-08 (Monitoring & Observability)
179
-
180
- ### Azure-Specific Best Practices
181
-
182
- 16. **Leverage Azure AI Search**
183
- - Use Azure AI Search for hybrid search
184
- - Enable semantic ranking for better retrieval
185
- - Use integrated chunking and vectorization
186
- - Implement Azure RBAC for search indexes
187
- - Reference: Azure az-05 (Azure OpenAI), az-04 (AI/ML Services)
188
-
189
- 17. **Azure OpenAI Integration**
190
- - Use managed identity for authentication
191
- - Enable diagnostic logging
192
- - Implement retry logic with circuit breakers
193
- - Use provisioned throughput for predictable costs
194
- - Reference: Azure az-05, ai-01 (Prompt Engineering)
195
-
196
- ## 💰 Cost Optimization Examples
197
-
198
- ### Embedding Cost Tracking
199
- ```python
200
- from vector_db_connector import VectorDBConnector
201
- from embedding_cost_tracker import EmbeddingCostTracker
202
-
203
- cost_tracker = EmbeddingCostTracker()
204
-
205
- # Track embedding costs
206
- def embed_with_tracking(texts: List[str], model: str = "text-embedding-3-large"):
207
- embeddings = get_embeddings(texts, model)
208
-
209
- # Log costs
210
- cost_tracker.log_embedding_request(
211
- model=model,
212
- num_tokens=sum(len(text.split()) for text in texts),
213
- num_texts=len(texts)
214
- )
215
-
216
- return embeddings
217
-
218
- # Generate monthly cost report
219
- report = cost_tracker.monthly_report()
220
- print(f"Embedding costs: ${report.embedding_cost:.2f}")
221
- print(f"Vector DB costs: ${report.vector_db_cost:.2f}")
222
- print(f"LLM generation costs: ${report.llm_cost:.2f}")
223
- print(f"Total RAG costs: ${report.total_cost:.2f}")
224
-
225
- # Set budget alerts
226
- cost_tracker.set_budget_alert(
227
- monthly_budget=500.00,
228
- alert_threshold=0.8
229
- )
230
- ```
231
-
232
- ### RAG Response Caching (90% Cost Savings)
233
- ```python
234
- from anthropic import Anthropic
235
- from semantic_cache import SemanticCache
236
-
237
- client = Anthropic()
238
- cache = SemanticCache(similarity_threshold=0.95)
239
-
240
- def rag_query_with_caching(query: str, context_docs: List[str]):
241
- # Check semantic cache first
242
- cached_response = cache.get(query)
243
- if cached_response:
244
- return cached_response
245
-
246
- # Build context with caching
247
- context = "\n\n".join(context_docs)
248
-
249
- response = client.messages.create(
250
- model="claude-3-5-sonnet-20241022",
251
- max_tokens=1024,
252
- system=[
253
- {
254
- "type": "text",
255
- "text": "You are a helpful assistant that answers questions based on the provided context.",
256
- "cache_control": {"type": "ephemeral"} # Cache system prompt
257
- },
258
- {
259
- "type": "text",
260
- "text": f"Context:\n{context}",
261
- "cache_control": {"type": "ephemeral"} # Cache retrieved docs
262
- }
263
- ],
264
- messages=[{"role": "user", "content": query}]
265
- )
266
-
267
- # Cache the response
268
- cache.set(query, response.content[0].text)
269
-
270
- return response.content[0].text
271
- ```
272
-
273
- ### Vector DB Cost Optimization
274
- ```python
275
- from vector_db_connector import VectorDBConnector
276
-
277
- # Use tiered storage for cost optimization
278
- db = VectorDBConnector(
279
- provider="pinecone",
280
- tier_config={
281
- "hot": { # Recent documents, high-performance
282
- "age_days": 30,
283
- "index_type": "performance"
284
- },
285
- "warm": { # Older documents, balanced
286
- "age_days": 90,
287
- "index_type": "balanced"
288
- },
289
- "cold": { # Archive, cost-optimized
290
- "age_days": 365,
291
- "index_type": "storage_optimized"
292
- }
293
- }
294
- )
295
-
296
- # Auto-tier documents based on access patterns
297
- db.auto_tier_documents(
298
- access_threshold=10, # Move to cold if <10 accesses in 30 days
299
- review_period_days=30
300
- )
301
-
302
- # Monitor costs
303
- costs = db.get_cost_breakdown()
304
- print(f"Hot tier: ${costs.hot_cost:.2f}")
305
- print(f"Warm tier: ${costs.warm_cost:.2f}")
306
- print(f"Cold tier: ${costs.cold_cost:.2f}")
307
- ```
308
-
309
- ## 🔒 Security Best Practices Examples
310
-
311
- ### PII Detection Before Indexing
312
- ```python
313
- from pii_detector import PIIDetector # from sa-01
314
- from data_anonymizer import DataAnonymizer
315
-
316
- detector = PIIDetector()
317
- anonymizer = DataAnonymizer()
318
-
319
- def safe_index_document(document: str, metadata: dict):
320
- # Detect PII
321
- pii_findings = detector.analyze_text(document)
322
-
323
- if pii_findings:
324
- print(f"⚠️ PII detected: {pii_findings}")
325
-
326
- # Option 1: Anonymize
327
- anonymized_doc = anonymizer.mask_text(document, pii_findings)
328
-
329
- # Option 2: Skip indexing
330
- if pii_findings.severity == "high":
331
- print("❌ Skipping document due to sensitive PII")
332
- return None
333
-
334
- document = anonymized_doc
335
-
336
- # Index the document
337
- rag.index_document(document, metadata)
338
-
339
- # Log for compliance
340
- audit_log.record({
341
- "action": "document_indexed",
342
- "pii_detected": bool(pii_findings),
343
- "pii_types": [f.type for f in pii_findings],
344
- "timestamp": datetime.now()
345
- })
346
- ```
347
-
348
- ## 📊 Enhanced Metrics & Monitoring
349
-
350
- | Metric Category | Metric | Target | Tool |
351
- |-----------------|--------|--------|------|
352
- | **Retrieval Quality** | Precision@5 | >0.8 | Custom evaluator |
353
- | | Recall@10 | >0.9 | Custom evaluator |
354
- | | MRR (Mean Reciprocal Rank) | >0.85 | MLflow |
355
- | **Answer Quality** | Faithfulness | >0.9 | RAG evaluator |
356
- | | Answer relevance | >0.85 | RAG evaluator |
357
- | | Context relevance | >0.8 | RAG evaluator |
358
- | **Performance** | End-to-end latency (p95) | <3s | Azure Monitor |
359
- | | Retrieval latency (p95) | <500ms | App Insights |
360
- | | Generation latency (p95) | <2s | App Insights |
361
- | **Costs** | Cost per query | <$0.02 | FinOps dashboard |
362
- | | Embedding cost per doc | <$0.001 | Cost tracker |
363
- | | Cache hit rate | >70% | App Insights |
364
- | **Security** | PII detection rate | 100% | Security logs |
365
- | | Access control violations | 0 | Azure Monitor |
366
-
367
- ## 🚀 Deployment Pipeline
368
-
369
- ### CI/CD for RAG Knowledge Base
370
- ```yaml
371
- # .github/workflows/rag-deployment.yml
372
- name: RAG Knowledge Base Update
373
-
374
- on:
375
- push:
376
- paths:
377
- - 'knowledge_base/**'
378
- branches:
379
- - main
380
-
381
- jobs:
382
- validate-and-index:
383
- runs-on: ubuntu-latest
384
- steps:
385
- - name: Validate documents
386
- run: python scripts/validate_documents.py
387
-
388
- - name: Run PII detection
389
- run: python scripts/detect_pii.py --fail-on-high-severity
390
-
391
- - name: Check data quality
392
- run: python scripts/check_data_quality.py
393
-
394
- - name: Embed and index documents
395
- run: |
396
- python scripts/embed_documents.py --batch-size 100
397
- python scripts/index_to_vector_db.py --environment staging
398
-
399
- - name: Run RAG evaluation tests
400
- run: pytest tests/test_rag_quality.py --min-score 0.8
401
-
402
- - name: Deploy to production
403
- if: success()
404
- run: python scripts/deploy_index.py --environment production
405
-
406
- - name: Monitor RAG metrics
407
- run: python scripts/monitor_rag.py --duration 1h
408
- ```
409
-
410
- ## 🔄 Integration Workflow
411
-
412
- ### End-to-End RAG Pipeline with All Roles
413
- ```
414
- 1. Data Ingestion (de-01)
415
-
416
- 2. Data Quality Checks (de-03)
417
-
418
- 3. PII Detection & Masking (sa-01)
419
-
420
- 4. Document Chunking (ai-02)
421
-
422
- 5. Embedding Generation (ml-03)
423
-
424
- 6. Vector Indexing (ai-02)
425
-
426
- 7. Embed Cost Tracking (fo-07)
427
-
428
- 8. Deploy via CI/CD (do-01)
429
-
430
- 9. Monitor Quality (mo-04)
431
-
432
- 10. RAG Query with Caching (ai-01, ai-02)
433
-
434
- 11. Monitor Costs (fo-01)
435
-
436
- 12. Detect Drift (mo-05)
437
- ```
438
-
439
- ## 🎯 Quick Wins
440
-
441
- 1. **Enable prompt caching** - 90% cost reduction on RAG responses
442
- 2. **Implement PII detection** - Prevent compliance violations before indexing
443
- 3. **Set up embedding caching** - Reduce re-embedding costs
444
- 4. **Add vector DB cost monitoring** - Track and optimize storage costs
445
- 5. **Implement query result caching** - Reduce costs for frequent queries
446
- 6. **Set up RAG evaluation** - Catch quality degradation early
447
- 7. **Containerize for deployment** - Easier scaling and updates
448
- 8. **Enable Application Insights** - Full observability of RAG pipeline
1
+ # Skill 2: RAG Pipeline Builder
2
+
3
+ ## 🎯 Overview
4
+ Build production-grade Retrieval-Augmented Generation (RAG) systems with advanced chunking, vector search, and evaluation.
5
+
6
+ ## 🔗 Connections
7
+ - **Data Engineer**: Ingests documents from Gold layer, manages vector embeddings (de-01, de-03)
8
+ - **Security Architect**: PII masking before indexing, access control for knowledge base (sa-01, sa-02)
9
+ - **ML Engineer**: Embedding model serving and optimization (ml-03, ml-04)
10
+ - **MLOps**: Embedding model versioning, RAG metrics monitoring (mo-01, mo-04)
11
+ - **FinOps**: Embedding cost optimization, vector DB cost tracking (fo-01, fo-07)
12
+ - **DevOps**: Containerization, CI/CD for knowledge base updates (do-01, do-03, do-08)
13
+ - **Data Scientist**: RAG evaluation metrics and experimentation (ds-01, ds-08)
14
+
15
+ ## 🛠️ Tools Included
16
+
17
+ ### 1. `document_chunker.py`
18
+ Advanced document chunking with semantic, recursive, and fixed-size strategies.
19
+
20
+ ### 2. `vector_db_connector.py`
21
+ Unified interface for multiple vector databases (Pinecone, Weaviate, Chroma, Qdrant).
22
+
23
+ ### 3. `hybrid_search.py`
24
+ Hybrid search combining semantic (vector) and keyword (BM25) retrieval.
25
+
26
+ ### 4. `rag_evaluator.py`
27
+ RAG evaluation metrics: faithfulness, answer relevance, context recall/precision.
28
+
29
+ ### 5. `rag_pipeline.sql`
30
+ SQL queries for knowledge base auditing and usage analytics.
31
+
32
+ ## 📊 Key Metrics
33
+ - Retrieval precision@k and recall@k
34
+ - Answer faithfulness score
35
+ - Context relevance score
36
+ - Query latency (p50, p95, p99)
37
+
38
+ ## 🚀 Quick Start
39
+
40
+ ```python
41
+ from rag_pipeline import RAGPipeline
42
+
43
+ # Initialize pipeline
44
+ rag = RAGPipeline(
45
+ vector_db="chroma",
46
+ embedding_model="text-embedding-3-large",
47
+ chunk_strategy="semantic"
48
+ )
49
+
50
+ # Index documents
51
+ rag.index_documents("./knowledge_base")
52
+
53
+ # Query
54
+ response = rag.query(
55
+ "How do we handle customer churn?",
56
+ top_k=5
57
+ )
58
+
59
+ print(response.answer)
60
+ print(f"Sources: {response.sources}")
61
+ ```
62
+
63
+ ## 📚 Best Practices
64
+
65
+ ### Cost Optimization (FinOps Integration)
66
+
67
+ 1. **Optimize Embedding Costs**
68
+ - Cache embeddings for frequently accessed documents
69
+ - Use batch embedding APIs to reduce costs
70
+ - Choose appropriate embedding models (balance cost vs quality)
71
+ - Track embedding costs per document collection
72
+ - Reference: FinOps fo-07 (AI/ML Cost Optimization)
73
+
74
+ 2. **Vector Database Cost Management**
75
+ - Right-size vector DB instances based on query patterns
76
+ - Use compression for vector storage
77
+ - Implement tiered storage (hot/warm/cold)
78
+ - Monitor vector DB costs and query efficiency
79
+ - Reference: FinOps fo-01 (Cost Monitoring), fo-05 (Storage Optimization)
80
+
81
+ 3. **Optimize Retrieval Costs**
82
+ - Cache frequent queries using semantic-similarity matching
83
+ - Implement query result caching
84
+ - Use hybrid search (vector + keyword) strategically
85
+ - Tune top_k parameter to balance cost and quality
86
+ - Reference: FinOps fo-03 (Budget Management)
87
+
88
+ 4. **LLM Generation Cost Optimization**
89
+ - Cache system prompts and retrieved context
90
+ - Use prompt caching for RAG responses (up to 90% savings on cached input tokens)
91
+ - Right-size context window (don't include unnecessary documents)
92
+ - Monitor end-to-end RAG costs per query
93
+ - Reference: ai-01 (Prompt Caching), FinOps fo-07
94
+
95
+ ### Security & Privacy (Security Architect Integration)
96
+
97
+ 5. **PII Detection Before Indexing**
98
+ - Scan documents for PII before embedding
99
+ - Mask or redact sensitive information
100
+ - Maintain PII inventory for compliance
101
+ - Implement consent-based indexing
102
+ - Reference: Security Architect sa-01 (PII Detection)
103
+
104
+ 6. **Access Control for Knowledge Base**
105
+ - Implement role-based access control (RBAC)
106
+ - Filter retrieval results based on user permissions
107
+ - Audit document access and queries
108
+ - Encrypt vectors at rest and in transit
109
+ - Reference: Security Architect sa-02 (IAM), sa-06 (Data Governance)
110
+
111
+ 7. **Prevent Data Leakage**
112
+ - Validate retrieved context before LLM generation
113
+ - Implement content safety filters
114
+ - Monitor for prompt injection attacks
115
+ - Log all queries for security auditing
116
+ - Reference: Security Architect sa-08 (LLM Security)
117
+
118
+ ### Data Quality & Governance (Data Engineer Integration)
119
+
120
+ 8. **Document Quality Checks**
121
+ - Validate document schemas before ingestion
122
+ - Implement data quality rules from Gold layer
123
+ - Monitor document freshness and flag stale content
124
+ - Track document lineage and provenance
125
+ - Reference: Data Engineer de-03 (Data Quality)
126
+
127
+ 9. **Chunking Strategy Optimization**
128
+ - Choose chunking strategy based on document type
129
+ - Test semantic vs fixed-size vs recursive chunking
130
+ - Monitor chunk size distribution
131
+ - Optimize chunk overlap for context preservation
132
+ - Reference: Data Engineer de-02 (ETL Orchestration)
133
+
134
+ ### Model Lifecycle Management (MLOps Integration)
135
+
136
+ 10. **Embedding Model Versioning**
137
+ - Version embedding models in registry
138
+ - Track embedding model changes and impacts
139
+ - Implement A/B testing for embedding models
140
+ - Re-embed documents when models change
141
+ - Reference: MLOps mo-01 (Model Registry), mo-03 (Versioning)
142
+
143
+ 11. **RAG Metrics Monitoring**
144
+ - Track retrieval precision, recall, MRR
145
+ - Monitor answer faithfulness and relevance
146
+ - Set up alerts for quality degradation
147
+ - Implement continuous RAG evaluation
148
+ - Reference: MLOps mo-04 (Monitoring), ML Engineer ml-05
149
+
150
+ 12. **Drift Detection**
151
+ - Monitor embedding distribution drift
152
+ - Detect query pattern changes
153
+ - Alert on retrieval quality degradation
154
+ - Track answer quality over time
155
+ - Reference: MLOps mo-05 (Drift Detection)
156
+
157
+ ### Deployment & Operations (DevOps Integration)
158
+
159
+ 13. **Containerize RAG Pipeline**
160
+ - Package RAG components in containers
161
+ - Use Docker for local development
162
+ - Deploy to AKS for production
163
+ - Implement health checks and readiness probes
164
+ - Reference: DevOps do-03 (Containerization)
165
+
166
+ 14. **CI/CD for Knowledge Base Updates**
167
+ - Automate document ingestion pipelines
168
+ - Implement quality gates for new documents
169
+ - Use blue-green deployments for index updates
170
+ - Provide rollback capability for bad document batches
171
+ - Reference: DevOps do-01 (CI/CD), do-05 (GitOps)
172
+
173
+ 15. **Observability & Monitoring**
174
+ - Instrument RAG pipeline with OpenTelemetry
175
+ - Track end-to-end latency (retrieval + generation)
176
+ - Monitor vector DB query performance
177
+ - Set up Application Insights dashboards
178
+ - Reference: DevOps do-08 (Monitoring & Observability)
179
+
180
+ ### Azure-Specific Best Practices
181
+
182
+ 16. **Leverage Azure AI Search**
183
+ - Use Azure AI Search for hybrid search
184
+ - Enable semantic ranking for better retrieval
185
+ - Use integrated chunking and vectorization
186
+ - Implement Azure RBAC for search indexes
187
+ - Reference: Azure az-05 (Azure OpenAI), az-04 (AI/ML Services)
188
+
189
+ 17. **Azure OpenAI Integration**
190
+ - Use managed identity for authentication
191
+ - Enable diagnostic logging
192
+ - Implement retry logic with circuit breakers
193
+ - Use provisioned throughput for predictable costs
194
+ - Reference: Azure az-05, ai-01 (Prompt Engineering)
195
+
196
+ ## 💰 Cost Optimization Examples
197
+
198
+ ### Embedding Cost Tracking
199
+ ```python
200
+ from vector_db_connector import VectorDBConnector
201
+ from embedding_cost_tracker import EmbeddingCostTracker
202
+
203
+ cost_tracker = EmbeddingCostTracker()
204
+
205
+ # Track embedding costs
206
+ def embed_with_tracking(texts: List[str], model: str = "text-embedding-3-large"):
207
+ embeddings = get_embeddings(texts, model)
208
+
209
+ # Log costs
210
+ cost_tracker.log_embedding_request(
211
+ model=model,
212
+ num_tokens=sum(len(text.split()) for text in texts),
213
+ num_texts=len(texts)
214
+ )
215
+
216
+ return embeddings
217
+
218
+ # Generate monthly cost report
219
+ report = cost_tracker.monthly_report()
220
+ print(f"Embedding costs: ${report.embedding_cost:.2f}")
221
+ print(f"Vector DB costs: ${report.vector_db_cost:.2f}")
222
+ print(f"LLM generation costs: ${report.llm_cost:.2f}")
223
+ print(f"Total RAG costs: ${report.total_cost:.2f}")
224
+
225
+ # Set budget alerts
226
+ cost_tracker.set_budget_alert(
227
+ monthly_budget=500.00,
228
+ alert_threshold=0.8
229
+ )
230
+ ```
231
+
232
+ ### RAG Response Caching (90% Cost Savings)
233
+ ```python
234
+ from anthropic import Anthropic
235
+ from semantic_cache import SemanticCache
236
+
237
+ client = Anthropic()
238
+ cache = SemanticCache(similarity_threshold=0.95)
239
+
240
+ def rag_query_with_caching(query: str, context_docs: List[str]):
241
+ # Check semantic cache first
242
+ cached_response = cache.get(query)
243
+ if cached_response:
244
+ return cached_response
245
+
246
+ # Build context with caching
247
+ context = "\n\n".join(context_docs)
248
+
249
+ response = client.messages.create(
250
+ model="claude-3-5-sonnet-20241022",
251
+ max_tokens=1024,
252
+ system=[
253
+ {
254
+ "type": "text",
255
+ "text": "You are a helpful assistant that answers questions based on the provided context.",
256
+ "cache_control": {"type": "ephemeral"} # Cache system prompt
257
+ },
258
+ {
259
+ "type": "text",
260
+ "text": f"Context:\n{context}",
261
+ "cache_control": {"type": "ephemeral"} # Cache retrieved docs
262
+ }
263
+ ],
264
+ messages=[{"role": "user", "content": query}]
265
+ )
266
+
267
+ # Cache the response
268
+ cache.set(query, response.content[0].text)
269
+
270
+ return response.content[0].text
271
+ ```
272
+
273
+ ### Vector DB Cost Optimization
274
+ ```python
275
+ from vector_db_connector import VectorDBConnector
276
+
277
+ # Use tiered storage for cost optimization
278
+ db = VectorDBConnector(
279
+ provider="pinecone",
280
+ tier_config={
281
+ "hot": { # Recent documents, high-performance
282
+ "age_days": 30,
283
+ "index_type": "performance"
284
+ },
285
+ "warm": { # Older documents, balanced
286
+ "age_days": 90,
287
+ "index_type": "balanced"
288
+ },
289
+ "cold": { # Archive, cost-optimized
290
+ "age_days": 365,
291
+ "index_type": "storage_optimized"
292
+ }
293
+ }
294
+ )
295
+
296
+ # Auto-tier documents based on access patterns
297
+ db.auto_tier_documents(
298
+ access_threshold=10, # Move to cold if <10 accesses in 30 days
299
+ review_period_days=30
300
+ )
301
+
302
+ # Monitor costs
303
+ costs = db.get_cost_breakdown()
304
+ print(f"Hot tier: ${costs.hot_cost:.2f}")
305
+ print(f"Warm tier: ${costs.warm_cost:.2f}")
306
+ print(f"Cold tier: ${costs.cold_cost:.2f}")
307
+ ```
308
+
309
+ ## 🔒 Security Best Practices Examples
310
+
311
+ ### PII Detection Before Indexing
312
+ ```python
313
+ from pii_detector import PIIDetector # from sa-01
314
+ from data_anonymizer import DataAnonymizer
315
+
316
+ detector = PIIDetector()
317
+ anonymizer = DataAnonymizer()
318
+
319
+ def safe_index_document(document: str, metadata: dict):
320
+ # Detect PII
321
+ pii_findings = detector.analyze_text(document)
322
+
323
+ if pii_findings:
324
+ print(f"⚠️ PII detected: {pii_findings}")
325
+
326
+ # Option 1: Anonymize
327
+ anonymized_doc = anonymizer.mask_text(document, pii_findings)
328
+
329
+ # Option 2: Skip indexing
330
+ if pii_findings.severity == "high":
331
+ print("❌ Skipping document due to sensitive PII")
332
+ return None
333
+
334
+ document = anonymized_doc
335
+
336
+ # Index the document
337
+ rag.index_document(document, metadata)
338
+
339
+ # Log for compliance
340
+ audit_log.record({
341
+ "action": "document_indexed",
342
+ "pii_detected": bool(pii_findings),
343
+ "pii_types": [f.type for f in pii_findings],
344
+ "timestamp": datetime.now()
345
+ })
346
+ ```
347
+
348
+ ## 📊 Enhanced Metrics & Monitoring
349
+
350
+ | Metric Category | Metric | Target | Tool |
351
+ |-----------------|--------|--------|------|
352
+ | **Retrieval Quality** | Precision@5 | >0.8 | Custom evaluator |
353
+ | | Recall@10 | >0.9 | Custom evaluator |
354
+ | | MRR (Mean Reciprocal Rank) | >0.85 | MLflow |
355
+ | **Answer Quality** | Faithfulness | >0.9 | RAG evaluator |
356
+ | | Answer relevance | >0.85 | RAG evaluator |
357
+ | | Context relevance | >0.8 | RAG evaluator |
358
+ | **Performance** | End-to-end latency (p95) | <3s | Azure Monitor |
359
+ | | Retrieval latency (p95) | <500ms | App Insights |
360
+ | | Generation latency (p95) | <2s | App Insights |
361
+ | **Costs** | Cost per query | <$0.02 | FinOps dashboard |
362
+ | | Embedding cost per doc | <$0.001 | Cost tracker |
363
+ | | Cache hit rate | >70% | App Insights |
364
+ | **Security** | PII detection rate | 100% | Security logs |
365
+ | | Access control violations | 0 | Azure Monitor |
366
+
367
+ ## 🚀 Deployment Pipeline
368
+
369
+ ### CI/CD for RAG Knowledge Base
370
+ ```yaml
371
+ # .github/workflows/rag-deployment.yml
372
+ name: RAG Knowledge Base Update
373
+
374
+ on:
375
+ push:
376
+ paths:
377
+ - 'knowledge_base/**'
378
+ branches:
379
+ - main
380
+
381
+ jobs:
382
+ validate-and-index:
383
+ runs-on: ubuntu-latest
384
+ steps:
385
+ - name: Validate documents
386
+ run: python scripts/validate_documents.py
387
+
388
+ - name: Run PII detection
389
+ run: python scripts/detect_pii.py --fail-on-high-severity
390
+
391
+ - name: Check data quality
392
+ run: python scripts/check_data_quality.py
393
+
394
+ - name: Embed and index documents
395
+ run: |
396
+ python scripts/embed_documents.py --batch-size 100
397
+ python scripts/index_to_vector_db.py --environment staging
398
+
399
+ - name: Run RAG evaluation tests
400
+ run: pytest tests/test_rag_quality.py --min-score 0.8
401
+
402
+ - name: Deploy to production
403
+ if: success()
404
+ run: python scripts/deploy_index.py --environment production
405
+
406
+ - name: Monitor RAG metrics
407
+ run: python scripts/monitor_rag.py --duration 1h
408
+ ```
409
+
410
+ ## 🔄 Integration Workflow
411
+
412
+ ### End-to-End RAG Pipeline with All Roles
413
+ ```
414
+ 1. Data Ingestion (de-01)
415
+
416
+ 2. Data Quality Checks (de-03)
417
+
418
+ 3. PII Detection & Masking (sa-01)
419
+
420
+ 4. Document Chunking (ai-02)
421
+
422
+ 5. Embedding Generation (ml-03)
423
+
424
+ 6. Vector Indexing (ai-02)
425
+
426
+ 7. Embedding Cost Tracking (fo-07)
427
+
428
+ 8. Deploy via CI/CD (do-01)
429
+
430
+ 9. Monitor Quality (mo-04)
431
+
432
+ 10. RAG Query with Caching (ai-01, ai-02)
433
+
434
+ 11. Monitor Costs (fo-01)
435
+
436
+ 12. Detect Drift (mo-05)
437
+ ```
438
+
439
+ ## 🎯 Quick Wins
440
+
441
+ 1. **Enable prompt caching** - Up to 90% cost reduction on cached input tokens in RAG responses
442
+ 2. **Implement PII detection** - Prevent compliance violations before indexing
443
+ 3. **Set up embedding caching** - Reduce re-embedding costs
444
+ 4. **Add vector DB cost monitoring** - Track and optimize storage costs
445
+ 5. **Implement query result caching** - Reduce costs for frequent queries
446
+ 6. **Set up RAG evaluation** - Catch quality degradation early
447
+ 7. **Containerize for deployment** - Easier scaling and updates
448
+ 8. **Enable Application Insights** - Full observability of RAG pipeline