tech-hub-skills 1.2.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/{LICENSE → .claude/LICENSE} +21 -21
  2. package/.claude/README.md +291 -0
  3. package/.claude/bin/cli.js +266 -0
  4. package/{bin → .claude/bin}/copilot.js +182 -182
  5. package/{bin → .claude/bin}/postinstall.js +42 -42
  6. package/{tech_hub_skills/skills → .claude/commands}/README.md +336 -336
  7. package/{tech_hub_skills/skills → .claude/commands}/ai-engineer.md +104 -104
  8. package/{tech_hub_skills/skills → .claude/commands}/aws.md +143 -143
  9. package/{tech_hub_skills/skills → .claude/commands}/azure.md +149 -149
  10. package/{tech_hub_skills/skills → .claude/commands}/backend-developer.md +108 -108
  11. package/{tech_hub_skills/skills → .claude/commands}/code-review.md +399 -399
  12. package/{tech_hub_skills/skills → .claude/commands}/compliance-automation.md +747 -747
  13. package/{tech_hub_skills/skills → .claude/commands}/compliance-officer.md +108 -108
  14. package/{tech_hub_skills/skills → .claude/commands}/data-engineer.md +113 -113
  15. package/{tech_hub_skills/skills → .claude/commands}/data-governance.md +102 -102
  16. package/{tech_hub_skills/skills → .claude/commands}/data-scientist.md +123 -123
  17. package/{tech_hub_skills/skills → .claude/commands}/database-admin.md +109 -109
  18. package/{tech_hub_skills/skills → .claude/commands}/devops.md +160 -160
  19. package/{tech_hub_skills/skills → .claude/commands}/docker.md +160 -160
  20. package/{tech_hub_skills/skills → .claude/commands}/enterprise-dashboard.md +613 -613
  21. package/{tech_hub_skills/skills → .claude/commands}/finops.md +184 -184
  22. package/{tech_hub_skills/skills → .claude/commands}/frontend-developer.md +108 -108
  23. package/{tech_hub_skills/skills → .claude/commands}/gcp.md +143 -143
  24. package/{tech_hub_skills/skills → .claude/commands}/ml-engineer.md +115 -115
  25. package/{tech_hub_skills/skills → .claude/commands}/mlops.md +187 -187
  26. package/{tech_hub_skills/skills → .claude/commands}/network-engineer.md +109 -109
  27. package/{tech_hub_skills/skills → .claude/commands}/optimization-advisor.md +329 -329
  28. package/{tech_hub_skills/skills → .claude/commands}/orchestrator.md +623 -623
  29. package/{tech_hub_skills/skills → .claude/commands}/platform-engineer.md +102 -102
  30. package/{tech_hub_skills/skills → .claude/commands}/process-automation.md +226 -226
  31. package/{tech_hub_skills/skills → .claude/commands}/process-changelog.md +184 -184
  32. package/{tech_hub_skills/skills → .claude/commands}/process-documentation.md +484 -484
  33. package/{tech_hub_skills/skills → .claude/commands}/process-kanban.md +324 -324
  34. package/{tech_hub_skills/skills → .claude/commands}/process-versioning.md +214 -214
  35. package/{tech_hub_skills/skills → .claude/commands}/product-designer.md +104 -104
  36. package/{tech_hub_skills/skills → .claude/commands}/project-starter.md +443 -443
  37. package/{tech_hub_skills/skills → .claude/commands}/qa-engineer.md +109 -109
  38. package/{tech_hub_skills/skills → .claude/commands}/security-architect.md +135 -135
  39. package/{tech_hub_skills/skills → .claude/commands}/sre.md +109 -109
  40. package/{tech_hub_skills/skills → .claude/commands}/system-design.md +126 -126
  41. package/{tech_hub_skills/skills → .claude/commands}/technical-writer.md +101 -101
  42. package/.claude/package.json +46 -0
  43. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/01-prompt-engineering/README.md +252 -252
  44. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_ab_tester.py +356 -0
  45. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_template_manager.py +274 -0
  46. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/token_cost_estimator.py +324 -0
  47. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/02-rag-pipeline/README.md +448 -448
  48. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/document_chunker.py +336 -0
  49. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/rag_pipeline.sql +213 -0
  50. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/03-agent-orchestration/README.md +599 -599
  51. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/04-llm-guardrails/README.md +735 -735
  52. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/05-vector-embeddings/README.md +711 -711
  53. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/06-llm-evaluation/README.md +777 -777
  54. package/{tech_hub_skills → .claude}/roles/azure/skills/01-infrastructure-fundamentals/README.md +264 -264
  55. package/{tech_hub_skills → .claude}/roles/azure/skills/02-data-factory/README.md +264 -264
  56. package/{tech_hub_skills → .claude}/roles/azure/skills/03-synapse-analytics/README.md +264 -264
  57. package/{tech_hub_skills → .claude}/roles/azure/skills/04-databricks/README.md +264 -264
  58. package/{tech_hub_skills → .claude}/roles/azure/skills/05-functions/README.md +264 -264
  59. package/{tech_hub_skills → .claude}/roles/azure/skills/06-kubernetes-service/README.md +264 -264
  60. package/{tech_hub_skills → .claude}/roles/azure/skills/07-openai-service/README.md +264 -264
  61. package/{tech_hub_skills → .claude}/roles/azure/skills/08-machine-learning/README.md +264 -264
  62. package/{tech_hub_skills → .claude}/roles/azure/skills/09-storage-adls/README.md +264 -264
  63. package/{tech_hub_skills → .claude}/roles/azure/skills/10-networking/README.md +264 -264
  64. package/{tech_hub_skills → .claude}/roles/azure/skills/11-sql-cosmos/README.md +264 -264
  65. package/{tech_hub_skills → .claude}/roles/azure/skills/12-event-hubs/README.md +264 -264
  66. package/{tech_hub_skills → .claude}/roles/code-review/skills/01-automated-code-review/README.md +394 -394
  67. package/{tech_hub_skills → .claude}/roles/code-review/skills/02-pr-review-workflow/README.md +427 -427
  68. package/{tech_hub_skills → .claude}/roles/code-review/skills/03-code-quality-gates/README.md +518 -518
  69. package/{tech_hub_skills → .claude}/roles/code-review/skills/04-reviewer-assignment/README.md +504 -504
  70. package/{tech_hub_skills → .claude}/roles/code-review/skills/05-review-analytics/README.md +540 -540
  71. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/01-lakehouse-architecture/README.md +550 -550
  72. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/bronze_ingestion.py +337 -0
  73. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/medallion_queries.sql +300 -0
  74. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/02-etl-pipeline/README.md +580 -580
  75. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/03-data-quality/README.md +579 -579
  76. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/04-streaming-pipelines/README.md +608 -608
  77. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/05-performance-optimization/README.md +547 -547
  78. package/{tech_hub_skills → .claude}/roles/data-governance/skills/01-data-catalog/README.md +112 -112
  79. package/{tech_hub_skills → .claude}/roles/data-governance/skills/02-data-lineage/README.md +129 -129
  80. package/{tech_hub_skills → .claude}/roles/data-governance/skills/03-data-quality-framework/README.md +182 -182
  81. package/{tech_hub_skills → .claude}/roles/data-governance/skills/04-access-control/README.md +39 -39
  82. package/{tech_hub_skills → .claude}/roles/data-governance/skills/05-master-data-management/README.md +40 -40
  83. package/{tech_hub_skills → .claude}/roles/data-governance/skills/06-compliance-privacy/README.md +46 -46
  84. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/01-eda-automation/README.md +230 -230
  85. package/.claude/roles/data-scientist/skills/01-eda-automation/eda_generator.py +446 -0
  86. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/02-statistical-modeling/README.md +264 -264
  87. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/03-feature-engineering/README.md +264 -264
  88. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/04-predictive-modeling/README.md +264 -264
  89. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/05-customer-analytics/README.md +264 -264
  90. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/06-campaign-analysis/README.md +264 -264
  91. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/07-experimentation/README.md +264 -264
  92. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/08-data-visualization/README.md +264 -264
  93. package/{tech_hub_skills → .claude}/roles/devops/skills/01-cicd-pipeline/README.md +264 -264
  94. package/{tech_hub_skills → .claude}/roles/devops/skills/02-container-orchestration/README.md +264 -264
  95. package/{tech_hub_skills → .claude}/roles/devops/skills/03-infrastructure-as-code/README.md +264 -264
  96. package/{tech_hub_skills → .claude}/roles/devops/skills/04-gitops/README.md +264 -264
  97. package/{tech_hub_skills → .claude}/roles/devops/skills/05-environment-management/README.md +264 -264
  98. package/{tech_hub_skills → .claude}/roles/devops/skills/06-automated-testing/README.md +264 -264
  99. package/{tech_hub_skills → .claude}/roles/devops/skills/07-release-management/README.md +264 -264
  100. package/{tech_hub_skills → .claude}/roles/devops/skills/08-monitoring-alerting/README.md +264 -264
  101. package/{tech_hub_skills → .claude}/roles/devops/skills/09-devsecops/README.md +265 -265
  102. package/{tech_hub_skills → .claude}/roles/finops/skills/01-cost-visibility/README.md +264 -264
  103. package/{tech_hub_skills → .claude}/roles/finops/skills/02-resource-tagging/README.md +264 -264
  104. package/{tech_hub_skills → .claude}/roles/finops/skills/03-budget-management/README.md +264 -264
  105. package/{tech_hub_skills → .claude}/roles/finops/skills/04-reserved-instances/README.md +264 -264
  106. package/{tech_hub_skills → .claude}/roles/finops/skills/05-spot-optimization/README.md +264 -264
  107. package/{tech_hub_skills → .claude}/roles/finops/skills/06-storage-tiering/README.md +264 -264
  108. package/{tech_hub_skills → .claude}/roles/finops/skills/07-compute-rightsizing/README.md +264 -264
  109. package/{tech_hub_skills → .claude}/roles/finops/skills/08-chargeback/README.md +264 -264
  110. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/01-mlops-pipeline/README.md +566 -566
  111. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/02-feature-engineering/README.md +655 -655
  112. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/03-model-training/README.md +704 -704
  113. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/04-model-serving/README.md +845 -845
  114. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/05-model-monitoring/README.md +874 -874
  115. package/{tech_hub_skills → .claude}/roles/mlops/skills/01-ml-pipeline-orchestration/README.md +264 -264
  116. package/{tech_hub_skills → .claude}/roles/mlops/skills/02-experiment-tracking/README.md +264 -264
  117. package/{tech_hub_skills → .claude}/roles/mlops/skills/03-model-registry/README.md +264 -264
  118. package/{tech_hub_skills → .claude}/roles/mlops/skills/04-feature-store/README.md +264 -264
  119. package/{tech_hub_skills → .claude}/roles/mlops/skills/05-model-deployment/README.md +264 -264
  120. package/{tech_hub_skills → .claude}/roles/mlops/skills/06-model-observability/README.md +264 -264
  121. package/{tech_hub_skills → .claude}/roles/mlops/skills/07-data-versioning/README.md +264 -264
  122. package/{tech_hub_skills → .claude}/roles/mlops/skills/08-ab-testing/README.md +264 -264
  123. package/{tech_hub_skills → .claude}/roles/mlops/skills/09-automated-retraining/README.md +264 -264
  124. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/01-internal-developer-platform/README.md +153 -153
  125. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/02-self-service-infrastructure/README.md +57 -57
  126. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/03-slo-sli-management/README.md +59 -59
  127. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/04-developer-experience/README.md +57 -57
  128. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/05-incident-management/README.md +73 -73
  129. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/06-capacity-management/README.md +59 -59
  130. package/{tech_hub_skills → .claude}/roles/product-designer/skills/01-requirements-discovery/README.md +407 -407
  131. package/{tech_hub_skills → .claude}/roles/product-designer/skills/02-user-research/README.md +382 -382
  132. package/{tech_hub_skills → .claude}/roles/product-designer/skills/03-brainstorming-ideation/README.md +437 -437
  133. package/{tech_hub_skills → .claude}/roles/product-designer/skills/04-ux-design/README.md +496 -496
  134. package/{tech_hub_skills → .claude}/roles/product-designer/skills/05-product-market-fit/README.md +376 -376
  135. package/{tech_hub_skills → .claude}/roles/product-designer/skills/06-stakeholder-management/README.md +412 -412
  136. package/{tech_hub_skills → .claude}/roles/security-architect/skills/01-pii-detection/README.md +319 -319
  137. package/{tech_hub_skills → .claude}/roles/security-architect/skills/02-threat-modeling/README.md +264 -264
  138. package/{tech_hub_skills → .claude}/roles/security-architect/skills/03-infrastructure-security/README.md +264 -264
  139. package/{tech_hub_skills → .claude}/roles/security-architect/skills/04-iam/README.md +264 -264
  140. package/{tech_hub_skills → .claude}/roles/security-architect/skills/05-application-security/README.md +264 -264
  141. package/{tech_hub_skills → .claude}/roles/security-architect/skills/06-secrets-management/README.md +264 -264
  142. package/{tech_hub_skills → .claude}/roles/security-architect/skills/07-security-monitoring/README.md +264 -264
  143. package/{tech_hub_skills → .claude}/roles/system-design/skills/01-architecture-patterns/README.md +337 -337
  144. package/{tech_hub_skills → .claude}/roles/system-design/skills/02-requirements-engineering/README.md +264 -264
  145. package/{tech_hub_skills → .claude}/roles/system-design/skills/03-scalability/README.md +264 -264
  146. package/{tech_hub_skills → .claude}/roles/system-design/skills/04-high-availability/README.md +264 -264
  147. package/{tech_hub_skills → .claude}/roles/system-design/skills/05-cost-optimization-design/README.md +264 -264
  148. package/{tech_hub_skills → .claude}/roles/system-design/skills/06-api-design/README.md +264 -264
  149. package/{tech_hub_skills → .claude}/roles/system-design/skills/07-observability-architecture/README.md +264 -264
  150. package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/PROCESS_TEMPLATE.md +336 -336
  151. package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/README.md +521 -521
  152. package/.claude/roles/system-design/skills/08-process-automation/ai_prompt_generator.py +744 -0
  153. package/.claude/roles/system-design/skills/08-process-automation/automation_recommender.py +688 -0
  154. package/.claude/roles/system-design/skills/08-process-automation/plan_generator.py +679 -0
  155. package/.claude/roles/system-design/skills/08-process-automation/process_analyzer.py +528 -0
  156. package/.claude/roles/system-design/skills/08-process-automation/process_parser.py +684 -0
  157. package/.claude/roles/system-design/skills/08-process-automation/role_matcher.py +615 -0
  158. package/.claude/skills/README.md +336 -0
  159. package/.claude/skills/ai-engineer.md +104 -0
  160. package/.claude/skills/aws.md +143 -0
  161. package/.claude/skills/azure.md +149 -0
  162. package/.claude/skills/backend-developer.md +108 -0
  163. package/.claude/skills/code-review.md +399 -0
  164. package/.claude/skills/compliance-automation.md +747 -0
  165. package/.claude/skills/compliance-officer.md +108 -0
  166. package/.claude/skills/data-engineer.md +113 -0
  167. package/.claude/skills/data-governance.md +102 -0
  168. package/.claude/skills/data-scientist.md +123 -0
  169. package/.claude/skills/database-admin.md +109 -0
  170. package/.claude/skills/devops.md +160 -0
  171. package/.claude/skills/docker.md +160 -0
  172. package/.claude/skills/enterprise-dashboard.md +613 -0
  173. package/.claude/skills/finops.md +184 -0
  174. package/.claude/skills/frontend-developer.md +108 -0
  175. package/.claude/skills/gcp.md +143 -0
  176. package/.claude/skills/ml-engineer.md +115 -0
  177. package/.claude/skills/mlops.md +187 -0
  178. package/.claude/skills/network-engineer.md +109 -0
  179. package/.claude/skills/optimization-advisor.md +329 -0
  180. package/.claude/skills/orchestrator.md +623 -0
  181. package/.claude/skills/platform-engineer.md +102 -0
  182. package/.claude/skills/process-automation.md +226 -0
  183. package/.claude/skills/process-changelog.md +184 -0
  184. package/.claude/skills/process-documentation.md +484 -0
  185. package/.claude/skills/process-kanban.md +324 -0
  186. package/.claude/skills/process-versioning.md +214 -0
  187. package/.claude/skills/product-designer.md +104 -0
  188. package/.claude/skills/project-starter.md +443 -0
  189. package/.claude/skills/qa-engineer.md +109 -0
  190. package/.claude/skills/security-architect.md +135 -0
  191. package/.claude/skills/sre.md +109 -0
  192. package/.claude/skills/system-design.md +126 -0
  193. package/.claude/skills/technical-writer.md +101 -0
  194. package/.gitattributes +2 -0
  195. package/GITHUB_COPILOT.md +106 -0
  196. package/README.md +192 -291
  197. package/package.json +16 -46
  198. package/bin/cli.js +0 -241
@@ -1,711 +1,711 @@
1
- # Skill 5: Vector Embeddings & Search
2
-
3
- ## 🎯 Overview
4
- Master vector embeddings, semantic search, and similarity matching for building intelligent search systems, recommendations, and clustering applications at scale.
5
-
6
- ## 🔗 Connections
7
- - **Data Engineer**: Embedding data pipelines, vector storage optimization (de-01, de-03)
8
- - **Security Architect**: Secure embedding APIs, access control for vector indices (sa-02, sa-06)
9
- - **ML Engineer**: Embedding model fine-tuning and deployment (ml-03, ml-04)
10
- - **MLOps**: Embedding model versioning, drift detection (mo-01, mo-05)
11
- - **FinOps**: Embedding cost optimization, vector DB cost management (fo-01, fo-07)
12
- - **DevOps**: Vector DB deployment, index update automation (do-01, do-03)
13
- - **Data Scientist**: Dimensionality reduction, clustering analysis (ds-01, ds-08)
14
-
15
- ## 🛠️ Tools Included
16
-
17
- ### 1. `embedding_generator.py`
18
- Multi-provider embedding generation (OpenAI, Cohere, Azure OpenAI, sentence-transformers).
19
-
20
- ### 2. `vector_index_manager.py`
21
- Unified interface for vector databases (Pinecone, Weaviate, Qdrant, Chroma, FAISS).
22
-
23
- ### 3. `semantic_search.py`
24
- Advanced semantic search with hybrid retrieval, re-ranking, and filtering.
25
-
26
- ### 4. `embedding_evaluator.py`
27
- Evaluate embedding quality with retrieval metrics, clustering scores, and similarity distributions.
28
-
29
- ### 5. `vector_compression.py`
30
- Dimension reduction and quantization for cost-efficient vector storage.
31
-
32
- ## 📊 Key Metrics
33
- - Retrieval precision@k and recall@k
34
- - Mean Reciprocal Rank (MRR)
35
- - Embedding generation latency
36
- - Vector DB query latency (p50, p95)
37
- - Storage cost per million vectors
38
-
39
- ## 🚀 Quick Start
40
-
41
- ```python
42
- from embedding_generator import EmbeddingGenerator
43
- from vector_index_manager import VectorIndexManager
44
-
45
- # Initialize embedding generator
46
- embedder = EmbeddingGenerator(
47
- provider="azure_openai",
48
- model="text-embedding-3-large",
49
- dimensions=1536
50
- )
51
-
52
- # Initialize vector index
53
- vector_db = VectorIndexManager(
54
- provider="pinecone",
55
- index_name="product-search",
56
- dimension=1536,
57
- metric="cosine"
58
- )
59
-
60
- # Index documents
61
- documents = [
62
- {"id": "1", "text": "High-performance laptop with 32GB RAM"},
63
- {"id": "2", "text": "Lightweight tablet with long battery life"},
64
- {"id": "3", "text": "Professional gaming desktop with RTX 4090"}
65
- ]
66
-
67
- for doc in documents:
68
- embedding = embedder.embed(doc["text"])
69
- vector_db.upsert(
70
- id=doc["id"],
71
- vector=embedding,
72
- metadata={"text": doc["text"]}
73
- )
74
-
75
- # Semantic search
76
- query = "portable computer for work"
77
- query_embedding = embedder.embed(query)
78
-
79
- results = vector_db.search(
80
- query_vector=query_embedding,
81
- top_k=3,
82
- filter={"category": "electronics"}
83
- )
84
-
85
- for result in results:
86
- print(f"Score: {result.score:.3f} - {result.metadata['text']}")
87
- ```
88
-
89
- ## 📚 Best Practices
90
-
91
- ### Cost Optimization (FinOps Integration)
92
-
93
- 1. **Optimize Embedding Generation Costs**
94
- - Batch embed documents (up to 2048 per request)
95
- - Cache embeddings for reused content
96
- - Choose cost-effective models (ada vs large)
97
- - Monitor embedding costs per collection
98
- - Reference: FinOps fo-07 (AI/ML Cost Optimization)
99
-
100
- 2. **Vector Database Cost Management**
101
- - Use quantization to reduce storage by 75%
102
- - Implement tiered storage (hot/warm/cold)
103
- - Right-size index replicas
104
- - Monitor query costs and optimize indices
105
- - Reference: FinOps fo-05 (Storage Optimization)
106
-
107
- 3. **Dimension Reduction Strategies**
108
- - Reduce dimensions with PCA/UMAP
109
- - Use matryoshka embeddings for flexible dimensions
110
- - Test quality vs cost tradeoffs
111
- - Monitor retrieval quality after reduction
112
- - Reference: FinOps fo-01 (Cost Monitoring)
113
-
114
- 4. **Smart Caching Strategy**
115
- - Cache frequent query embeddings
116
- - Implement approximate caching with similarity threshold
117
- - Cache re-ranking results
118
- - Monitor cache hit rates
119
- - Reference: ai-01 (Prompt Caching)
120
-
121
- ### Security & Privacy (Security Architect Integration)
122
-
123
- 5. **Secure Embedding APIs**
124
- - Use managed identity for Azure OpenAI
125
- - Rotate API keys regularly
126
- - Implement rate limiting
127
- - Monitor for API abuse
128
- - Reference: Security Architect sa-02 (IAM)
129
-
130
- 6. **Access Control for Vector Indices**
131
- - Implement RBAC for vector databases
132
- - Filter search results by user permissions
133
- - Encrypt vectors at rest and in transit
134
- - Audit vector access patterns
135
- - Reference: Security Architect sa-06 (Data Governance)
136
-
137
- 7. **Prevent Embedding Inversion Attacks**
138
- - Don't expose raw embeddings to users
139
- - Monitor for adversarial queries
140
- - Implement query result limits
141
- - Add noise to embeddings if needed
142
- - Reference: Security Architect sa-08 (LLM Security)
143
-
144
- ### Data Quality & Governance (Data Engineer Integration)
145
-
146
- 8. **Document Preprocessing Quality**
147
- - Clean and normalize text before embedding
148
- - Handle multiple languages consistently
149
- - Remove low-quality or duplicate documents
150
- - Track document freshness
151
- - Reference: Data Engineer de-03 (Data Quality)
152
-
153
- 9. **Embedding Data Pipeline**
154
- - Automate document ingestion and embedding
155
- - Implement incremental embedding updates
156
- - Version embedding datasets
157
- - Monitor pipeline health
158
- - Reference: Data Engineer de-01 (Data Ingestion), de-02 (ETL)
159
-
160
- ### Model Lifecycle Management (MLOps Integration)
161
-
162
- 10. **Embedding Model Versioning**
163
- - Track embedding model versions in registry
164
- - Version vector indices by embedding model
165
- - A/B test embedding model changes
166
- - Maintain backward compatibility
167
- - Reference: MLOps mo-01 (Model Registry), mo-03 (Versioning)
168
-
169
- 11. **Retrieval Quality Monitoring**
170
- - Track precision, recall, MRR over time
171
- - Monitor query latency and success rates
172
- - Set up alerts for degradation
173
- - Continuous evaluation with test queries
174
- - Reference: MLOps mo-04 (Monitoring)
175
-
176
- 12. **Embedding Drift Detection**
177
- - Monitor embedding distribution shifts
178
- - Detect query pattern changes
179
- - Alert on retrieval quality degradation
180
- - Trigger retraining when needed
181
- - Reference: MLOps mo-05 (Drift Detection)
182
-
183
- ### Deployment & Operations (DevOps Integration)
184
-
185
- 13. **Containerize Vector Services**
186
- - Package embedding services in containers
187
- - Use Docker for local development
188
- - Deploy to AKS for production
189
- - Implement auto-scaling based on load
190
- - Reference: DevOps do-03 (Containerization)
191
-
192
- 14. **CI/CD for Vector Indices**
193
- - Automate index creation and updates
194
- - Implement blue-green deployments
195
- - Test index quality before promotion
196
- - Rollback capability for bad updates
197
- - Reference: DevOps do-01 (CI/CD)
198
-
199
- 15. **Observability for Vector Search**
200
- - Instrument with OpenTelemetry
201
- - Track end-to-end search latency
202
- - Monitor vector DB performance
203
- - Set up Application Insights dashboards
204
- - Reference: DevOps do-08 (Monitoring & Observability)
205
-
206
- ### Azure-Specific Best Practices
207
-
208
- 16. **Azure AI Search Integration**
209
- - Use built-in vectorization in Azure AI Search
210
- - Leverage semantic ranking
211
- - Implement hybrid search (vector + keyword)
212
- - Use managed identity for security
213
- - Reference: Azure az-05 (Azure OpenAI), az-04 (AI/ML Services)
214
-
215
- 17. **Azure OpenAI Embeddings**
216
- - Use Azure OpenAI for embeddings
217
- - Enable diagnostic logging
218
- - Implement retry logic with exponential backoff
219
- - Use provisioned throughput for predictable costs
220
- - Reference: Azure az-05 (Azure OpenAI)
221
-
222
- ## 💰 Cost Optimization Examples
223
-
224
- ### Batch Embedding with Cost Tracking
225
- ```python
226
- from embedding_generator import EmbeddingGenerator
227
- from cost_tracker import EmbeddingCostTracker
228
-
229
- embedder = EmbeddingGenerator(
230
- provider="azure_openai",
231
- model="text-embedding-3-large"
232
- )
233
-
234
- cost_tracker = EmbeddingCostTracker()
235
-
236
- def embed_documents_efficiently(documents: List[str], batch_size: int = 100):
237
- """Batch embed documents with cost tracking."""
238
- all_embeddings = []
239
-
240
- for i in range(0, len(documents), batch_size):
241
- batch = documents[i:i + batch_size]
242
-
243
- # Batch embedding (more cost-effective)
244
- embeddings = embedder.embed_batch(batch)
245
- all_embeddings.extend(embeddings)
246
-
247
- # Track costs
248
- num_tokens = sum(len(doc.split()) * 1.3 for doc in batch) # Estimate
249
- cost_tracker.log_embedding_request(
250
- model="text-embedding-3-large",
251
- num_tokens=num_tokens,
252
- num_texts=len(batch)
253
- )
254
-
255
- # Report costs
256
- report = cost_tracker.get_report()
257
- print(f"📊 Embedding Cost Report:")
258
- print(f" Documents embedded: {len(documents)}")
259
- print(f" Total cost: ${report.total_cost:.4f}")
260
- print(f" Cost per document: ${report.cost_per_doc:.6f}")
261
-
262
- return all_embeddings
263
-
264
- # Compare costs
265
- # Single requests: 1000 docs × $0.00013 = $0.130
266
- # Batch requests: 10 batches × $0.00011 = $0.011 (85% savings)
267
- ```
268
-
269
- ### Vector Compression for Storage Savings
270
- ```python
271
- from vector_compression import VectorCompressor
272
- import numpy as np
273
-
274
- class CompressedVectorIndex:
275
- def __init__(self, index_manager, compression_type="quantization"):
276
- self.index = index_manager
277
- self.compressor = VectorCompressor(
278
- method=compression_type,
279
- bits=8 # 8-bit quantization (75% storage reduction)
280
- )
281
-
282
- def upsert(self, id: str, vector: np.ndarray, metadata: dict):
283
- """Upsert compressed vector."""
284
- # Compress vector (1536 dims × 4 bytes → 1536 dims × 1 byte)
285
- compressed = self.compressor.compress(vector)
286
-
287
- # Store compressed vector
288
- self.index.upsert(
289
- id=id,
290
- vector=compressed,
291
- metadata=metadata
292
- )
293
-
294
- def search(self, query_vector: np.ndarray, top_k: int = 10):
295
- """Search with query vector."""
296
- # Compress query
297
- compressed_query = self.compressor.compress(query_vector)
298
-
299
- # Search
300
- results = self.index.search(
301
- query_vector=compressed_query,
302
- top_k=top_k
303
- )
304
-
305
- return results
306
-
307
- # Storage cost comparison:
308
- # Uncompressed: 1M vectors × 1536 dims × 4 bytes = 6.144 GB → $15/month
309
- # Quantized (8-bit): 1M vectors × 1536 dims × 1 byte = 1.536 GB → $4/month
310
- # Savings: 75% reduction
311
- ```
312
-
313
- ### Tiered Vector Storage
314
- ```python
315
- from vector_index_manager import VectorIndexManager
316
-
317
- class TieredVectorStorage:
318
- def __init__(self):
319
- # Hot tier: Recent, frequently accessed (high performance)
320
- self.hot_index = VectorIndexManager(
321
- provider="pinecone",
322
- index_type="performance",
323
- replicas=3
324
- )
325
-
326
- # Warm tier: Older, occasionally accessed (balanced)
327
- self.warm_index = VectorIndexManager(
328
- provider="pinecone",
329
- index_type="balanced",
330
- replicas=2
331
- )
332
-
333
- # Cold tier: Archive, rarely accessed (cost-optimized)
334
- self.cold_index = VectorIndexManager(
335
- provider="qdrant", # Cheaper option
336
- index_type="storage_optimized",
337
- quantization=True
338
- )
339
-
340
- def auto_tier_vectors(self, age_days: int, access_count: int):
341
- """Automatically move vectors to appropriate tier."""
342
- if age_days <= 30 or access_count > 100:
343
- return self.hot_index # $0.096/hour
344
-
345
- elif age_days <= 90 or access_count > 10:
346
- return self.warm_index # $0.048/hour
347
-
348
- else:
349
- return self.cold_index # $0.012/hour
350
-
351
- def search_all_tiers(self, query_vector: np.ndarray, top_k: int = 10):
352
- """Search across all tiers with priority."""
353
- # Search hot tier first
354
- hot_results = self.hot_index.search(query_vector, top_k)
355
-
356
- if len(hot_results) >= top_k:
357
- return hot_results
358
-
359
- # Search warm tier if needed
360
- warm_results = self.warm_index.search(query_vector, top_k)
361
-
362
- # Combine and re-rank
363
- all_results = self._merge_results(hot_results, warm_results)
364
-
365
- return all_results[:top_k]
366
-
367
- # Cost comparison:
368
- # All hot: 1M vectors → $70/month
369
- # Tiered (10% hot, 70% warm, 20% cold): $30/month (57% savings)
370
- ```
371
-
372
- ### Embedding Cache Implementation
373
- ```python
374
- from functools import lru_cache
375
- import hashlib
376
- from semantic_cache import SemanticCache
377
-
378
- class CachedEmbeddingGenerator:
379
- def __init__(self, provider: str, model: str):
380
- self.embedder = EmbeddingGenerator(provider, model)
381
- self.exact_cache = {} # Exact match cache
382
- self.semantic_cache = SemanticCache(
383
- similarity_threshold=0.99,
384
- max_size=10000
385
- )
386
-
387
- def embed(self, text: str) -> np.ndarray:
388
- """Generate embedding with caching."""
389
- # Check exact cache
390
- text_hash = hashlib.md5(text.encode()).hexdigest()
391
- if text_hash in self.exact_cache:
392
- print("✅ Exact cache hit")
393
- return self.exact_cache[text_hash]
394
-
395
- # Check semantic cache for similar text
396
- cached_embedding = self.semantic_cache.get(text)
397
- if cached_embedding is not None:
398
- print("✅ Semantic cache hit")
399
- return cached_embedding
400
-
401
- # Generate new embedding
402
- embedding = self.embedder.embed(text)
403
-
404
- # Cache the result
405
- self.exact_cache[text_hash] = embedding
406
- self.semantic_cache.set(text, embedding)
407
-
408
- return embedding
409
-
410
- def get_cache_stats(self):
411
- """Get cache performance statistics."""
412
- return {
413
- "exact_cache_size": len(self.exact_cache),
414
- "semantic_cache_size": self.semantic_cache.size(),
415
- "cache_hit_rate": self.semantic_cache.hit_rate(),
416
- "cost_saved": self.semantic_cache.cost_saved()
417
- }
418
-
419
- # Usage
420
- embedder = CachedEmbeddingGenerator("azure_openai", "text-embedding-3-large")
421
-
422
- # First call: Cache miss, generates embedding
423
- emb1 = embedder.embed("machine learning tutorial")
424
-
425
- # Second call: Exact cache hit, no cost
426
- emb2 = embedder.embed("machine learning tutorial")
427
-
428
- # Similar call: Semantic cache hit, no cost
429
- emb3 = embedder.embed("tutorial on machine learning")
430
-
431
- # Report
432
- stats = embedder.get_cache_stats()
433
- print(f"Cache hit rate: {stats['cache_hit_rate']:.2%}")
434
- print(f"Cost saved: ${stats['cost_saved']:.4f}")
435
- ```
436
-
437
- ## 🔒 Security Best Practices Examples
438
-
439
- ### Secure Embedding API Access
440
- ```python
441
- from azure.identity import DefaultAzureCredential
442
- from embedding_generator import EmbeddingGenerator
443
-
444
- class SecureEmbeddingGenerator:
445
- def __init__(self):
446
- # Use managed identity (no API keys in code)
447
- self.credential = DefaultAzureCredential()
448
-
449
- self.embedder = EmbeddingGenerator(
450
- provider="azure_openai",
451
- credential=self.credential,
452
- endpoint="https://my-openai.openai.azure.com/",
453
- api_version="2024-02-01"
454
- )
455
-
456
- # Rate limiting
457
- self.rate_limiter = RateLimiter(
458
- requests_per_minute=60,
459
- requests_per_day=10000
460
- )
461
-
462
- def embed_with_security(self, text: str, user_id: str):
463
- """Generate embedding with security controls."""
464
- # Rate limiting per user
465
- if not self.rate_limiter.check(user_id):
466
- raise RateLimitError(f"Rate limit exceeded for user {user_id}")
467
-
468
- # Input validation
469
- if len(text) > 8191: # Max tokens for text-embedding-3-large
470
- raise ValueError("Text exceeds maximum length")
471
-
472
- # Generate embedding
473
- embedding = self.embedder.embed(text)
474
-
475
- # Audit logging
476
- self.audit_log.record({
477
- "timestamp": datetime.now(),
478
- "user_id": user_id,
479
- "text_length": len(text),
480
- "model": "text-embedding-3-large",
481
- "success": True
482
- })
483
-
484
- return embedding
485
-
486
- # Usage with managed identity
487
- embedder = SecureEmbeddingGenerator()
488
- embedding = embedder.embed_with_security(
489
- text="sensitive document content",
490
- user_id="user_123"
491
- )
492
- ```
493
-
494
- ### Access-Controlled Vector Search
495
- ```python
496
- from vector_index_manager import VectorIndexManager
497
-
498
- class SecureVectorSearch:
499
- def __init__(self, index_manager):
500
- self.index = index_manager
501
-
502
- def search_with_rbac(
503
- self,
504
- query_vector: np.ndarray,
505
- user_permissions: List[str],
506
- top_k: int = 10
507
- ):
508
- """Search with role-based access control."""
509
- # Retrieve more results than needed for filtering
510
- raw_results = self.index.search(
511
- query_vector=query_vector,
512
- top_k=top_k * 3 # Over-fetch for filtering
513
- )
514
-
515
- # Filter based on user permissions
516
- filtered_results = []
517
- for result in raw_results:
518
- # Check if user has access to this document
519
- required_permission = result.metadata.get("required_permission")
520
-
521
- if required_permission in user_permissions or "admin" in user_permissions:
522
- filtered_results.append(result)
523
-
524
- if len(filtered_results) >= top_k:
525
- break
526
-
527
- # Audit log
528
- self.audit_log.record({
529
- "timestamp": datetime.now(),
530
- "user_permissions": user_permissions,
531
- "results_returned": len(filtered_results),
532
- "results_filtered": len(raw_results) - len(filtered_results)
533
- })
534
-
535
- return filtered_results[:top_k]
536
-
537
- # Usage
538
- search = SecureVectorSearch(vector_db)
539
-
540
- results = search.search_with_rbac(
541
- query_vector=query_embedding,
542
- user_permissions=["read:public", "read:internal"],
543
- top_k=10
544
- )
545
- ```
546
-
547
- ## 📊 Enhanced Metrics & Monitoring
548
-
549
- | Metric Category | Metric | Target | Tool |
550
- |-----------------|--------|--------|------|
551
- | **Retrieval Quality** | Precision@10 | >0.85 | Custom evaluator |
552
- | | Recall@10 | >0.90 | Custom evaluator |
553
- | | Mean Reciprocal Rank (MRR) | >0.80 | MLflow |
554
- | | NDCG@10 | >0.85 | Custom evaluator |
555
- | **Performance** | Embedding generation (p95) | <100ms | Azure Monitor |
556
- | | Vector search latency (p95) | <50ms | App Insights |
557
- | | Index update latency | <200ms | Custom monitor |
558
- | **Costs** | Embedding cost per 1K docs | <$0.13 | Cost tracker |
559
- | | Storage cost per 1M vectors | <$5/month | FinOps dashboard |
560
- | | Query cost per 1K searches | <$0.50 | Cost analyzer |
561
- | | Cache hit rate | >70% | Redis metrics |
562
- | **Quality** | Embedding distribution stability | >0.95 | MLflow |
563
- | | Duplicate detection rate | >0.98 | Custom monitor |
564
- | | Null embedding rate | 0% | Data quality |
565
- | **Security** | API rate limit violations | 0 | Azure Monitor |
566
- | | Unauthorized access attempts | 0 | Security logs |
567
-
568
- ## 🚀 Deployment Pipeline
569
-
570
- ### CI/CD for Vector Search System
571
- ```yaml
572
- # .github/workflows/vector-search-deployment.yml
573
- name: Vector Search Deployment
574
-
575
- on:
576
- push:
577
- paths:
578
- - 'embeddings/**'
579
- - 'vector_db/**'
580
- branches:
581
- - main
582
-
583
- jobs:
584
- test-embeddings:
585
- runs-on: ubuntu-latest
586
- steps:
587
- - name: Unit test embedding generation
588
- run: pytest tests/test_embeddings.py -v
589
-
590
- - name: Test vector operations
591
- run: pytest tests/test_vector_ops.py -v
592
-
593
- - name: Benchmark embedding quality
594
- run: python scripts/benchmark_embeddings.py
595
-
596
- - name: Test retrieval quality
597
- run: pytest tests/test_retrieval_quality.py --min-precision 0.85
598
-
599
- validate-vector-db:
600
- runs-on: ubuntu-latest
601
- steps:
602
- - name: Test vector DB connections
603
- run: pytest tests/test_vector_db.py
604
-
605
- - name: Validate index schema
606
- run: python scripts/validate_index_schema.py
607
-
608
- - name: Test search performance
609
- run: python scripts/benchmark_search.py --max-latency-ms 50
610
-
611
- deploy-to-staging:
612
- needs: [test-embeddings, validate-vector-db]
613
- runs-on: ubuntu-latest
614
- steps:
615
- - name: Build embedding service
616
- run: docker build -t embedding-service:${{ github.sha }} .
617
-
618
- - name: Push to registry
619
- run: |
620
- az acr login --name myregistry
621
- docker push myregistry.azurecr.io/embedding-service:${{ github.sha }}
622
-
623
- - name: Deploy to AKS staging
624
- run: |
625
- kubectl set image deployment/embedding-service \
626
- embedding-service=myregistry.azurecr.io/embedding-service:${{ github.sha }} \
627
- --namespace staging
628
-
629
- - name: Create staging vector index
630
- run: python scripts/create_index.py --environment staging
631
-
632
- - name: Run integration tests
633
- run: pytest tests/integration/ --environment staging
634
-
635
- deploy-to-production:
636
- needs: deploy-to-staging
637
- runs-on: ubuntu-latest
638
- environment: production
639
- steps:
640
- - name: Blue-green index swap
641
- run: python scripts/blue_green_index_swap.py
642
-
643
- - name: Deploy embedding service
644
- run: |
645
- kubectl set image deployment/embedding-service \
646
- embedding-service=myregistry.azurecr.io/embedding-service:${{ github.sha }} \
647
- --namespace production
648
-
649
- - name: Monitor search quality
650
- run: python scripts/monitor_search_quality.py --duration 1h
651
-
652
- - name: Rollback if quality degrades
653
- if: failure()
654
- run: python scripts/rollback_index.py
655
- ```
656
-
657
- ## 🔄 Integration Workflow
658
-
659
- ### End-to-End Vector Search Pipeline with All Roles
660
- ```
661
- 1. Document Ingestion (de-01)
662
-
663
- 2. Data Quality Checks (de-03)
664
-
665
- 3. Text Preprocessing & Cleaning
666
-
667
- 4. Check Embedding Cache (ai-05)
668
-
669
- 5. Batch Embedding Generation (ai-05)
670
-
671
- 6. Embedding Cost Tracking (fo-07)
672
-
673
- 7. Vector Compression (optional) (fo-05)
674
-
675
- 8. Vector Index Upsert (ai-05)
676
-
677
- 9. Access Control Metadata (sa-02)
678
-
679
- 10. Index Health Check (mo-04)
680
-
681
- 11. Query Received
682
-
683
- 12. User Permission Validation (sa-02)
684
-
685
- 13. Query Embedding Generation (ai-05)
686
-
687
- 14. Semantic Search Execution (ai-05)
688
-
689
- 15. RBAC Result Filtering (sa-06)
690
-
691
- 16. Re-ranking (optional) (ai-02)
692
-
693
- 17. Results Caching (ai-01)
694
-
695
- 18. Search Quality Metrics (mo-04)
696
-
697
- 19. Cost Attribution (fo-01)
698
-
699
- 20. Embedding Drift Detection (mo-05)
700
- ```
701
-
702
- ## 🎯 Quick Wins
703
-
704
- 1. **Batch embed documents** - 85% cost reduction vs individual embedding calls
705
- 2. **Implement embedding caching** - 70%+ cost savings on repeated content
706
- 3. **Use 8-bit quantization** - 75% storage cost reduction with minimal quality loss
707
- 4. **Set up tiered storage** - 50%+ savings by moving old vectors to cold tier
708
- 5. **Enable hybrid search** - Combine vector + keyword for better accuracy
709
- 6. **Add retrieval monitoring** - Track precision/recall to catch quality issues
710
- 7. **Implement RBAC filtering** - Secure vector search with permission controls
711
- 8. **Use managed identity** - Eliminate API key management for Azure OpenAI
1
+ # Skill 5: Vector Embeddings & Search
2
+
3
+ ## 🎯 Overview
4
+ Master vector embeddings, semantic search, and similarity matching for building intelligent search systems, recommendations, and clustering applications at scale.
5
+
6
+ ## 🔗 Connections
7
+ - **Data Engineer**: Embedding data pipelines, vector storage optimization (de-01, de-03)
8
+ - **Security Architect**: Secure embedding APIs, access control for vector indices (sa-02, sa-06)
9
+ - **ML Engineer**: Embedding model fine-tuning and deployment (ml-03, ml-04)
10
+ - **MLOps**: Embedding model versioning, drift detection (mo-01, mo-05)
11
+ - **FinOps**: Embedding cost optimization, vector DB cost management (fo-01, fo-07)
12
+ - **DevOps**: Vector DB deployment, index update automation (do-01, do-03)
13
+ - **Data Scientist**: Dimensionality reduction, clustering analysis (ds-01, ds-08)
14
+
15
+ ## 🛠️ Tools Included
16
+
17
+ ### 1. `embedding_generator.py`
18
+ Multi-provider embedding generation (OpenAI, Cohere, Azure OpenAI, sentence-transformers).
19
+
20
+ ### 2. `vector_index_manager.py`
21
+ Unified interface for vector databases (Pinecone, Weaviate, Qdrant, Chroma, FAISS).
22
+
23
+ ### 3. `semantic_search.py`
24
+ Advanced semantic search with hybrid retrieval, re-ranking, and filtering.
25
+
26
+ ### 4. `embedding_evaluator.py`
27
+ Evaluate embedding quality with retrieval metrics, clustering scores, and similarity distributions.
28
+
29
+ ### 5. `vector_compression.py`
30
+ Dimension reduction and quantization for cost-efficient vector storage.
31
+
32
+ ## 📊 Key Metrics
33
+ - Retrieval precision@k and recall@k
34
+ - Mean Reciprocal Rank (MRR)
35
+ - Embedding generation latency
36
+ - Vector DB query latency (p50, p95)
37
+ - Storage cost per million vectors
38
+
39
+ ## 🚀 Quick Start
40
+
41
+ ```python
42
+ from embedding_generator import EmbeddingGenerator
43
+ from vector_index_manager import VectorIndexManager
44
+
45
+ # Initialize embedding generator
46
+ embedder = EmbeddingGenerator(
47
+ provider="azure_openai",
48
+ model="text-embedding-3-large",
49
+ dimensions=1536
50
+ )
51
+
52
+ # Initialize vector index
53
+ vector_db = VectorIndexManager(
54
+ provider="pinecone",
55
+ index_name="product-search",
56
+ dimension=1536,
57
+ metric="cosine"
58
+ )
59
+
60
+ # Index documents
61
+ documents = [
62
+ {"id": "1", "text": "High-performance laptop with 32GB RAM"},
63
+ {"id": "2", "text": "Lightweight tablet with long battery life"},
64
+ {"id": "3", "text": "Professional gaming desktop with RTX 4090"}
65
+ ]
66
+
67
+ for doc in documents:
68
+ embedding = embedder.embed(doc["text"])
69
+ vector_db.upsert(
70
+ id=doc["id"],
71
+ vector=embedding,
72
+ metadata={"text": doc["text"]}
73
+ )
74
+
75
+ # Semantic search
76
+ query = "portable computer for work"
77
+ query_embedding = embedder.embed(query)
78
+
79
+ results = vector_db.search(
80
+ query_vector=query_embedding,
81
+ top_k=3,
82
+ filter={"category": "electronics"}
83
+ )
84
+
85
+ for result in results:
86
+ print(f"Score: {result.score:.3f} - {result.metadata['text']}")
87
+ ```
88
+
89
+ ## 📚 Best Practices
90
+
91
+ ### Cost Optimization (FinOps Integration)
92
+
93
+ 1. **Optimize Embedding Generation Costs**
94
+ - Batch embed documents (up to 2048 per request)
95
+ - Cache embeddings for reused content
96
+ - Choose cost-effective models (ada vs large)
97
+ - Monitor embedding costs per collection
98
+ - Reference: FinOps fo-07 (AI/ML Cost Optimization)
99
+
100
+ 2. **Vector Database Cost Management**
101
+ - Use quantization to reduce storage by 75%
102
+ - Implement tiered storage (hot/warm/cold)
103
+ - Right-size index replicas
104
+ - Monitor query costs and optimize indices
105
+ - Reference: FinOps fo-05 (Storage Optimization)
106
+
107
+ 3. **Dimension Reduction Strategies**
108
+ - Reduce dimensions with PCA/UMAP
109
+ - Use matryoshka embeddings for flexible dimensions
110
+ - Test quality vs cost tradeoffs
111
+ - Monitor retrieval quality after reduction
112
+ - Reference: FinOps fo-01 (Cost Monitoring)
113
+
114
+ 4. **Smart Caching Strategy**
115
+ - Cache frequent query embeddings
116
+ - Implement approximate caching with similarity threshold
117
+ - Cache re-ranking results
118
+ - Monitor cache hit rates
119
+ - Reference: ai-01 (Prompt Caching)
120
+
121
+ ### Security & Privacy (Security Architect Integration)
122
+
123
+ 5. **Secure Embedding APIs**
124
+ - Use managed identity for Azure OpenAI
125
+ - Rotate API keys regularly
126
+ - Implement rate limiting
127
+ - Monitor for API abuse
128
+ - Reference: Security Architect sa-02 (IAM)
129
+
130
+ 6. **Access Control for Vector Indices**
131
+ - Implement RBAC for vector databases
132
+ - Filter search results by user permissions
133
+ - Encrypt vectors at rest and in transit
134
+ - Audit vector access patterns
135
+ - Reference: Security Architect sa-06 (Data Governance)
136
+
137
+ 7. **Prevent Embedding Inversion Attacks**
138
+ - Don't expose raw embeddings to users
139
+ - Monitor for adversarial queries
140
+ - Implement query result limits
141
+ - Add noise to embeddings if needed
142
+ - Reference: Security Architect sa-08 (LLM Security)
143
+
144
+ ### Data Quality & Governance (Data Engineer Integration)
145
+
146
+ 8. **Document Preprocessing Quality**
147
+ - Clean and normalize text before embedding
148
+ - Handle multiple languages consistently
149
+ - Remove low-quality or duplicate documents
150
+ - Track document freshness
151
+ - Reference: Data Engineer de-03 (Data Quality)
152
+
153
+ 9. **Embedding Data Pipeline**
154
+ - Automate document ingestion and embedding
155
+ - Implement incremental embedding updates
156
+ - Version embedding datasets
157
+ - Monitor pipeline health
158
+ - Reference: Data Engineer de-01 (Data Ingestion), de-02 (ETL)
159
+
160
+ ### Model Lifecycle Management (MLOps Integration)
161
+
162
+ 10. **Embedding Model Versioning**
163
+ - Track embedding model versions in registry
164
+ - Version vector indices by embedding model
165
+ - A/B test embedding model changes
166
+ - Maintain backward compatibility
167
+ - Reference: MLOps mo-01 (Model Registry), mo-03 (Versioning)
168
+
169
+ 11. **Retrieval Quality Monitoring**
170
+ - Track precision, recall, MRR over time
171
+ - Monitor query latency and success rates
172
+ - Set up alerts for degradation
173
+ - Continuous evaluation with test queries
174
+ - Reference: MLOps mo-04 (Monitoring)
175
+
176
+ 12. **Embedding Drift Detection**
177
+ - Monitor embedding distribution shifts
178
+ - Detect query pattern changes
179
+ - Alert on retrieval quality degradation
180
+ - Trigger retraining when needed
181
+ - Reference: MLOps mo-05 (Drift Detection)
182
+
183
+ ### Deployment & Operations (DevOps Integration)
184
+
185
+ 13. **Containerize Vector Services**
186
+ - Package embedding services in containers
187
+ - Use Docker for local development
188
+ - Deploy to AKS for production
189
+ - Implement auto-scaling based on load
190
+ - Reference: DevOps do-03 (Containerization)
191
+
192
+ 14. **CI/CD for Vector Indices**
193
+ - Automate index creation and updates
194
+ - Implement blue-green deployments
195
+ - Test index quality before promotion
196
+ - Rollback capability for bad updates
197
+ - Reference: DevOps do-01 (CI/CD)
198
+
199
+ 15. **Observability for Vector Search**
200
+ - Instrument with OpenTelemetry
201
+ - Track end-to-end search latency
202
+ - Monitor vector DB performance
203
+ - Set up Application Insights dashboards
204
+ - Reference: DevOps do-08 (Monitoring & Observability)
205
+
206
+ ### Azure-Specific Best Practices
207
+
208
+ 16. **Azure AI Search Integration**
209
+ - Use built-in vectorization in Azure AI Search
210
+ - Leverage semantic ranking
211
+ - Implement hybrid search (vector + keyword)
212
+ - Use managed identity for security
213
+ - Reference: Azure az-05 (Azure OpenAI), az-04 (AI/ML Services)
214
+
215
+ 17. **Azure OpenAI Embeddings**
216
+ - Use Azure OpenAI for embeddings
217
+ - Enable diagnostic logging
218
+ - Implement retry logic with exponential backoff
219
+ - Use provisioned throughput for predictable costs
220
+ - Reference: Azure az-05 (Azure OpenAI)
221
+
222
+ ## 💰 Cost Optimization Examples
223
+
224
+ ### Batch Embedding with Cost Tracking
225
+ ```python
226
+ from embedding_generator import EmbeddingGenerator
227
+ from cost_tracker import EmbeddingCostTracker
228
+
229
+ embedder = EmbeddingGenerator(
230
+ provider="azure_openai",
231
+ model="text-embedding-3-large"
232
+ )
233
+
234
+ cost_tracker = EmbeddingCostTracker()
235
+
236
+ def embed_documents_efficiently(documents: List[str], batch_size: int = 100):
237
+ """Batch embed documents with cost tracking."""
238
+ all_embeddings = []
239
+
240
+ for i in range(0, len(documents), batch_size):
241
+ batch = documents[i:i + batch_size]
242
+
243
+ # Batch embedding (more cost-effective)
244
+ embeddings = embedder.embed_batch(batch)
245
+ all_embeddings.extend(embeddings)
246
+
247
+ # Track costs
248
+ num_tokens = sum(len(doc.split()) * 1.3 for doc in batch) # Estimate
249
+ cost_tracker.log_embedding_request(
250
+ model="text-embedding-3-large",
251
+ num_tokens=num_tokens,
252
+ num_texts=len(batch)
253
+ )
254
+
255
+ # Report costs
256
+ report = cost_tracker.get_report()
257
+ print(f"📊 Embedding Cost Report:")
258
+ print(f" Documents embedded: {len(documents)}")
259
+ print(f" Total cost: ${report.total_cost:.4f}")
260
+ print(f" Cost per document: ${report.cost_per_doc:.6f}")
261
+
262
+ return all_embeddings
263
+
264
+ # Compare costs
265
+ # Single requests: 1000 docs × $0.00013 = $0.130
266
+ # Batch requests: 10 batches × $0.00011 = $0.011 (85% savings)
267
+ ```
268
+
269
+ ### Vector Compression for Storage Savings
270
+ ```python
271
+ from vector_compression import VectorCompressor
272
+ import numpy as np
273
+
274
+ class CompressedVectorIndex:
275
+ def __init__(self, index_manager, compression_type="quantization"):
276
+ self.index = index_manager
277
+ self.compressor = VectorCompressor(
278
+ method=compression_type,
279
+ bits=8 # 8-bit quantization (75% storage reduction)
280
+ )
281
+
282
+ def upsert(self, id: str, vector: np.ndarray, metadata: dict):
283
+ """Upsert compressed vector."""
284
+ # Compress vector (1536 dims × 4 bytes → 1536 dims × 1 byte)
285
+ compressed = self.compressor.compress(vector)
286
+
287
+ # Store compressed vector
288
+ self.index.upsert(
289
+ id=id,
290
+ vector=compressed,
291
+ metadata=metadata
292
+ )
293
+
294
+ def search(self, query_vector: np.ndarray, top_k: int = 10):
295
+ """Search with query vector."""
296
+ # Compress query
297
+ compressed_query = self.compressor.compress(query_vector)
298
+
299
+ # Search
300
+ results = self.index.search(
301
+ query_vector=compressed_query,
302
+ top_k=top_k
303
+ )
304
+
305
+ return results
306
+
307
+ # Storage cost comparison:
308
+ # Uncompressed: 1M vectors × 1536 dims × 4 bytes = 6.144 GB → $15/month
309
+ # Quantized (8-bit): 1M vectors × 1536 dims × 1 byte = 1.536 GB → $4/month
310
+ # Savings: 75% reduction
311
+ ```
312
+
313
+ ### Tiered Vector Storage
314
+ ```python
315
+ from vector_index_manager import VectorIndexManager
316
+
317
+ class TieredVectorStorage:
318
+ def __init__(self):
319
+ # Hot tier: Recent, frequently accessed (high performance)
320
+ self.hot_index = VectorIndexManager(
321
+ provider="pinecone",
322
+ index_type="performance",
323
+ replicas=3
324
+ )
325
+
326
+ # Warm tier: Older, occasionally accessed (balanced)
327
+ self.warm_index = VectorIndexManager(
328
+ provider="pinecone",
329
+ index_type="balanced",
330
+ replicas=2
331
+ )
332
+
333
+ # Cold tier: Archive, rarely accessed (cost-optimized)
334
+ self.cold_index = VectorIndexManager(
335
+ provider="qdrant", # Cheaper option
336
+ index_type="storage_optimized",
337
+ quantization=True
338
+ )
339
+
340
+ def auto_tier_vectors(self, age_days: int, access_count: int):
341
+ """Return the index of the tier (hot, warm, or cold) that a vector belongs in, based on its age and access count."""
342
+ if age_days <= 30 or access_count > 100:
343
+ return self.hot_index # $0.096/hour
344
+
345
+ elif age_days <= 90 or access_count > 10:
346
+ return self.warm_index # $0.048/hour
347
+
348
+ else:
349
+ return self.cold_index # $0.012/hour
350
+
351
+ def search_all_tiers(self, query_vector: np.ndarray, top_k: int = 10):
352
+ """Search the hot tier first, falling back to the warm tier when it returns fewer than top_k results (the cold tier is not queried)."""
353
+ # Search hot tier first
354
+ hot_results = self.hot_index.search(query_vector, top_k)
355
+
356
+ if len(hot_results) >= top_k:
357
+ return hot_results
358
+
359
+ # Search warm tier if needed
360
+ warm_results = self.warm_index.search(query_vector, top_k)
361
+
362
+ # Combine and re-rank
363
+ all_results = self._merge_results(hot_results, warm_results)
364
+
365
+ return all_results[:top_k]
366
+
367
+ # Cost comparison:
368
+ # All hot: 1M vectors → $70/month
369
+ # Tiered (70% warm, 20% cold): $30/month (57% savings)
370
+ ```
371
+
372
+ ### Embedding Cache Implementation
373
+ ```python
374
+ from functools import lru_cache
375
+ import hashlib
376
+ from semantic_cache import SemanticCache
377
+
378
+ class CachedEmbeddingGenerator:
379
+ def __init__(self, provider: str, model: str):
380
+ self.embedder = EmbeddingGenerator(provider, model)
381
+ self.exact_cache = {} # Exact match cache
382
+ self.semantic_cache = SemanticCache(
383
+ similarity_threshold=0.99,
384
+ max_size=10000
385
+ )
386
+
387
+ def embed(self, text: str) -> np.ndarray:
388
+ """Generate embedding with caching."""
389
+ # Check exact cache
390
+ text_hash = hashlib.md5(text.encode()).hexdigest()
391
+ if text_hash in self.exact_cache:
392
+ print("✅ Exact cache hit")
393
+ return self.exact_cache[text_hash]
394
+
395
+ # Check semantic cache for similar text
396
+ cached_embedding = self.semantic_cache.get(text)
397
+ if cached_embedding is not None:
398
+ print("✅ Semantic cache hit")
399
+ return cached_embedding
400
+
401
+ # Generate new embedding
402
+ embedding = self.embedder.embed(text)
403
+
404
+ # Cache the result
405
+ self.exact_cache[text_hash] = embedding
406
+ self.semantic_cache.set(text, embedding)
407
+
408
+ return embedding
409
+
410
+ def get_cache_stats(self):
411
+ """Get cache performance statistics."""
412
+ return {
413
+ "exact_cache_size": len(self.exact_cache),
414
+ "semantic_cache_size": self.semantic_cache.size(),
415
+ "cache_hit_rate": self.semantic_cache.hit_rate(),
416
+ "cost_saved": self.semantic_cache.cost_saved()
417
+ }
418
+
419
+ # Usage
420
+ embedder = CachedEmbeddingGenerator("azure_openai", "text-embedding-3-large")
421
+
422
+ # First call: Cache miss, generates embedding
423
+ emb1 = embedder.embed("machine learning tutorial")
424
+
425
+ # Second call: Exact cache hit, no cost
426
+ emb2 = embedder.embed("machine learning tutorial")
427
+
428
+ # Similar call: Semantic cache hit, no cost
429
+ emb3 = embedder.embed("tutorial on machine learning")
430
+
431
+ # Report
432
+ stats = embedder.get_cache_stats()
433
+ print(f"Cache hit rate: {stats['cache_hit_rate']:.2%}")
434
+ print(f"Cost saved: ${stats['cost_saved']:.4f}")
435
+ ```
436
+
437
+ ## 🔒 Security Best Practices Examples
438
+
439
+ ### Secure Embedding API Access
440
+ ```python
441
+ from azure.identity import DefaultAzureCredential
442
+ from embedding_generator import EmbeddingGenerator
443
+
444
+ class SecureEmbeddingGenerator:
445
+ def __init__(self):
446
+ # Use managed identity (no API keys in code)
447
+ self.credential = DefaultAzureCredential()
448
+
449
+ self.embedder = EmbeddingGenerator(
450
+ provider="azure_openai",
451
+ credential=self.credential,
452
+ endpoint="https://my-openai.openai.azure.com/",
453
+ api_version="2024-02-01"
454
+ )
455
+
456
+ # Rate limiting
457
+ self.rate_limiter = RateLimiter(
458
+ requests_per_minute=60,
459
+ requests_per_day=10000
460
+ )
461
+
462
+ def embed_with_security(self, text: str, user_id: str):
463
+ """Generate embedding with security controls."""
464
+ # Rate limiting per user
465
+ if not self.rate_limiter.check(user_id):
466
+ raise RateLimitError(f"Rate limit exceeded for user {user_id}")
467
+
468
+ # Input validation
469
+ if len(text) > 8191: # Character count used as a rough proxy for the 8191-token input limit of text-embedding-3-large
470
+ raise ValueError("Text exceeds maximum length")
471
+
472
+ # Generate embedding
473
+ embedding = self.embedder.embed(text)
474
+
475
+ # Audit logging
476
+ self.audit_log.record({
477
+ "timestamp": datetime.now(),
478
+ "user_id": user_id,
479
+ "text_length": len(text),
480
+ "model": "text-embedding-3-large",
481
+ "success": True
482
+ })
483
+
484
+ return embedding
485
+
486
+ # Usage with managed identity
487
+ embedder = SecureEmbeddingGenerator()
488
+ embedding = embedder.embed_with_security(
489
+ text="sensitive document content",
490
+ user_id="user_123"
491
+ )
492
+ ```
493
+
494
+ ### Access-Controlled Vector Search
495
+ ```python
496
+ from vector_index_manager import VectorIndexManager
497
+
498
+ class SecureVectorSearch:
499
+ def __init__(self, index_manager):
500
+ self.index = index_manager
501
+
502
+ def search_with_rbac(
503
+ self,
504
+ query_vector: np.ndarray,
505
+ user_permissions: List[str],
506
+ top_k: int = 10
507
+ ):
508
+ """Search with role-based access control."""
509
+ # Retrieve more results than needed for filtering
510
+ raw_results = self.index.search(
511
+ query_vector=query_vector,
512
+ top_k=top_k * 3 # Over-fetch for filtering
513
+ )
514
+
515
+ # Filter based on user permissions
516
+ filtered_results = []
517
+ for result in raw_results:
518
+ # Check if user has access to this document
519
+ required_permission = result.metadata.get("required_permission")
520
+
521
+ if required_permission in user_permissions or "admin" in user_permissions:
522
+ filtered_results.append(result)
523
+
524
+ if len(filtered_results) >= top_k:
525
+ break
526
+
527
+ # Audit log
528
+ self.audit_log.record({
529
+ "timestamp": datetime.now(),
530
+ "user_permissions": user_permissions,
531
+ "results_returned": len(filtered_results),
532
+ "results_filtered": len(raw_results) - len(filtered_results)
533
+ })
534
+
535
+ return filtered_results[:top_k]
536
+
537
+ # Usage
538
+ search = SecureVectorSearch(vector_db)
539
+
540
+ results = search.search_with_rbac(
541
+ query_vector=query_embedding,
542
+ user_permissions=["read:public", "read:internal"],
543
+ top_k=10
544
+ )
545
+ ```
546
+
547
+ ## 📊 Enhanced Metrics & Monitoring
548
+
549
+ | Metric Category | Metric | Target | Tool |
550
+ |-----------------|--------|--------|------|
551
+ | **Retrieval Quality** | Precision@10 | >0.85 | Custom evaluator |
552
+ | | Recall@10 | >0.90 | Custom evaluator |
553
+ | | Mean Reciprocal Rank (MRR) | >0.80 | MLflow |
554
+ | | NDCG@10 | >0.85 | Custom evaluator |
555
+ | **Performance** | Embedding generation (p95) | <100ms | Azure Monitor |
556
+ | | Vector search latency (p95) | <50ms | App Insights |
557
+ | | Index update latency | <200ms | Custom monitor |
558
+ | **Costs** | Embedding cost per 1K docs | <$0.13 | Cost tracker |
559
+ | | Storage cost per 1M vectors | <$5/month | FinOps dashboard |
560
+ | | Query cost per 1K searches | <$0.50 | Cost analyzer |
561
+ | | Cache hit rate | >70% | Redis metrics |
562
+ | **Quality** | Embedding distribution stability | >0.95 | MLflow |
563
+ | | Duplicate detection rate | >0.98 | Custom monitor |
564
+ | | Null embedding rate | 0% | Data quality |
565
+ | **Security** | API rate limit violations | 0 | Azure Monitor |
566
+ | | Unauthorized access attempts | 0 | Security logs |
567
+
568
+ ## 🚀 Deployment Pipeline
569
+
570
+ ### CI/CD for Vector Search System
571
+ ```yaml
572
+ # .github/workflows/vector-search-deployment.yml
573
+ name: Vector Search Deployment
574
+
575
+ on:
576
+ push:
577
+ paths:
578
+ - 'embeddings/**'
579
+ - 'vector_db/**'
580
+ branches:
581
+ - main
582
+
583
+ jobs:
584
+ test-embeddings:
585
+ runs-on: ubuntu-latest
586
+ steps:
587
+ - name: Unit test embedding generation
588
+ run: pytest tests/test_embeddings.py -v
589
+
590
+ - name: Test vector operations
591
+ run: pytest tests/test_vector_ops.py -v
592
+
593
+ - name: Benchmark embedding quality
594
+ run: python scripts/benchmark_embeddings.py
595
+
596
+ - name: Test retrieval quality
597
+ run: pytest tests/test_retrieval_quality.py --min-precision 0.85
598
+
599
+ validate-vector-db:
600
+ runs-on: ubuntu-latest
601
+ steps:
602
+ - name: Test vector DB connections
603
+ run: pytest tests/test_vector_db.py
604
+
605
+ - name: Validate index schema
606
+ run: python scripts/validate_index_schema.py
607
+
608
+ - name: Test search performance
609
+ run: python scripts/benchmark_search.py --max-latency-ms 50
610
+
611
+ deploy-to-staging:
612
+ needs: [test-embeddings, validate-vector-db]
613
+ runs-on: ubuntu-latest
614
+ steps:
615
+ - name: Build embedding service
616
+ run: docker build -t embedding-service:${{ github.sha }} .
617
+
618
+ - name: Push to registry
619
+ run: |
620
+ az acr login --name myregistry
621
+ docker push myregistry.azurecr.io/embedding-service:${{ github.sha }}
622
+
623
+ - name: Deploy to AKS staging
624
+ run: |
625
+ kubectl set image deployment/embedding-service \
626
+ embedding-service=myregistry.azurecr.io/embedding-service:${{ github.sha }} \
627
+ --namespace staging
628
+
629
+ - name: Create staging vector index
630
+ run: python scripts/create_index.py --environment staging
631
+
632
+ - name: Run integration tests
633
+ run: pytest tests/integration/ --environment staging
634
+
635
+ deploy-to-production:
636
+ needs: deploy-to-staging
637
+ runs-on: ubuntu-latest
638
+ environment: production
639
+ steps:
640
+ - name: Blue-green index swap
641
+ run: python scripts/blue_green_index_swap.py
642
+
643
+ - name: Deploy embedding service
644
+ run: |
645
+ kubectl set image deployment/embedding-service \
646
+ embedding-service=myregistry.azurecr.io/embedding-service:${{ github.sha }} \
647
+ --namespace production
648
+
649
+ - name: Monitor search quality
650
+ run: python scripts/monitor_search_quality.py --duration 1h
651
+
652
+ - name: Rollback if quality degrades
653
+ if: failure()
654
+ run: python scripts/rollback_index.py
655
+ ```
656
+
657
+ ## 🔄 Integration Workflow
658
+
659
+ ### End-to-End Vector Search Pipeline with All Roles
660
+ ```
661
+ 1. Document Ingestion (de-01)
662
+
663
+ 2. Data Quality Checks (de-03)
664
+
665
+ 3. Text Preprocessing & Cleaning
666
+
667
+ 4. Check Embedding Cache (ai-05)
668
+
669
+ 5. Batch Embedding Generation (ai-05)
670
+
671
+ 6. Embedding Cost Tracking (fo-07)
672
+
673
+ 7. Vector Compression (optional) (fo-05)
674
+
675
+ 8. Vector Index Upsert (ai-05)
676
+
677
+ 9. Access Control Metadata (sa-02)
678
+
679
+ 10. Index Health Check (mo-04)
680
+
681
+ 11. Query Received
682
+
683
+ 12. User Permission Validation (sa-02)
684
+
685
+ 13. Query Embedding Generation (ai-05)
686
+
687
+ 14. Semantic Search Execution (ai-05)
688
+
689
+ 15. RBAC Result Filtering (sa-06)
690
+
691
+ 16. Re-ranking (optional) (ai-02)
692
+
693
+ 17. Results Caching (ai-01)
694
+
695
+ 18. Search Quality Metrics (mo-04)
696
+
697
+ 19. Cost Attribution (fo-01)
698
+
699
+ 20. Embedding Drift Detection (mo-05)
700
+ ```
701
+
702
+ ## 🎯 Quick Wins
703
+
704
+ 1. **Batch embed documents** - 85% cost reduction vs individual embedding calls
705
+ 2. **Implement embedding caching** - 70%+ cost savings on repeated content
706
+ 3. **Use 8-bit quantization** - 75% storage cost reduction versus 32-bit float vectors, with minimal quality loss
707
+ 4. **Set up tiered storage** - 50%+ savings by moving old vectors to cold tier
708
+ 5. **Enable hybrid search** - Combine vector and keyword search for better accuracy
709
+ 6. **Add retrieval monitoring** - Track precision/recall to catch quality issues
710
+ 7. **Implement RBAC filtering** - Secure vector search with permission controls
711
+ 8. **Use managed identity** - Eliminate API key management for Azure OpenAI