tech-hub-skills 1.2.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/{LICENSE → .claude/LICENSE} +21 -21
  2. package/.claude/README.md +291 -0
  3. package/.claude/bin/cli.js +266 -0
  4. package/{bin → .claude/bin}/copilot.js +182 -182
  5. package/{bin → .claude/bin}/postinstall.js +42 -42
  6. package/{tech_hub_skills/skills → .claude/commands}/README.md +336 -336
  7. package/{tech_hub_skills/skills → .claude/commands}/ai-engineer.md +104 -104
  8. package/{tech_hub_skills/skills → .claude/commands}/aws.md +143 -143
  9. package/{tech_hub_skills/skills → .claude/commands}/azure.md +149 -149
  10. package/{tech_hub_skills/skills → .claude/commands}/backend-developer.md +108 -108
  11. package/{tech_hub_skills/skills → .claude/commands}/code-review.md +399 -399
  12. package/{tech_hub_skills/skills → .claude/commands}/compliance-automation.md +747 -747
  13. package/{tech_hub_skills/skills → .claude/commands}/compliance-officer.md +108 -108
  14. package/{tech_hub_skills/skills → .claude/commands}/data-engineer.md +113 -113
  15. package/{tech_hub_skills/skills → .claude/commands}/data-governance.md +102 -102
  16. package/{tech_hub_skills/skills → .claude/commands}/data-scientist.md +123 -123
  17. package/{tech_hub_skills/skills → .claude/commands}/database-admin.md +109 -109
  18. package/{tech_hub_skills/skills → .claude/commands}/devops.md +160 -160
  19. package/{tech_hub_skills/skills → .claude/commands}/docker.md +160 -160
  20. package/{tech_hub_skills/skills → .claude/commands}/enterprise-dashboard.md +613 -613
  21. package/{tech_hub_skills/skills → .claude/commands}/finops.md +184 -184
  22. package/{tech_hub_skills/skills → .claude/commands}/frontend-developer.md +108 -108
  23. package/{tech_hub_skills/skills → .claude/commands}/gcp.md +143 -143
  24. package/{tech_hub_skills/skills → .claude/commands}/ml-engineer.md +115 -115
  25. package/{tech_hub_skills/skills → .claude/commands}/mlops.md +187 -187
  26. package/{tech_hub_skills/skills → .claude/commands}/network-engineer.md +109 -109
  27. package/{tech_hub_skills/skills → .claude/commands}/optimization-advisor.md +329 -329
  28. package/{tech_hub_skills/skills → .claude/commands}/orchestrator.md +623 -623
  29. package/{tech_hub_skills/skills → .claude/commands}/platform-engineer.md +102 -102
  30. package/{tech_hub_skills/skills → .claude/commands}/process-automation.md +226 -226
  31. package/{tech_hub_skills/skills → .claude/commands}/process-changelog.md +184 -184
  32. package/{tech_hub_skills/skills → .claude/commands}/process-documentation.md +484 -484
  33. package/{tech_hub_skills/skills → .claude/commands}/process-kanban.md +324 -324
  34. package/{tech_hub_skills/skills → .claude/commands}/process-versioning.md +214 -214
  35. package/{tech_hub_skills/skills → .claude/commands}/product-designer.md +104 -104
  36. package/{tech_hub_skills/skills → .claude/commands}/project-starter.md +443 -443
  37. package/{tech_hub_skills/skills → .claude/commands}/qa-engineer.md +109 -109
  38. package/{tech_hub_skills/skills → .claude/commands}/security-architect.md +135 -135
  39. package/{tech_hub_skills/skills → .claude/commands}/sre.md +109 -109
  40. package/{tech_hub_skills/skills → .claude/commands}/system-design.md +126 -126
  41. package/{tech_hub_skills/skills → .claude/commands}/technical-writer.md +101 -101
  42. package/.claude/package.json +46 -0
  43. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/01-prompt-engineering/README.md +252 -252
  44. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_ab_tester.py +356 -0
  45. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/prompt_template_manager.py +274 -0
  46. package/.claude/roles/ai-engineer/skills/01-prompt-engineering/token_cost_estimator.py +324 -0
  47. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/02-rag-pipeline/README.md +448 -448
  48. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/document_chunker.py +336 -0
  49. package/.claude/roles/ai-engineer/skills/02-rag-pipeline/rag_pipeline.sql +213 -0
  50. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/03-agent-orchestration/README.md +599 -599
  51. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/04-llm-guardrails/README.md +735 -735
  52. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/05-vector-embeddings/README.md +711 -711
  53. package/{tech_hub_skills → .claude}/roles/ai-engineer/skills/06-llm-evaluation/README.md +777 -777
  54. package/{tech_hub_skills → .claude}/roles/azure/skills/01-infrastructure-fundamentals/README.md +264 -264
  55. package/{tech_hub_skills → .claude}/roles/azure/skills/02-data-factory/README.md +264 -264
  56. package/{tech_hub_skills → .claude}/roles/azure/skills/03-synapse-analytics/README.md +264 -264
  57. package/{tech_hub_skills → .claude}/roles/azure/skills/04-databricks/README.md +264 -264
  58. package/{tech_hub_skills → .claude}/roles/azure/skills/05-functions/README.md +264 -264
  59. package/{tech_hub_skills → .claude}/roles/azure/skills/06-kubernetes-service/README.md +264 -264
  60. package/{tech_hub_skills → .claude}/roles/azure/skills/07-openai-service/README.md +264 -264
  61. package/{tech_hub_skills → .claude}/roles/azure/skills/08-machine-learning/README.md +264 -264
  62. package/{tech_hub_skills → .claude}/roles/azure/skills/09-storage-adls/README.md +264 -264
  63. package/{tech_hub_skills → .claude}/roles/azure/skills/10-networking/README.md +264 -264
  64. package/{tech_hub_skills → .claude}/roles/azure/skills/11-sql-cosmos/README.md +264 -264
  65. package/{tech_hub_skills → .claude}/roles/azure/skills/12-event-hubs/README.md +264 -264
  66. package/{tech_hub_skills → .claude}/roles/code-review/skills/01-automated-code-review/README.md +394 -394
  67. package/{tech_hub_skills → .claude}/roles/code-review/skills/02-pr-review-workflow/README.md +427 -427
  68. package/{tech_hub_skills → .claude}/roles/code-review/skills/03-code-quality-gates/README.md +518 -518
  69. package/{tech_hub_skills → .claude}/roles/code-review/skills/04-reviewer-assignment/README.md +504 -504
  70. package/{tech_hub_skills → .claude}/roles/code-review/skills/05-review-analytics/README.md +540 -540
  71. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/01-lakehouse-architecture/README.md +550 -550
  72. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/bronze_ingestion.py +337 -0
  73. package/.claude/roles/data-engineer/skills/01-lakehouse-architecture/medallion_queries.sql +300 -0
  74. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/02-etl-pipeline/README.md +580 -580
  75. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/03-data-quality/README.md +579 -579
  76. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/04-streaming-pipelines/README.md +608 -608
  77. package/{tech_hub_skills → .claude}/roles/data-engineer/skills/05-performance-optimization/README.md +547 -547
  78. package/{tech_hub_skills → .claude}/roles/data-governance/skills/01-data-catalog/README.md +112 -112
  79. package/{tech_hub_skills → .claude}/roles/data-governance/skills/02-data-lineage/README.md +129 -129
  80. package/{tech_hub_skills → .claude}/roles/data-governance/skills/03-data-quality-framework/README.md +182 -182
  81. package/{tech_hub_skills → .claude}/roles/data-governance/skills/04-access-control/README.md +39 -39
  82. package/{tech_hub_skills → .claude}/roles/data-governance/skills/05-master-data-management/README.md +40 -40
  83. package/{tech_hub_skills → .claude}/roles/data-governance/skills/06-compliance-privacy/README.md +46 -46
  84. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/01-eda-automation/README.md +230 -230
  85. package/.claude/roles/data-scientist/skills/01-eda-automation/eda_generator.py +446 -0
  86. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/02-statistical-modeling/README.md +264 -264
  87. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/03-feature-engineering/README.md +264 -264
  88. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/04-predictive-modeling/README.md +264 -264
  89. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/05-customer-analytics/README.md +264 -264
  90. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/06-campaign-analysis/README.md +264 -264
  91. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/07-experimentation/README.md +264 -264
  92. package/{tech_hub_skills → .claude}/roles/data-scientist/skills/08-data-visualization/README.md +264 -264
  93. package/{tech_hub_skills → .claude}/roles/devops/skills/01-cicd-pipeline/README.md +264 -264
  94. package/{tech_hub_skills → .claude}/roles/devops/skills/02-container-orchestration/README.md +264 -264
  95. package/{tech_hub_skills → .claude}/roles/devops/skills/03-infrastructure-as-code/README.md +264 -264
  96. package/{tech_hub_skills → .claude}/roles/devops/skills/04-gitops/README.md +264 -264
  97. package/{tech_hub_skills → .claude}/roles/devops/skills/05-environment-management/README.md +264 -264
  98. package/{tech_hub_skills → .claude}/roles/devops/skills/06-automated-testing/README.md +264 -264
  99. package/{tech_hub_skills → .claude}/roles/devops/skills/07-release-management/README.md +264 -264
  100. package/{tech_hub_skills → .claude}/roles/devops/skills/08-monitoring-alerting/README.md +264 -264
  101. package/{tech_hub_skills → .claude}/roles/devops/skills/09-devsecops/README.md +265 -265
  102. package/{tech_hub_skills → .claude}/roles/finops/skills/01-cost-visibility/README.md +264 -264
  103. package/{tech_hub_skills → .claude}/roles/finops/skills/02-resource-tagging/README.md +264 -264
  104. package/{tech_hub_skills → .claude}/roles/finops/skills/03-budget-management/README.md +264 -264
  105. package/{tech_hub_skills → .claude}/roles/finops/skills/04-reserved-instances/README.md +264 -264
  106. package/{tech_hub_skills → .claude}/roles/finops/skills/05-spot-optimization/README.md +264 -264
  107. package/{tech_hub_skills → .claude}/roles/finops/skills/06-storage-tiering/README.md +264 -264
  108. package/{tech_hub_skills → .claude}/roles/finops/skills/07-compute-rightsizing/README.md +264 -264
  109. package/{tech_hub_skills → .claude}/roles/finops/skills/08-chargeback/README.md +264 -264
  110. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/01-mlops-pipeline/README.md +566 -566
  111. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/02-feature-engineering/README.md +655 -655
  112. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/03-model-training/README.md +704 -704
  113. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/04-model-serving/README.md +845 -845
  114. package/{tech_hub_skills → .claude}/roles/ml-engineer/skills/05-model-monitoring/README.md +874 -874
  115. package/{tech_hub_skills → .claude}/roles/mlops/skills/01-ml-pipeline-orchestration/README.md +264 -264
  116. package/{tech_hub_skills → .claude}/roles/mlops/skills/02-experiment-tracking/README.md +264 -264
  117. package/{tech_hub_skills → .claude}/roles/mlops/skills/03-model-registry/README.md +264 -264
  118. package/{tech_hub_skills → .claude}/roles/mlops/skills/04-feature-store/README.md +264 -264
  119. package/{tech_hub_skills → .claude}/roles/mlops/skills/05-model-deployment/README.md +264 -264
  120. package/{tech_hub_skills → .claude}/roles/mlops/skills/06-model-observability/README.md +264 -264
  121. package/{tech_hub_skills → .claude}/roles/mlops/skills/07-data-versioning/README.md +264 -264
  122. package/{tech_hub_skills → .claude}/roles/mlops/skills/08-ab-testing/README.md +264 -264
  123. package/{tech_hub_skills → .claude}/roles/mlops/skills/09-automated-retraining/README.md +264 -264
  124. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/01-internal-developer-platform/README.md +153 -153
  125. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/02-self-service-infrastructure/README.md +57 -57
  126. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/03-slo-sli-management/README.md +59 -59
  127. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/04-developer-experience/README.md +57 -57
  128. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/05-incident-management/README.md +73 -73
  129. package/{tech_hub_skills → .claude}/roles/platform-engineer/skills/06-capacity-management/README.md +59 -59
  130. package/{tech_hub_skills → .claude}/roles/product-designer/skills/01-requirements-discovery/README.md +407 -407
  131. package/{tech_hub_skills → .claude}/roles/product-designer/skills/02-user-research/README.md +382 -382
  132. package/{tech_hub_skills → .claude}/roles/product-designer/skills/03-brainstorming-ideation/README.md +437 -437
  133. package/{tech_hub_skills → .claude}/roles/product-designer/skills/04-ux-design/README.md +496 -496
  134. package/{tech_hub_skills → .claude}/roles/product-designer/skills/05-product-market-fit/README.md +376 -376
  135. package/{tech_hub_skills → .claude}/roles/product-designer/skills/06-stakeholder-management/README.md +412 -412
  136. package/{tech_hub_skills → .claude}/roles/security-architect/skills/01-pii-detection/README.md +319 -319
  137. package/{tech_hub_skills → .claude}/roles/security-architect/skills/02-threat-modeling/README.md +264 -264
  138. package/{tech_hub_skills → .claude}/roles/security-architect/skills/03-infrastructure-security/README.md +264 -264
  139. package/{tech_hub_skills → .claude}/roles/security-architect/skills/04-iam/README.md +264 -264
  140. package/{tech_hub_skills → .claude}/roles/security-architect/skills/05-application-security/README.md +264 -264
  141. package/{tech_hub_skills → .claude}/roles/security-architect/skills/06-secrets-management/README.md +264 -264
  142. package/{tech_hub_skills → .claude}/roles/security-architect/skills/07-security-monitoring/README.md +264 -264
  143. package/{tech_hub_skills → .claude}/roles/system-design/skills/01-architecture-patterns/README.md +337 -337
  144. package/{tech_hub_skills → .claude}/roles/system-design/skills/02-requirements-engineering/README.md +264 -264
  145. package/{tech_hub_skills → .claude}/roles/system-design/skills/03-scalability/README.md +264 -264
  146. package/{tech_hub_skills → .claude}/roles/system-design/skills/04-high-availability/README.md +264 -264
  147. package/{tech_hub_skills → .claude}/roles/system-design/skills/05-cost-optimization-design/README.md +264 -264
  148. package/{tech_hub_skills → .claude}/roles/system-design/skills/06-api-design/README.md +264 -264
  149. package/{tech_hub_skills → .claude}/roles/system-design/skills/07-observability-architecture/README.md +264 -264
  150. package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/PROCESS_TEMPLATE.md +336 -336
  151. package/{tech_hub_skills → .claude}/roles/system-design/skills/08-process-automation/README.md +521 -521
  152. package/.claude/roles/system-design/skills/08-process-automation/ai_prompt_generator.py +744 -0
  153. package/.claude/roles/system-design/skills/08-process-automation/automation_recommender.py +688 -0
  154. package/.claude/roles/system-design/skills/08-process-automation/plan_generator.py +679 -0
  155. package/.claude/roles/system-design/skills/08-process-automation/process_analyzer.py +528 -0
  156. package/.claude/roles/system-design/skills/08-process-automation/process_parser.py +684 -0
  157. package/.claude/roles/system-design/skills/08-process-automation/role_matcher.py +615 -0
  158. package/.claude/skills/README.md +336 -0
  159. package/.claude/skills/ai-engineer.md +104 -0
  160. package/.claude/skills/aws.md +143 -0
  161. package/.claude/skills/azure.md +149 -0
  162. package/.claude/skills/backend-developer.md +108 -0
  163. package/.claude/skills/code-review.md +399 -0
  164. package/.claude/skills/compliance-automation.md +747 -0
  165. package/.claude/skills/compliance-officer.md +108 -0
  166. package/.claude/skills/data-engineer.md +113 -0
  167. package/.claude/skills/data-governance.md +102 -0
  168. package/.claude/skills/data-scientist.md +123 -0
  169. package/.claude/skills/database-admin.md +109 -0
  170. package/.claude/skills/devops.md +160 -0
  171. package/.claude/skills/docker.md +160 -0
  172. package/.claude/skills/enterprise-dashboard.md +613 -0
  173. package/.claude/skills/finops.md +184 -0
  174. package/.claude/skills/frontend-developer.md +108 -0
  175. package/.claude/skills/gcp.md +143 -0
  176. package/.claude/skills/ml-engineer.md +115 -0
  177. package/.claude/skills/mlops.md +187 -0
  178. package/.claude/skills/network-engineer.md +109 -0
  179. package/.claude/skills/optimization-advisor.md +329 -0
  180. package/.claude/skills/orchestrator.md +623 -0
  181. package/.claude/skills/platform-engineer.md +102 -0
  182. package/.claude/skills/process-automation.md +226 -0
  183. package/.claude/skills/process-changelog.md +184 -0
  184. package/.claude/skills/process-documentation.md +484 -0
  185. package/.claude/skills/process-kanban.md +324 -0
  186. package/.claude/skills/process-versioning.md +214 -0
  187. package/.claude/skills/product-designer.md +104 -0
  188. package/.claude/skills/project-starter.md +443 -0
  189. package/.claude/skills/qa-engineer.md +109 -0
  190. package/.claude/skills/security-architect.md +135 -0
  191. package/.claude/skills/sre.md +109 -0
  192. package/.claude/skills/system-design.md +126 -0
  193. package/.claude/skills/technical-writer.md +101 -0
  194. package/.gitattributes +2 -0
  195. package/GITHUB_COPILOT.md +106 -0
  196. package/README.md +192 -291
  197. package/package.json +16 -46
  198. package/bin/cli.js +0 -241
@@ -1,599 +1,599 @@
1
- # Skill 3: LLM Agent Orchestration
2
-
3
- ## 🎯 Overview
4
- Build advanced multi-agent systems with autonomous task delegation, tool execution, and intelligent workflow orchestration for complex AI applications.
5
-
6
- ## 🔗 Connections
7
- - **Data Engineer**: Agent state persistence, conversation history storage (de-01, de-03)
8
- - **Security Architect**: Tool execution sandboxing, agent permission management (sa-02, sa-08)
9
- - **ML Engineer**: Agent model selection and optimization (ml-03, ml-04)
10
- - **MLOps**: Agent performance tracking, multi-agent metrics (mo-01, mo-04)
11
- - **FinOps**: Multi-agent cost attribution, tool execution cost tracking (fo-01, fo-07)
12
- - **DevOps**: Agent deployment, horizontal scaling for agent clusters (do-01, do-03)
13
- - **Data Scientist**: Agent behavior analysis, conversation analytics (ds-01, ds-08)
14
-
15
- ## 🛠️ Tools Included
16
-
17
- ### 1. `agent_orchestrator.py`
18
- Multi-agent coordination system with task delegation, conversation routing, and state management.
19
-
20
- ### 2. `tool_registry.py`
21
- Centralized tool registry for agent function calling with versioning and permission controls.
22
-
23
- ### 3. `agent_memory.py`
24
- Short-term and long-term memory management with vector-based conversation retrieval.
25
-
26
- ### 4. `workflow_executor.py`
27
- Sequential and parallel workflow execution for complex multi-step agent tasks.
28
-
29
- ### 5. `agent_monitor.py`
30
- Real-time agent performance monitoring with cost tracking and quality metrics.
31
-
32
- ## 📊 Key Metrics
33
- - Agent task completion rate
34
- - Tool execution success rate
35
- - Multi-agent coordination latency
36
- - Agent decision quality score
37
- - Cost per agent interaction
38
-
39
- ## 🚀 Quick Start
40
-
41
- ```python
42
- from agent_orchestrator import AgentOrchestrator, Agent
43
-
44
- # Define specialized agents
45
- research_agent = Agent(
46
- name="research_agent",
47
- model="claude-3-5-sonnet-20241022",
48
- tools=["web_search", "document_retrieval"],
49
- system_prompt="You are a research specialist..."
50
- )
51
-
52
- coding_agent = Agent(
53
- name="coding_agent",
54
- model="claude-3-5-sonnet-20241022",
55
- tools=["code_executor", "github_api"],
56
- system_prompt="You are a coding specialist..."
57
- )
58
-
59
- # Initialize orchestrator
60
- orchestrator = AgentOrchestrator(
61
- agents=[research_agent, coding_agent],
62
- router_model="claude-3-5-sonnet-20241022"
63
- )
64
-
65
- # Execute complex task with automatic routing
66
- result = orchestrator.execute(
67
- task="Research best practices for rate limiting and implement them",
68
- max_iterations=10
69
- )
70
-
71
- print(f"Result: {result.output}")
72
- print(f"Agents used: {result.agents_invoked}")
73
- print(f"Total cost: ${result.total_cost:.4f}")
74
- ```
75
-
76
- ## 📚 Best Practices
77
-
78
- ### Cost Optimization (FinOps Integration)
79
-
80
- 1. **Agent-Level Cost Attribution**
81
- - Track costs per agent type and conversation
82
- - Identify expensive agent patterns
83
- - Optimize model selection per agent role
84
- - Monitor tool execution costs separately
85
- - Reference: FinOps fo-07 (AI/ML Cost Optimization)
86
-
87
- 2. **Optimize Agent Routing**
88
- - Use lightweight router model for task delegation
89
- - Cache routing decisions for similar queries
90
- - Minimize unnecessary agent hand-offs
91
- - Implement early termination for simple tasks
92
- - Reference: FinOps fo-03 (Budget Management)
93
-
94
- 3. **Tool Execution Optimization**
95
- - Cache tool results for frequent calls
96
- - Batch API calls where possible
97
- - Use cheaper alternatives for validation tasks
98
- - Track and optimize expensive tool chains
99
- - Reference: FinOps fo-01 (Cost Monitoring)
100
-
101
- 4. **Prompt Caching for Agents**
102
- - Cache agent system prompts (up to 90% savings on cached input tokens)
103
- - Cache tool schemas and examples
104
- - Reuse conversation context across turns
105
- - Implement semantic caching for agent responses
106
- - Reference: ai-01 (Prompt Caching), FinOps fo-07
107
-
108
- ### Security & Privacy (Security Architect Integration)
109
-
110
- 5. **Tool Execution Sandboxing**
111
- - Isolate tool execution environments
112
- - Implement strict permission controls
113
- - Validate all tool inputs and outputs
114
- - Monitor for malicious tool usage patterns
115
- - Reference: Security Architect sa-08 (LLM Security)
116
-
117
- 6. **Agent Permission Management**
118
- - Define granular RBAC for each agent
119
- - Restrict tool access based on agent role
120
- - Audit all agent actions and decisions
121
- - Implement least privilege principle
122
- - Reference: Security Architect sa-02 (IAM)
123
-
124
- 7. **Prevent Agent Jailbreaking**
125
- - Validate agent outputs before execution
126
- - Implement safety filters on tool calls
127
- - Monitor for prompt injection in agent inputs
128
- - Log suspicious agent behavior
129
- - Reference: Security Architect sa-08 (LLM Security)
130
-
131
- ### Data Quality & Governance (Data Engineer Integration)
132
-
133
- 8. **Conversation State Management**
134
- - Persist agent conversation history
135
- - Implement state recovery mechanisms
136
- - Version conversation schemas
137
- - Track conversation lineage
138
- - Reference: Data Engineer de-01 (Data Ingestion)
139
-
140
- 9. **Agent Output Validation**
141
- - Validate structured outputs from agents
142
- - Implement data quality checks on tool results
143
- - Monitor output consistency across agents
144
- - Track data transformation quality
145
- - Reference: Data Engineer de-03 (Data Quality)
146
-
147
- ### Model Lifecycle Management (MLOps Integration)
148
-
149
- 10. **Agent Model Versioning**
150
- - Version each agent's model configuration
151
- - Track model changes and performance impacts
152
- - Implement A/B testing for agent models
153
- - Provide rollback capability for agent updates
154
- - Reference: MLOps mo-01 (Model Registry), mo-03 (Versioning)
155
-
156
- 11. **Multi-Agent Performance Monitoring**
157
- - Track task completion rates per agent
158
- - Monitor agent collaboration efficiency
159
- - Measure end-to-end workflow latency
160
- - Alert on agent performance degradation
161
- - Reference: MLOps mo-04 (Monitoring)
162
-
163
- 12. **Agent Behavior Drift Detection**
164
- - Monitor agent decision patterns over time
165
- - Detect changes in tool usage patterns
166
- - Alert on unexpected agent behavior
167
- - Track trends in agent quality metrics
168
- - Reference: MLOps mo-05 (Drift Detection)
169
-
170
- ### Deployment & Operations (DevOps Integration)
171
-
172
- 13. **Containerize Agent Services**
173
- - Package each agent as a microservice
174
- - Use Docker for local development
175
- - Deploy to AKS with auto-scaling
176
- - Implement health checks for agents
177
- - Reference: DevOps do-03 (Containerization)
178
-
179
- 14. **CI/CD for Agent Updates**
180
- - Automate agent deployment pipeline
181
- - Implement canary deployments for agents
182
- - Test agent interactions before production
183
- - Roll back failed agent deployments
184
- - Reference: DevOps do-01 (CI/CD), do-05 (GitOps)
185
-
186
- 15. **Observability for Multi-Agent Systems**
187
- - Instrument agents with OpenTelemetry
188
- - Trace agent-to-agent communication
189
- - Monitor agent resource utilization
190
- - Set up distributed tracing dashboards
191
- - Reference: DevOps do-08 (Monitoring & Observability)
192
-
193
- ### Azure-Specific Best Practices
194
-
195
- 16. **Leverage Azure OpenAI for Agents**
196
- - Use managed identity for agent authentication
197
- - Deploy agents in Azure Container Apps
198
- - Enable diagnostic logging for all agents
199
- - Use Azure API Management for tool APIs
200
- - Reference: Azure az-05 (Azure OpenAI)
201
-
202
- 17. **Azure State Management**
203
- - Use Azure Cosmos DB for agent state
204
- - Implement Azure Service Bus for agent messaging
205
- - Store conversation history in Azure Storage
206
- - Use Azure Redis for agent caching
207
- - Reference: Azure az-03 (Storage), az-07 (Networking)
208
-
209
- ## 💰 Cost Optimization Examples
210
-
211
- ### Agent Cost Attribution
212
- ```python
213
- from agent_orchestrator import AgentOrchestrator
214
- from cost_tracker import AgentCostTracker
215
-
216
- cost_tracker = AgentCostTracker()
217
-
218
- # Track costs per agent
219
- def execute_with_cost_tracking(task: str, session_id: str):
220
- result = orchestrator.execute(
221
- task=task,
222
- cost_callback=lambda agent, cost: cost_tracker.log_agent_cost(
223
- session_id=session_id,
224
- agent_name=agent.name,
225
- model=agent.model,
226
- cost=cost
227
- )
228
- )
229
-
230
- # Generate cost breakdown
231
- breakdown = cost_tracker.get_session_breakdown(session_id)
232
- print(f"\n💰 Cost Breakdown:")
233
- for agent_name, metrics in breakdown.items():
234
- print(f" {agent_name}:")
235
- print(f" - Model calls: {metrics.num_calls}")
236
- print(f" - Total cost: ${metrics.total_cost:.4f}")
237
- print(f" - Avg cost/call: ${metrics.avg_cost:.4f}")
238
-
239
- return result
240
-
241
- # Set budget alerts per agent
242
- cost_tracker.set_agent_budget(
243
- agent_name="research_agent",
244
- daily_budget=10.00,
245
- alert_threshold=0.8
246
- )
247
- ```
248
-
249
- ### Prompt Caching for Agents (Up to 90% Savings)
250
- ```python
251
- from anthropic import Anthropic
252
-
253
- client = Anthropic()
254
-
255
- def create_agent_with_caching(agent_config: dict):
256
- """Create agent with cached system prompt and tool schemas."""
257
-
258
- # Cache system prompt
259
- system_blocks = [
260
- {
261
- "type": "text",
262
- "text": agent_config["system_prompt"],
263
- "cache_control": {"type": "ephemeral"}
264
- }
265
- ]
266
-
267
- # Cache tool schemas
268
- if agent_config.get("tools"):
269
- tools_text = json.dumps(agent_config["tools"], indent=2)
270
- system_blocks.append({
271
- "type": "text",
272
- "text": f"Available tools:\n{tools_text}",
273
- "cache_control": {"type": "ephemeral"}
274
- })
275
-
276
- def execute_agent(user_message: str, conversation_history: list = None):
277
- messages = conversation_history or []
278
- messages.append({"role": "user", "content": user_message})
279
-
280
- response = client.messages.create(
281
- model=agent_config["model"],
282
- max_tokens=4096,
283
- system=system_blocks,
284
- messages=messages,
285
- tools=agent_config.get("tools")
286
- )
287
-
288
- # Log cache performance
289
- usage = response.usage
290
- cache_creation_tokens = getattr(usage, 'cache_creation_input_tokens', 0)
291
- cache_read_tokens = getattr(usage, 'cache_read_input_tokens', 0)
292
-
293
- print(f"Cache stats: {cache_read_tokens} read, {cache_creation_tokens} created")
294
-
295
- return response
296
-
297
- return execute_agent
298
- ```
299
-
300
- ### Tool Execution Cost Optimization
301
- ```python
302
- from tool_registry import ToolRegistry
303
- from functools import lru_cache
304
- import hashlib
305
-
306
- class CostOptimizedToolRegistry(ToolRegistry):
307
- def __init__(self):
308
- super().__init__()
309
- self.execution_cache = {}
310
-
311
- def execute_tool(self, tool_name: str, params: dict):
312
- # Cache deterministic tool results
313
- if self.is_deterministic(tool_name):
314
- cache_key = self._get_cache_key(tool_name, params)
315
-
316
- if cache_key in self.execution_cache:
317
- print(f"✅ Cache hit for {tool_name}")
318
- return self.execution_cache[cache_key]
319
-
320
- # Execute tool
321
- result = super().execute_tool(tool_name, params)
322
-
323
- # Cache result
324
- if self.is_deterministic(tool_name):
325
- self.execution_cache[cache_key] = result
326
-
327
- # Track costs
328
- cost = self._estimate_tool_cost(tool_name, params, result)
329
- self.log_tool_cost(tool_name, cost)
330
-
331
- return result
332
-
333
- def _get_cache_key(self, tool_name: str, params: dict) -> str:
334
- """Generate cache key from tool name and params."""
335
- params_str = json.dumps(params, sort_keys=True)
336
- return hashlib.md5(f"{tool_name}:{params_str}".encode()).hexdigest()
337
-
338
- def get_tool_cost_report(self) -> dict:
339
- """Generate cost report by tool."""
340
- return {
341
- tool: {
342
- "executions": metrics.count,
343
- "total_cost": metrics.total_cost,
344
- "avg_cost": metrics.avg_cost
345
- }
346
- for tool, metrics in self.tool_metrics.items()
347
- }
348
- ```
349
-
350
- ## 🔒 Security Best Practices Examples
351
-
352
- ### Tool Execution Sandboxing
353
- ```python
354
- from tool_registry import ToolRegistry
355
- from security_sandbox import ToolSandbox # from sa-08
356
-
357
- class SecureToolRegistry(ToolRegistry):
358
- def __init__(self):
359
- super().__init__()
360
- self.sandbox = ToolSandbox()
361
-
362
- def execute_tool(self, tool_name: str, params: dict, agent_id: str):
363
- # Validate agent permissions
364
- if not self.has_permission(agent_id, tool_name):
365
- raise PermissionError(f"Agent {agent_id} not authorized for {tool_name}")
366
-
367
- # Validate inputs
368
- validation_result = self.validate_tool_params(tool_name, params)
369
- if not validation_result.valid:
370
- raise ValueError(f"Invalid params: {validation_result.errors}")
371
-
372
- # Execute in sandbox
373
- result = self.sandbox.execute(
374
- tool_name=tool_name,
375
- params=params,
376
- timeout=30,
377
- max_memory_mb=512,
378
- network_access=self.requires_network(tool_name)
379
- )
380
-
381
- # Validate outputs
382
- if not self.validate_tool_output(tool_name, result):
383
- raise ValueError(f"Tool output failed validation")
384
-
385
- # Audit log
386
- self.audit_log.record({
387
- "timestamp": datetime.now(),
388
- "agent_id": agent_id,
389
- "tool_name": tool_name,
390
- "params": params,
391
- "success": result.success,
392
- "duration_ms": result.duration_ms
393
- })
394
-
395
- return result.output
396
-
397
- # Define tool permissions
398
- registry = SecureToolRegistry()
399
- registry.set_tool_permissions("research_agent", [
400
- "web_search",
401
- "document_retrieval"
402
- ])
403
- registry.set_tool_permissions("coding_agent", [
404
- "code_executor",
405
- "github_api",
406
- "file_operations"
407
- ])
408
- ```
409
-
410
- ### Agent Input Validation
411
- ```python
412
- from pii_detector import PIIDetector # from sa-01
413
- from prompt_injection_detector import PromptInjectionDetector # from sa-08
414
-
415
- class SecureAgentOrchestrator(AgentOrchestrator):
416
- def __init__(self, *args, **kwargs):
417
- super().__init__(*args, **kwargs)
418
- self.pii_detector = PIIDetector()
419
- self.injection_detector = PromptInjectionDetector()
420
-
421
- def execute(self, task: str, user_context: dict = None):
422
- # Check for PII in user input
423
- pii_findings = self.pii_detector.analyze_text(task)
424
- if pii_findings:
425
- # Redact or alert
426
- print(f"⚠️ PII detected in user input: {pii_findings}")
427
- task = self.pii_detector.redact_text(task, pii_findings)
428
-
429
- # Check for prompt injection
430
- injection_score = self.injection_detector.analyze(task)
431
- if injection_score > 0.8:
432
- raise SecurityError("Potential prompt injection detected")
433
-
434
- # Execute with validation
435
- result = super().execute(task, user_context)
436
-
437
- # Validate agent outputs before returning
438
- if not self.validate_output(result.output):
439
- raise ValueError("Agent output failed safety checks")
440
-
441
- return result
442
- ```
443
-
444
- ## 📊 Enhanced Metrics & Monitoring
445
-
446
- | Metric Category | Metric | Target | Tool |
447
- |-----------------|--------|--------|------|
448
- | **Agent Performance** | Task completion rate | >0.95 | Custom monitor |
449
- | | Agent response time (p95) | <5s | Azure Monitor |
450
- | | Multi-agent coordination time | <10s | App Insights |
451
- | | Agent decision accuracy | >0.90 | MLflow |
452
- | **Tool Execution** | Tool success rate | >0.98 | Custom tracker |
453
- | | Tool execution time (p95) | <2s | App Insights |
454
- | | Tool cache hit rate | >60% | Redis metrics |
455
- | **Costs** | Cost per agent interaction | <$0.10 | FinOps dashboard |
456
- | | Cost per tool execution | <$0.01 | Cost tracker |
457
- | | Cache savings percentage | >70% | Cost analyzer |
458
- | **Quality** | Agent handoff accuracy | >0.92 | MLflow |
459
- | | Workflow success rate | >0.95 | Custom monitor |
460
- | | Output validation pass rate | >0.99 | Quality tracker |
461
- | **Security** | Permission violations | 0 | Security logs |
462
- | | Sandbox escapes | 0 | Security monitor |
463
- | | Injection attempts blocked | 100% | WAF logs |
464
-
465
- ## 🚀 Deployment Pipeline
466
-
467
- ### CI/CD for Multi-Agent System
468
- ```yaml
469
- # .github/workflows/agent-deployment.yml
470
- name: Agent Orchestration Deployment
471
-
472
- on:
473
- push:
474
- paths:
475
- - 'agents/**'
476
- - 'tools/**'
477
- branches:
478
- - main
479
-
480
- jobs:
481
- test-agents:
482
- runs-on: ubuntu-latest
483
- steps:
484
- - name: Unit test agents
485
- run: pytest tests/test_agents.py -v
486
-
487
- - name: Integration test agent coordination
488
- run: pytest tests/test_agent_orchestration.py -v
489
-
490
- - name: Test tool execution sandbox
491
- run: pytest tests/test_tool_security.py -v
492
-
493
- - name: Validate agent permissions
494
- run: python scripts/validate_permissions.py
495
-
496
- security-scan:
497
- runs-on: ubuntu-latest
498
- steps:
499
- - name: Scan for security vulnerabilities
500
- run: python scripts/security_scan.py
501
-
502
- - name: Test prompt injection detection
503
- run: pytest tests/test_prompt_injection.py
504
-
505
- - name: Validate tool sandboxing
506
- run: python scripts/test_sandbox.py
507
-
508
- deploy-staging:
509
- needs: [test-agents, security-scan]
510
- runs-on: ubuntu-latest
511
- steps:
512
- - name: Build agent containers
513
- run: |
514
- docker build -t agent-orchestrator:${{ github.sha }} .
515
- docker build -t tool-registry:${{ github.sha }} ./tools
516
-
517
- - name: Push to Azure Container Registry
518
- run: |
519
- az acr login --name myregistry
520
- docker push agent-orchestrator:${{ github.sha }}
521
-
522
- - name: Deploy to AKS staging
523
- run: |
524
- kubectl set image deployment/agent-orchestrator \
525
- agent-orchestrator=myregistry.azurecr.io/agent-orchestrator:${{ github.sha }} \
526
- --namespace staging
527
-
528
- - name: Run smoke tests
529
- run: python scripts/smoke_test_agents.py --environment staging
530
-
531
- deploy-production:
532
- needs: deploy-staging
533
- runs-on: ubuntu-latest
534
- environment: production
535
- steps:
536
- - name: Canary deployment (10%)
537
- run: |
538
- kubectl set image deployment/agent-orchestrator \
539
- agent-orchestrator=myregistry.azurecr.io/agent-orchestrator:${{ github.sha }} \
540
- --namespace production
541
- kubectl patch deployment agent-orchestrator \
542
- -p '{"spec":{"replicas":1}}' --namespace production
543
-
544
- - name: Monitor canary metrics
545
- run: python scripts/monitor_canary.py --duration 30m
546
-
547
- - name: Full production rollout
548
- if: success()
549
- run: kubectl scale deployment agent-orchestrator --replicas=10 --namespace production
550
-
551
- - name: Monitor production agents
552
- run: python scripts/monitor_agents.py --duration 2h
553
- ```
554
-
555
- ## 🔄 Integration Workflow
556
-
557
- ### End-to-End Multi-Agent Pipeline with All Roles
558
- ```
559
- 1. User Request Received
560
-
561
- 2. Input Validation & PII Detection (sa-01, sa-08)
562
-
563
- 3. Task Router (ai-03)
564
-
565
- 4. Agent Selection & Delegation (ai-03)
566
-
567
- 5. Tool Permission Check (sa-02)
568
-
569
- 6. Tool Execution in Sandbox (sa-08)
570
-
571
- 7. Tool Cost Tracking (fo-07)
572
-
573
- 8. Agent Coordination & Handoffs (ai-03)
574
-
575
- 9. Conversation State Persistence (de-01)
576
-
577
- 10. Output Validation (de-03)
578
-
579
- 11. Security Filtering (sa-08)
580
-
581
- 12. Response Caching (ai-01)
582
-
583
- 13. Performance Monitoring (mo-04)
584
-
585
- 14. Cost Attribution (fo-01)
586
-
587
- 15. Behavior Drift Detection (mo-05)
588
- ```
589
-
590
- ## 🎯 Quick Wins
591
-
592
- 1. **Enable prompt caching for agents** - 90% cost reduction on repeated agent calls
593
- 2. **Implement tool result caching** - Reduce expensive API call costs
594
- 3. **Set up agent-level cost tracking** - Identify and optimize expensive agent patterns
595
- 4. **Add tool execution sandboxing** - Prevent security vulnerabilities
596
- 5. **Implement agent permission controls** - Follow least privilege principle
597
- 6. **Enable distributed tracing** - Full visibility into multi-agent workflows
598
- 7. **Set up agent performance monitoring** - Catch quality degradation early
599
- 8. **Containerize agents as microservices** - Enable independent scaling and deployment
1
+ # Skill 3: LLM Agent Orchestration
2
+
3
+ ## 🎯 Overview
4
+ Build advanced multi-agent systems with autonomous task delegation, tool execution, and intelligent workflow orchestration for complex AI applications.
5
+
6
+ ## 🔗 Connections
7
+ - **Data Engineer**: Agent state persistence, conversation history storage (de-01, de-03)
8
+ - **Security Architect**: Tool execution sandboxing, agent permission management (sa-02, sa-08)
9
+ - **ML Engineer**: Agent model selection and optimization (ml-03, ml-04)
10
+ - **MLOps**: Agent performance tracking, multi-agent metrics (mo-01, mo-04)
11
+ - **FinOps**: Multi-agent cost attribution, tool execution cost tracking (fo-01, fo-07)
12
+ - **DevOps**: Agent deployment, horizontal scaling for agent clusters (do-01, do-03)
13
+ - **Data Scientist**: Agent behavior analysis, conversation analytics (ds-01, ds-08)
14
+
15
+ ## 🛠️ Tools Included
16
+
17
+ ### 1. `agent_orchestrator.py`
18
+ Multi-agent coordination system with task delegation, conversation routing, and state management.
19
+
20
+ ### 2. `tool_registry.py`
21
+ Centralized tool registry for agent function calling with versioning and permission controls.
22
+
23
+ ### 3. `agent_memory.py`
24
+ Short-term and long-term memory management with vector-based conversation retrieval.
25
+
26
+ ### 4. `workflow_executor.py`
27
+ Sequential and parallel workflow execution for complex multi-step agent tasks.
28
+
29
+ ### 5. `agent_monitor.py`
30
+ Real-time agent performance monitoring with cost tracking and quality metrics.
31
+
32
+ ## 📊 Key Metrics
33
+ - Agent task completion rate
34
+ - Tool execution success rate
35
+ - Multi-agent coordination latency
36
+ - Agent decision quality score
37
+ - Cost per agent interaction
38
+
39
+ ## 🚀 Quick Start
40
+
41
+ ```python
42
+ from agent_orchestrator import AgentOrchestrator, Agent
43
+
44
+ # Define specialized agents
45
+ research_agent = Agent(
46
+ name="research_agent",
47
+ model="claude-3-5-sonnet-20241022",
48
+ tools=["web_search", "document_retrieval"],
49
+ system_prompt="You are a research specialist..."
50
+ )
51
+
52
+ coding_agent = Agent(
53
+ name="coding_agent",
54
+ model="claude-3-5-sonnet-20241022",
55
+ tools=["code_executor", "github_api"],
56
+ system_prompt="You are a coding specialist..."
57
+ )
58
+
59
+ # Initialize orchestrator
60
+ orchestrator = AgentOrchestrator(
61
+ agents=[research_agent, coding_agent],
62
+ router_model="claude-3-5-sonnet-20241022"
63
+ )
64
+
65
+ # Execute complex task with automatic routing
66
+ result = orchestrator.execute(
67
+ task="Research best practices for rate limiting and implement them",
68
+ max_iterations=10
69
+ )
70
+
71
+ print(f"Result: {result.output}")
72
+ print(f"Agents used: {result.agents_invoked}")
73
+ print(f"Total cost: ${result.total_cost:.4f}")
74
+ ```
75
+
76
+ ## 📚 Best Practices
77
+
78
+ ### Cost Optimization (FinOps Integration)
79
+
80
+ 1. **Agent-Level Cost Attribution**
81
+ - Track costs per agent type and conversation
82
+ - Identify expensive agent patterns
83
+ - Optimize model selection per agent role
84
+ - Monitor tool execution costs separately
85
+ - Reference: FinOps fo-07 (AI/ML Cost Optimization)
86
+
87
+ 2. **Optimize Agent Routing**
88
+ - Use lightweight router model for task delegation
89
+ - Cache routing decisions for similar queries
90
+ - Minimize unnecessary agent hand-offs
91
+ - Implement early termination for simple tasks
92
+ - Reference: FinOps fo-03 (Budget Management)
93
+
94
+ 3. **Tool Execution Optimization**
95
+ - Cache tool results for frequent calls
96
+ - Batch API calls where possible
97
+ - Use cheaper alternatives for validation tasks
98
+ - Track and optimize expensive tool chains
99
+ - Reference: FinOps fo-01 (Cost Monitoring)
100
+
101
+ 4. **Prompt Caching for Agents**
102
+ - Cache agent system prompts (up to 90% savings on cached input tokens)
103
+ - Cache tool schemas and examples
104
+ - Reuse conversation context across turns
105
+ - Implement semantic caching for agent responses
106
+ - Reference: ai-01 (Prompt Caching), FinOps fo-07
107
+
108
+ ### Security & Privacy (Security Architect Integration)
109
+
110
+ 5. **Tool Execution Sandboxing**
111
+ - Isolate tool execution environments
112
+ - Implement strict permission controls
113
+ - Validate all tool inputs and outputs
114
+ - Monitor for malicious tool usage patterns
115
+ - Reference: Security Architect sa-08 (LLM Security)
116
+
117
+ 6. **Agent Permission Management**
118
+ - Define granular RBAC for each agent
119
+ - Restrict tool access based on agent role
120
+ - Audit all agent actions and decisions
121
+ - Implement least privilege principle
122
+ - Reference: Security Architect sa-02 (IAM)
123
+
124
+ 7. **Prevent Agent Jailbreaking**
125
+ - Validate agent outputs before execution
126
+ - Implement safety filters on tool calls
127
+ - Monitor for prompt injection in agent inputs
128
+ - Log suspicious agent behavior
129
+ - Reference: Security Architect sa-08 (LLM Security)
130
+
131
+ ### Data Quality & Governance (Data Engineer Integration)
132
+
133
+ 8. **Conversation State Management**
134
+ - Persist agent conversation history
135
+ - Implement state recovery mechanisms
136
+ - Version conversation schemas
137
+ - Track conversation lineage
138
+ - Reference: Data Engineer de-01 (Data Ingestion)
139
+
140
+ 9. **Agent Output Validation**
141
+ - Validate structured outputs from agents
142
+ - Implement data quality checks on tool results
143
+ - Monitor output consistency across agents
144
+ - Track data transformation quality
145
+ - Reference: Data Engineer de-03 (Data Quality)
146
+
147
+ ### Model Lifecycle Management (MLOps Integration)
148
+
149
+ 10. **Agent Model Versioning**
150
+ - Version each agent's model configuration
151
+ - Track model changes and performance impacts
152
+ - Implement A/B testing for agent models
153
+ - Rollback capability for agent updates
154
+ - Reference: MLOps mo-01 (Model Registry), mo-03 (Versioning)
155
+
156
+ 11. **Multi-Agent Performance Monitoring**
157
+ - Track task completion rates per agent
158
+ - Monitor agent collaboration efficiency
159
+ - Measure end-to-end workflow latency
160
+ - Alert on agent performance degradation
161
+ - Reference: MLOps mo-04 (Monitoring)
162
+
163
+ 12. **Agent Behavior Drift Detection**
164
+ - Monitor agent decision patterns over time
165
+ - Detect changes in tool usage patterns
166
+ - Alert on unexpected agent behavior
167
+ - Track agent quality metrics trends
168
+ - Reference: MLOps mo-05 (Drift Detection)
169
+
170
+ ### Deployment & Operations (DevOps Integration)
171
+
172
+ 13. **Containerize Agent Services**
173
+ - Package each agent as a microservice
174
+ - Use Docker for local development
175
+ - Deploy to AKS with auto-scaling
176
+ - Implement health checks for agents
177
+ - Reference: DevOps do-03 (Containerization)
178
+
179
+ 14. **CI/CD for Agent Updates**
180
+ - Automate agent deployment pipeline
181
+ - Implement canary deployments for agents
182
+ - Test agent interactions before production
183
+ - Roll back failed agent deployments
184
+ - Reference: DevOps do-01 (CI/CD), do-05 (GitOps)
185
+
186
+ 15. **Observability for Multi-Agent Systems**
187
+ - Instrument agents with OpenTelemetry
188
+ - Trace agent-to-agent communication
189
+ - Monitor agent resource utilization
190
+ - Set up distributed tracing dashboards
191
+ - Reference: DevOps do-08 (Monitoring & Observability)
192
+
193
+ ### Azure-Specific Best Practices
194
+
195
+ 16. **Leverage Azure OpenAI for Agents**
196
+ - Use managed identity for agent authentication
197
+ - Deploy agents in Azure Container Apps
198
+ - Enable diagnostic logging for all agents
199
+ - Use Azure API Management for tool APIs
200
+ - Reference: Azure az-05 (Azure OpenAI)
201
+
202
+ 17. **Azure State Management**
203
+ - Use Azure Cosmos DB for agent state
204
+ - Implement Azure Service Bus for agent messaging
205
+ - Store conversation history in Azure Storage
206
+ - Use Azure Redis for agent caching
207
+ - Reference: Azure az-03 (Storage), az-07 (Networking)
208
+
209
+ ## 💰 Cost Optimization Examples
210
+
211
+ ### Agent Cost Attribution
212
+ ```python
213
+ from agent_orchestrator import AgentOrchestrator
214
+ from cost_tracker import AgentCostTracker
215
+
216
+ cost_tracker = AgentCostTracker()
217
+
218
+ # Track costs per agent
219
+ def execute_with_cost_tracking(task: str, session_id: str):
220
+ result = orchestrator.execute(
221
+ task=task,
222
+ cost_callback=lambda agent, cost: cost_tracker.log_agent_cost(
223
+ session_id=session_id,
224
+ agent_name=agent.name,
225
+ model=agent.model,
226
+ cost=cost
227
+ )
228
+ )
229
+
230
+ # Generate cost breakdown
231
+ breakdown = cost_tracker.get_session_breakdown(session_id)
232
+ print(f"\n💰 Cost Breakdown:")
233
+ for agent_name, metrics in breakdown.items():
234
+ print(f" {agent_name}:")
235
+ print(f" - Model calls: {metrics.num_calls}")
236
+ print(f" - Total cost: ${metrics.total_cost:.4f}")
237
+ print(f" - Avg cost/call: ${metrics.avg_cost:.4f}")
238
+
239
+ return result
240
+
241
+ # Set budget alerts per agent
242
+ cost_tracker.set_agent_budget(
243
+ agent_name="research_agent",
244
+ daily_budget=10.00,
245
+ alert_threshold=0.8
246
+ )
247
+ ```
248
+
249
+ ### Prompt Caching for Agents (90% Savings)
250
+ ```python
251
+ from anthropic import Anthropic
252
+
253
+ client = Anthropic()
254
+
255
+ def create_agent_with_caching(agent_config: dict):
256
+ """Create agent with cached system prompt and tool schemas."""
257
+
258
+ # Cache system prompt
259
+ system_blocks = [
260
+ {
261
+ "type": "text",
262
+ "text": agent_config["system_prompt"],
263
+ "cache_control": {"type": "ephemeral"}
264
+ }
265
+ ]
266
+
267
+ # Cache tool schemas
268
+ if agent_config.get("tools"):
269
+ tools_text = json.dumps(agent_config["tools"], indent=2)
270
+ system_blocks.append({
271
+ "type": "text",
272
+ "text": f"Available tools:\n{tools_text}",
273
+ "cache_control": {"type": "ephemeral"}
274
+ })
275
+
276
+ def execute_agent(user_message: str, conversation_history: list = None):
277
+ messages = conversation_history or []
278
+ messages.append({"role": "user", "content": user_message})
279
+
280
+ response = client.messages.create(
281
+ model=agent_config["model"],
282
+ max_tokens=4096,
283
+ system=system_blocks,
284
+ messages=messages,
285
+ tools=agent_config.get("tools")
286
+ )
287
+
288
+ # Log cache performance
289
+ usage = response.usage
290
+ cache_creation_tokens = getattr(usage, 'cache_creation_input_tokens', 0)
291
+ cache_read_tokens = getattr(usage, 'cache_read_input_tokens', 0)
292
+
293
+ print(f"Cache stats: {cache_read_tokens} read, {cache_creation_tokens} created")
294
+
295
+ return response
296
+
297
+ return execute_agent
298
+ ```
299
+
300
+ ### Tool Execution Cost Optimization
301
+ ```python
302
+ from tool_registry import ToolRegistry
303
+ from functools import lru_cache
304
+ import hashlib
305
+
306
+ class CostOptimizedToolRegistry(ToolRegistry):
307
+ def __init__(self):
308
+ super().__init__()
309
+ self.execution_cache = {}
310
+
311
+ def execute_tool(self, tool_name: str, params: dict):
312
+ # Cache deterministic tool results
313
+ if self.is_deterministic(tool_name):
314
+ cache_key = self._get_cache_key(tool_name, params)
315
+
316
+ if cache_key in self.execution_cache:
317
+ print(f"✅ Cache hit for {tool_name}")
318
+ return self.execution_cache[cache_key]
319
+
320
+ # Execute tool
321
+ result = super().execute_tool(tool_name, params)
322
+
323
+ # Cache result
324
+ if self.is_deterministic(tool_name):
325
+ self.execution_cache[cache_key] = result
326
+
327
+ # Track costs
328
+ cost = self._estimate_tool_cost(tool_name, params, result)
329
+ self.log_tool_cost(tool_name, cost)
330
+
331
+ return result
332
+
333
+ def _get_cache_key(self, tool_name: str, params: dict) -> str:
334
+ """Generate cache key from tool name and params."""
335
+ params_str = json.dumps(params, sort_keys=True)
336
+ return hashlib.md5(f"{tool_name}:{params_str}".encode()).hexdigest()
337
+
338
+ def get_tool_cost_report(self) -> dict:
339
+ """Generate cost report by tool."""
340
+ return {
341
+ tool: {
342
+ "executions": metrics.count,
343
+ "total_cost": metrics.total_cost,
344
+ "avg_cost": metrics.avg_cost
345
+ }
346
+ for tool, metrics in self.tool_metrics.items()
347
+ }
348
+ ```
349
+
350
+ ## 🔒 Security Best Practices Examples
351
+
352
+ ### Tool Execution Sandboxing
353
+ ```python
354
+ from tool_registry import ToolRegistry
355
+ from security_sandbox import ToolSandbox # from sa-08
356
+
357
+ class SecureToolRegistry(ToolRegistry):
358
+ def __init__(self):
359
+ super().__init__()
360
+ self.sandbox = ToolSandbox()
361
+
362
+ def execute_tool(self, tool_name: str, params: dict, agent_id: str):
363
+ # Validate agent permissions
364
+ if not self.has_permission(agent_id, tool_name):
365
+ raise PermissionError(f"Agent {agent_id} not authorized for {tool_name}")
366
+
367
+ # Validate inputs
368
+ validation_result = self.validate_tool_params(tool_name, params)
369
+ if not validation_result.valid:
370
+ raise ValueError(f"Invalid params: {validation_result.errors}")
371
+
372
+ # Execute in sandbox
373
+ result = self.sandbox.execute(
374
+ tool_name=tool_name,
375
+ params=params,
376
+ timeout=30,
377
+ max_memory_mb=512,
378
+ network_access=self.requires_network(tool_name)
379
+ )
380
+
381
+ # Validate outputs
382
+ if not self.validate_tool_output(tool_name, result):
383
+ raise ValueError(f"Tool output failed validation")
384
+
385
+ # Audit log
386
+ self.audit_log.record({
387
+ "timestamp": datetime.now(),
388
+ "agent_id": agent_id,
389
+ "tool_name": tool_name,
390
+ "params": params,
391
+ "success": result.success,
392
+ "duration_ms": result.duration_ms
393
+ })
394
+
395
+ return result.output
396
+
397
+ # Define tool permissions
398
+ registry = SecureToolRegistry()
399
+ registry.set_tool_permissions("research_agent", [
400
+ "web_search",
401
+ "document_retrieval"
402
+ ])
403
+ registry.set_tool_permissions("coding_agent", [
404
+ "code_executor",
405
+ "github_api",
406
+ "file_operations"
407
+ ])
408
+ ```
409
+
410
+ ### Agent Input Validation
411
+ ```python
412
+ from pii_detector import PIIDetector # from sa-01
413
+ from prompt_injection_detector import PromptInjectionDetector # from sa-08
414
+
415
+ class SecureAgentOrchestrator(AgentOrchestrator):
416
+ def __init__(self, *args, **kwargs):
417
+ super().__init__(*args, **kwargs)
418
+ self.pii_detector = PIIDetector()
419
+ self.injection_detector = PromptInjectionDetector()
420
+
421
+ def execute(self, task: str, user_context: dict = None):
422
+ # Check for PII in user input
423
+ pii_findings = self.pii_detector.analyze_text(task)
424
+ if pii_findings:
425
+ # Redact or alert
426
+ print(f"⚠️ PII detected in user input: {pii_findings}")
427
+ task = self.pii_detector.redact_text(task, pii_findings)
428
+
429
+ # Check for prompt injection
430
+ injection_score = self.injection_detector.analyze(task)
431
+ if injection_score > 0.8:
432
+ raise SecurityError("Potential prompt injection detected")
433
+
434
+ # Execute with validation
435
+ result = super().execute(task, user_context)
436
+
437
+ # Validate agent outputs before returning
438
+ if not self.validate_output(result.output):
439
+ raise ValueError("Agent output failed safety checks")
440
+
441
+ return result
442
+ ```
443
+
444
+ ## 📊 Enhanced Metrics & Monitoring
445
+
446
+ | Metric Category | Metric | Target | Tool |
447
+ |-----------------|--------|--------|------|
448
+ | **Agent Performance** | Task completion rate | >0.95 | Custom monitor |
449
+ | | Agent response time (p95) | <5s | Azure Monitor |
450
+ | | Multi-agent coordination time | <10s | App Insights |
451
+ | | Agent decision accuracy | >0.90 | MLflow |
452
+ | **Tool Execution** | Tool success rate | >0.98 | Custom tracker |
453
+ | | Tool execution time (p95) | <2s | App Insights |
454
+ | | Tool cache hit rate | >60% | Redis metrics |
455
+ | **Costs** | Cost per agent interaction | <$0.10 | FinOps dashboard |
456
+ | | Cost per tool execution | <$0.01 | Cost tracker |
457
+ | | Cache savings percentage | >70% | Cost analyzer |
458
+ | **Quality** | Agent handoff accuracy | >0.92 | MLflow |
459
+ | | Workflow success rate | >0.95 | Custom monitor |
460
+ | | Output validation pass rate | >0.99 | Quality tracker |
461
+ | **Security** | Permission violations | 0 | Security logs |
462
+ | | Sandbox escapes | 0 | Security monitor |
463
+ | | Injection attempts blocked | 100% | WAF logs |
464
+
465
+ ## 🚀 Deployment Pipeline
466
+
467
+ ### CI/CD for Multi-Agent System
468
+ ```yaml
469
+ # .github/workflows/agent-deployment.yml
470
+ name: Agent Orchestration Deployment
471
+
472
+ on:
473
+ push:
474
+ paths:
475
+ - 'agents/**'
476
+ - 'tools/**'
477
+ branches:
478
+ - main
479
+
480
+ jobs:
481
+ test-agents:
482
+ runs-on: ubuntu-latest
483
+ steps:
484
+ - name: Unit test agents
485
+ run: pytest tests/test_agents.py -v
486
+
487
+ - name: Integration test agent coordination
488
+ run: pytest tests/test_agent_orchestration.py -v
489
+
490
+ - name: Test tool execution sandbox
491
+ run: pytest tests/test_tool_security.py -v
492
+
493
+ - name: Validate agent permissions
494
+ run: python scripts/validate_permissions.py
495
+
496
+ security-scan:
497
+ runs-on: ubuntu-latest
498
+ steps:
499
+ - name: Scan for security vulnerabilities
500
+ run: python scripts/security_scan.py
501
+
502
+ - name: Test prompt injection detection
503
+ run: pytest tests/test_prompt_injection.py
504
+
505
+ - name: Validate tool sandboxing
506
+ run: python scripts/test_sandbox.py
507
+
508
+ deploy-staging:
509
+ needs: [test-agents, security-scan]
510
+ runs-on: ubuntu-latest
511
+ steps:
512
+ - name: Build agent containers
513
+ run: |
514
+ docker build -t agent-orchestrator:${{ github.sha }} .
515
+ docker build -t tool-registry:${{ github.sha }} ./tools
516
+
517
+ - name: Push to Azure Container Registry
518
+ run: |
519
+ az acr login --name myregistry
520
+ docker push agent-orchestrator:${{ github.sha }}
521
+
522
+ - name: Deploy to AKS staging
523
+ run: |
524
+ kubectl set image deployment/agent-orchestrator \
525
+ agent-orchestrator=myregistry.azurecr.io/agent-orchestrator:${{ github.sha }} \
526
+ --namespace staging
527
+
528
+ - name: Run smoke tests
529
+ run: python scripts/smoke_test_agents.py --environment staging
530
+
531
+ deploy-production:
532
+ needs: deploy-staging
533
+ runs-on: ubuntu-latest
534
+ environment: production
535
+ steps:
536
+ - name: Canary deployment (10%)
537
+ run: |
538
+ kubectl set image deployment/agent-orchestrator \
539
+ agent-orchestrator=myregistry.azurecr.io/agent-orchestrator:${{ github.sha }} \
540
+ --namespace production
541
+ kubectl patch deployment agent-orchestrator \
542
+ -p '{"spec":{"replicas":1}}' --namespace production
543
+
544
+ - name: Monitor canary metrics
545
+ run: python scripts/monitor_canary.py --duration 30m
546
+
547
+ - name: Full production rollout
548
+ if: success()
549
+ run: kubectl scale deployment agent-orchestrator --replicas=10 --namespace production
550
+
551
+ - name: Monitor production agents
552
+ run: python scripts/monitor_agents.py --duration 2h
553
+ ```
554
+
555
+ ## 🔄 Integration Workflow
556
+
557
+ ### End-to-End Multi-Agent Pipeline with All Roles
558
+ ```
559
+ 1. User Request Received
560
+
561
+ 2. Input Validation & PII Detection (sa-01, sa-08)
562
+
563
+ 3. Task Router (ai-03)
564
+
565
+ 4. Agent Selection & Delegation (ai-03)
566
+
567
+ 5. Tool Permission Check (sa-02)
568
+
569
+ 6. Tool Execution in Sandbox (sa-08)
570
+
571
+ 7. Tool Cost Tracking (fo-07)
572
+
573
+ 8. Agent Coordination & Handoffs (ai-03)
574
+
575
+ 9. Conversation State Persistence (de-01)
576
+
577
+ 10. Output Validation (de-03)
578
+
579
+ 11. Security Filtering (sa-08)
580
+
581
+ 12. Response Caching (ai-01)
582
+
583
+ 13. Performance Monitoring (mo-04)
584
+
585
+ 14. Cost Attribution (fo-01)
586
+
587
+ 15. Behavior Drift Detection (mo-05)
588
+ ```
589
+
590
+ ## 🎯 Quick Wins
591
+
592
+ 1. **Enable prompt caching for agents** - Up to 90% cost reduction on cached input tokens for repeated agent calls
593
+ 2. **Implement tool result caching** - Reduce expensive API call costs
594
+ 3. **Set up agent-level cost tracking** - Identify and optimize expensive agent patterns
595
+ 4. **Add tool execution sandboxing** - Prevent security vulnerabilities
596
+ 5. **Implement agent permission controls** - Follow least privilege principle
597
+ 6. **Enable distributed tracing** - Full visibility into multi-agent workflows
598
+ 7. **Set up agent performance monitoring** - Catch quality degradation early
599
+ 8. **Containerize agents as microservices** - Enable independent scaling and deployment