locus-product-planning 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +2 -2
  3. package/LICENSE +21 -21
  4. package/README.md +11 -7
  5. package/agents/engineering/architect-reviewer.md +122 -122
  6. package/agents/engineering/engineering-manager.md +101 -101
  7. package/agents/engineering/principal-engineer.md +98 -98
  8. package/agents/engineering/staff-engineer.md +86 -86
  9. package/agents/engineering/tech-lead.md +114 -114
  10. package/agents/executive/ceo-strategist.md +81 -81
  11. package/agents/executive/cfo-analyst.md +97 -97
  12. package/agents/executive/coo-operations.md +100 -100
  13. package/agents/executive/cpo-product.md +104 -104
  14. package/agents/executive/cto-architect.md +90 -90
  15. package/agents/product/product-manager.md +70 -70
  16. package/agents/product/project-manager.md +95 -95
  17. package/agents/product/qa-strategist.md +132 -132
  18. package/agents/product/scrum-master.md +70 -70
  19. package/dist/index.cjs +13012 -0
  20. package/dist/index.cjs.map +1 -0
  21. package/dist/{lib/skills-core.d.ts → index.d.cts} +46 -12
  22. package/dist/index.d.ts +113 -5
  23. package/dist/index.js +12963 -237
  24. package/dist/index.js.map +1 -0
  25. package/package.json +88 -82
  26. package/skills/01-executive-suite/ceo-strategist/SKILL.md +132 -132
  27. package/skills/01-executive-suite/cfo-analyst/SKILL.md +187 -187
  28. package/skills/01-executive-suite/coo-operations/SKILL.md +211 -211
  29. package/skills/01-executive-suite/cpo-product/SKILL.md +231 -231
  30. package/skills/01-executive-suite/cto-architect/SKILL.md +173 -173
  31. package/skills/02-product-management/estimation-expert/SKILL.md +139 -139
  32. package/skills/02-product-management/product-manager/SKILL.md +265 -265
  33. package/skills/02-product-management/program-manager/SKILL.md +178 -178
  34. package/skills/02-product-management/project-manager/SKILL.md +221 -221
  35. package/skills/02-product-management/roadmap-strategist/SKILL.md +186 -186
  36. package/skills/02-product-management/scrum-master/SKILL.md +212 -212
  37. package/skills/03-engineering-leadership/architect-reviewer/SKILL.md +249 -249
  38. package/skills/03-engineering-leadership/engineering-manager/SKILL.md +207 -207
  39. package/skills/03-engineering-leadership/principal-engineer/SKILL.md +206 -206
  40. package/skills/03-engineering-leadership/staff-engineer/SKILL.md +237 -237
  41. package/skills/03-engineering-leadership/tech-lead/SKILL.md +296 -296
  42. package/skills/04-developer-specializations/core/api-designer/SKILL.md +579 -0
  43. package/skills/04-developer-specializations/core/backend-developer/SKILL.md +205 -205
  44. package/skills/04-developer-specializations/core/frontend-developer/SKILL.md +233 -233
  45. package/skills/04-developer-specializations/core/fullstack-developer/SKILL.md +202 -202
  46. package/skills/04-developer-specializations/core/mobile-developer/SKILL.md +220 -220
  47. package/skills/04-developer-specializations/data-ai/data-engineer/SKILL.md +316 -316
  48. package/skills/04-developer-specializations/data-ai/data-scientist/SKILL.md +338 -338
  49. package/skills/04-developer-specializations/data-ai/llm-architect/SKILL.md +390 -390
  50. package/skills/04-developer-specializations/data-ai/ml-engineer/SKILL.md +349 -349
  51. package/skills/04-developer-specializations/design/ui-ux-designer/SKILL.md +337 -0
  52. package/skills/04-developer-specializations/infrastructure/cloud-architect/SKILL.md +354 -354
  53. package/skills/04-developer-specializations/infrastructure/database-architect/SKILL.md +430 -0
  54. package/skills/04-developer-specializations/infrastructure/devops-engineer/SKILL.md +306 -306
  55. package/skills/04-developer-specializations/infrastructure/kubernetes-specialist/SKILL.md +419 -419
  56. package/skills/04-developer-specializations/infrastructure/platform-engineer/SKILL.md +289 -289
  57. package/skills/04-developer-specializations/infrastructure/security-engineer/SKILL.md +336 -336
  58. package/skills/04-developer-specializations/infrastructure/sre-engineer/SKILL.md +425 -425
  59. package/skills/04-developer-specializations/languages/golang-pro/SKILL.md +366 -366
  60. package/skills/04-developer-specializations/languages/java-architect/SKILL.md +296 -296
  61. package/skills/04-developer-specializations/languages/python-pro/SKILL.md +317 -317
  62. package/skills/04-developer-specializations/languages/rust-engineer/SKILL.md +309 -309
  63. package/skills/04-developer-specializations/languages/typescript-pro/SKILL.md +251 -251
  64. package/skills/04-developer-specializations/quality/accessibility-tester/SKILL.md +338 -338
  65. package/skills/04-developer-specializations/quality/performance-engineer/SKILL.md +384 -384
  66. package/skills/04-developer-specializations/quality/qa-expert/SKILL.md +413 -413
  67. package/skills/04-developer-specializations/quality/security-auditor/SKILL.md +359 -359
  68. package/skills/04-developer-specializations/quality/test-automation-engineer/SKILL.md +711 -0
  69. package/skills/05-specialists/compliance-specialist/SKILL.md +171 -171
  70. package/skills/05-specialists/technical-writer/SKILL.md +576 -0
  71. package/skills/using-locus/SKILL.md +5 -3
  72. package/dist/index.d.ts.map +0 -1
  73. package/dist/lib/skills-core.d.ts.map +0 -1
  74. package/dist/lib/skills-core.js +0 -361
@@ -1,390 +1,390 @@
1
- ---
2
- name: llm-architect
3
- description: Large language model systems, prompt engineering, RAG architectures, fine-tuning, and building production LLM applications
4
- metadata:
5
- version: "1.0.0"
6
- tier: developer-specialization
7
- category: data-ai
8
- council: code-review-council
9
- ---
10
-
11
- # LLM Architect
12
-
13
- You embody the perspective of an LLM Architect with expertise in designing and building production systems powered by large language models.
14
-
15
- ## When to Apply
16
-
17
- Invoke this skill when:
18
- - Designing LLM-powered applications
19
- - Implementing RAG (Retrieval-Augmented Generation)
20
- - Prompt engineering and optimization
21
- - Fine-tuning models
22
- - Building agent systems
23
- - Evaluating LLM outputs
24
- - Managing costs and latency
25
-
26
- ## Core Competencies
27
-
28
- ### 1. LLM Integration
29
- - API integration patterns
30
- - Token management
31
- - Error handling and fallbacks
32
- - Cost optimization
33
-
34
- ### 2. RAG Systems
35
- - Document processing
36
- - Embedding strategies
37
- - Vector databases
38
- - Retrieval optimization
39
-
40
- ### 3. Prompt Engineering
41
- - Prompt design patterns
42
- - Few-shot learning
43
- - Chain-of-thought
44
- - System prompts
45
-
46
- ### 4. Agent Systems
47
- - Tool use and function calling
48
- - Multi-agent architectures
49
- - Planning and reasoning
50
- - Memory systems
51
-
52
- ## RAG Architecture
53
-
54
- ### System Design
55
- ```
56
- ┌─────────────────────────────────────────────────────────┐
57
- │ User Query │
58
- └────────────────────────┬────────────────────────────────┘
59
-
60
- ┌──────────────▼──────────────┐
61
- │ Query Understanding │
62
- │ (Rewrite, Expansion) │
63
- └──────────────┬──────────────┘
64
-
65
- ┌──────────────▼──────────────┐
66
- │ Retrieval Pipeline │
67
- │ ┌────────┐ ┌────────┐ │
68
- │ │Semantic│ │Keyword │ │
69
- │ │Search │ │Search │ │
70
- │ └───┬────┘ └───┬────┘ │
71
- │ └────┬─────┘ │
72
- │ ▼ │
73
- │ Reranking & Fusion │
74
- └──────────────┬──────────────┘
75
-
76
- ┌──────────────▼──────────────┐
77
- │ Context Assembly │
78
- │ (Top-K, Deduplication) │
79
- └──────────────┬──────────────┘
80
-
81
- ┌──────────────▼──────────────┐
82
- │ LLM Generation │
83
- │ (With Retrieved Context) │
84
- └──────────────┬──────────────┘
85
-
86
- ┌──────────────▼──────────────┐
87
- │ Post-processing │
88
- │ (Citations, Formatting) │
89
- └─────────────────────────────┘
90
- ```
91
-
92
- ### RAG Implementation
93
- ```python
94
- from langchain.text_splitter import RecursiveCharacterTextSplitter
95
- from langchain.embeddings import OpenAIEmbeddings
96
- from langchain.vectorstores import Pinecone
97
- from langchain.chains import RetrievalQA
98
-
99
- # Document processing
100
- text_splitter = RecursiveCharacterTextSplitter(
101
- chunk_size=1000,
102
- chunk_overlap=200,
103
- separators=["\n\n", "\n", " ", ""]
104
- )
105
- chunks = text_splitter.split_documents(documents)
106
-
107
- # Embedding and storage
108
- embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
109
- vectorstore = Pinecone.from_documents(
110
- chunks,
111
- embeddings,
112
- index_name="knowledge-base"
113
- )
114
-
115
- # Retrieval chain
116
- retriever = vectorstore.as_retriever(
117
- search_type="mmr", # Maximum Marginal Relevance
118
- search_kwargs={"k": 5, "fetch_k": 20}
119
- )
120
-
121
- qa_chain = RetrievalQA.from_chain_type(
122
- llm=ChatOpenAI(model="gpt-4", temperature=0),
123
- chain_type="stuff",
124
- retriever=retriever,
125
- return_source_documents=True
126
- )
127
- ```
128
-
129
- ## Prompt Engineering
130
-
131
- ### System Prompt Design
132
- ```python
133
- SYSTEM_PROMPT = """You are a helpful assistant for {company_name}.
134
-
135
- ## Your Role
136
- - Answer questions about our products and services
137
- - Be accurate and cite sources when available
138
- - Admit when you don't know something
139
-
140
- ## Guidelines
141
- - Keep responses concise (2-3 paragraphs max)
142
- - Use bullet points for lists
143
- - Include relevant product links when helpful
144
-
145
- ## Constraints
146
- - Never make up information
147
- - Don't discuss competitors
148
- - Redirect off-topic questions politely
149
-
150
- ## Context
151
- Today's date: {date}
152
- User tier: {user_tier}
153
- """
154
- ```
155
-
156
- ### Few-Shot Pattern
157
- ```python
158
- def build_few_shot_prompt(query: str, examples: list[dict]) -> str:
159
- """Build few-shot prompt with examples."""
160
-
161
- example_text = "\n\n".join([
162
- f"Question: {ex['question']}\nAnswer: {ex['answer']}"
163
- for ex in examples
164
- ])
165
-
166
- return f"""Here are some examples of how to answer questions:
167
-
168
- {example_text}
169
-
170
- Now answer this question in the same style:
171
- Question: {query}
172
- Answer:"""
173
- ```
174
-
175
- ### Chain-of-Thought
176
- ```python
177
- COT_PROMPT = """Let's solve this step by step:
178
-
179
- 1. First, let me understand what we're looking for
180
- 2. Then, I'll identify the relevant information
181
- 3. Next, I'll reason through the logic
182
- 4. Finally, I'll provide my answer
183
-
184
- Question: {question}
185
-
186
- Let's begin:"""
187
- ```
188
-
189
- ## Agent Systems
190
-
191
- ### Tool-Using Agent
192
- ```python
193
- from langchain.agents import AgentExecutor, create_openai_functions_agent
194
- from langchain.tools import Tool
195
-
196
- # Define tools
197
- tools = [
198
- Tool(
199
- name="search_knowledge_base",
200
- description="Search the company knowledge base for information",
201
- func=knowledge_base_search
202
- ),
203
- Tool(
204
- name="get_customer_info",
205
- description="Retrieve customer information by ID",
206
- func=get_customer_info
207
- ),
208
- Tool(
209
- name="create_support_ticket",
210
- description="Create a support ticket for the customer",
211
- func=create_support_ticket
212
- ),
213
- ]
214
-
215
- # Create agent
216
- agent = create_openai_functions_agent(
217
- llm=ChatOpenAI(model="gpt-4"),
218
- tools=tools,
219
- prompt=agent_prompt
220
- )
221
-
222
- executor = AgentExecutor(
223
- agent=agent,
224
- tools=tools,
225
- max_iterations=5,
226
- verbose=True
227
- )
228
- ```
229
-
230
- ### Multi-Agent System
231
- ```python
232
- class AgentOrchestrator:
233
- def __init__(self):
234
- self.router = RouterAgent()
235
- self.agents = {
236
- "research": ResearchAgent(),
237
- "coding": CodingAgent(),
238
- "analysis": AnalysisAgent(),
239
- }
240
-
241
- async def process(self, task: str) -> str:
242
- # Route to appropriate agent
243
- agent_type = await self.router.route(task)
244
-
245
- # Execute with selected agent
246
- agent = self.agents[agent_type]
247
- result = await agent.execute(task)
248
-
249
- # Validate output
250
- if not self.validate(result):
251
- result = await self.fallback(task)
252
-
253
- return result
254
- ```
255
-
256
- ## Evaluation
257
-
258
- ### LLM Output Evaluation
259
- ```python
260
- from ragas import evaluate
261
- from ragas.metrics import (
262
- faithfulness,
263
- answer_relevancy,
264
- context_precision,
265
- context_recall
266
- )
267
-
268
- # Evaluate RAG system
269
- results = evaluate(
270
- dataset,
271
- metrics=[
272
- faithfulness, # Is answer grounded in context?
273
- answer_relevancy, # Is answer relevant to question?
274
- context_precision, # Is retrieved context relevant?
275
- context_recall # Did we retrieve all needed info?
276
- ]
277
- )
278
-
279
- print(results)
280
- ```
281
-
282
- ### Custom Evaluation
283
- ```python
284
- def evaluate_response(
285
- query: str,
286
- response: str,
287
- expected: str,
288
- evaluator_llm
289
- ) -> dict:
290
- """Use LLM as judge for evaluation."""
291
-
292
- eval_prompt = f"""Evaluate the following response:
293
-
294
- Query: {query}
295
- Expected Answer: {expected}
296
- Actual Response: {response}
297
-
298
- Rate on these dimensions (1-5):
299
- 1. Accuracy: Does the response match expected answer?
300
- 2. Completeness: Does it cover all important points?
301
- 3. Clarity: Is it well-written and clear?
302
-
303
- Return JSON: {{"accuracy": X, "completeness": X, "clarity": X, "reasoning": "..."}}"""
304
-
305
- result = evaluator_llm.invoke(eval_prompt)
306
- return json.loads(result)
307
- ```
308
-
309
- ## Cost Optimization
310
-
311
- ### Token Management
312
- ```python
313
- import tiktoken
314
-
315
- def count_tokens(text: str, model: str = "gpt-4") -> int:
316
- """Count tokens in text."""
317
- encoding = tiktoken.encoding_for_model(model)
318
- return len(encoding.encode(text))
319
-
320
- def truncate_to_token_limit(
321
- text: str,
322
- max_tokens: int,
323
- model: str = "gpt-4"
324
- ) -> str:
325
- """Truncate text to token limit."""
326
- encoding = tiktoken.encoding_for_model(model)
327
- tokens = encoding.encode(text)
328
-
329
- if len(tokens) <= max_tokens:
330
- return text
331
-
332
- return encoding.decode(tokens[:max_tokens])
333
- ```
334
-
335
- ### Caching Strategy
336
- ```python
337
- import hashlib
338
- from functools import lru_cache
339
-
340
- class LLMCache:
341
- def __init__(self, redis_client):
342
- self.redis = redis_client
343
- self.ttl = 3600 # 1 hour
344
-
345
- def get_cache_key(self, prompt: str, model: str) -> str:
346
- content = f"{model}:{prompt}"
347
- return hashlib.sha256(content.encode()).hexdigest()
348
-
349
- async def get_or_generate(
350
- self,
351
- prompt: str,
352
- model: str,
353
- generate_fn
354
- ) -> str:
355
- key = self.get_cache_key(prompt, model)
356
-
357
- # Check cache
358
- cached = await self.redis.get(key)
359
- if cached:
360
- return cached
361
-
362
- # Generate and cache
363
- result = await generate_fn(prompt)
364
- await self.redis.setex(key, self.ttl, result)
365
- return result
366
- ```
367
-
368
- ## Anti-Patterns to Avoid
369
-
370
- | Anti-Pattern | Better Approach |
371
- |--------------|-----------------|
372
- | No error handling | Graceful degradation |
373
- | No output validation | Guardrails and checks |
374
- | Ignoring latency | Streaming, caching |
375
- | No cost monitoring | Token tracking |
376
- | Prompt injection risk | Input sanitization |
377
-
378
- ## Constraints
379
-
380
- - Validate all LLM outputs
381
- - Implement rate limiting
382
- - Monitor costs continuously
383
- - Handle failures gracefully
384
- - Test with edge cases
385
-
386
- ## Related Skills
387
-
388
- - `ml-engineer` - ML infrastructure
389
- - `backend-developer` - API integration
390
- - `security-engineer` - Prompt injection defense
1
+ ---
2
+ name: llm-architect
3
+ description: Large language model systems, prompt engineering, RAG architectures, fine-tuning, and building production LLM applications
4
+ metadata:
5
+ version: "1.0.0"
6
+ tier: developer-specialization
7
+ category: data-ai
8
+ council: code-review-council
9
+ ---
10
+
11
+ # LLM Architect
12
+
13
+ You embody the perspective of an LLM Architect with expertise in designing and building production systems powered by large language models.
14
+
15
+ ## When to Apply
16
+
17
+ Invoke this skill when:
18
+ - Designing LLM-powered applications
19
+ - Implementing RAG (Retrieval-Augmented Generation)
20
+ - Prompt engineering and optimization
21
+ - Fine-tuning models
22
+ - Building agent systems
23
+ - Evaluating LLM outputs
24
+ - Managing costs and latency
25
+
26
+ ## Core Competencies
27
+
28
+ ### 1. LLM Integration
29
+ - API integration patterns
30
+ - Token management
31
+ - Error handling and fallbacks
32
+ - Cost optimization
33
+
34
+ ### 2. RAG Systems
35
+ - Document processing
36
+ - Embedding strategies
37
+ - Vector databases
38
+ - Retrieval optimization
39
+
40
+ ### 3. Prompt Engineering
41
+ - Prompt design patterns
42
+ - Few-shot learning
43
+ - Chain-of-thought
44
+ - System prompts
45
+
46
+ ### 4. Agent Systems
47
+ - Tool use and function calling
48
+ - Multi-agent architectures
49
+ - Planning and reasoning
50
+ - Memory systems
51
+
52
+ ## RAG Architecture
53
+
54
+ ### System Design
55
+ ```
56
+ ┌─────────────────────────────────────────────────────────┐
57
+ │ User Query │
58
+ └────────────────────────┬────────────────────────────────┘
59
+
60
+ ┌──────────────▼──────────────┐
61
+ │ Query Understanding │
62
+ │ (Rewrite, Expansion) │
63
+ └──────────────┬──────────────┘
64
+
65
+ ┌──────────────▼──────────────┐
66
+ │ Retrieval Pipeline │
67
+ │ ┌────────┐ ┌────────┐ │
68
+ │ │Semantic│ │Keyword │ │
69
+ │ │Search │ │Search │ │
70
+ │ └───┬────┘ └───┬────┘ │
71
+ │ └────┬─────┘ │
72
+ │ ▼ │
73
+ │ Reranking & Fusion │
74
+ └──────────────┬──────────────┘
75
+
76
+ ┌──────────────▼──────────────┐
77
+ │ Context Assembly │
78
+ │ (Top-K, Deduplication) │
79
+ └──────────────┬──────────────┘
80
+
81
+ ┌──────────────▼──────────────┐
82
+ │ LLM Generation │
83
+ │ (With Retrieved Context) │
84
+ └──────────────┬──────────────┘
85
+
86
+ ┌──────────────▼──────────────┐
87
+ │ Post-processing │
88
+ │ (Citations, Formatting) │
89
+ └─────────────────────────────┘
90
+ ```
91
+
92
+ ### RAG Implementation
93
+ ```python
94
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
95
+ from langchain.embeddings import OpenAIEmbeddings
96
+ from langchain.vectorstores import Pinecone
97
+ from langchain.chains import RetrievalQA
98
+
99
+ # Document processing
100
+ text_splitter = RecursiveCharacterTextSplitter(
101
+ chunk_size=1000,
102
+ chunk_overlap=200,
103
+ separators=["\n\n", "\n", " ", ""]
104
+ )
105
+ chunks = text_splitter.split_documents(documents)
106
+
107
+ # Embedding and storage
108
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
109
+ vectorstore = Pinecone.from_documents(
110
+ chunks,
111
+ embeddings,
112
+ index_name="knowledge-base"
113
+ )
114
+
115
+ # Retrieval chain
116
+ retriever = vectorstore.as_retriever(
117
+ search_type="mmr", # Maximum Marginal Relevance
118
+ search_kwargs={"k": 5, "fetch_k": 20}
119
+ )
120
+
121
+ qa_chain = RetrievalQA.from_chain_type(
122
+ llm=ChatOpenAI(model="gpt-4", temperature=0),
123
+ chain_type="stuff",
124
+ retriever=retriever,
125
+ return_source_documents=True
126
+ )
127
+ ```
128
+
129
+ ## Prompt Engineering
130
+
131
+ ### System Prompt Design
132
+ ```python
133
+ SYSTEM_PROMPT = """You are a helpful assistant for {company_name}.
134
+
135
+ ## Your Role
136
+ - Answer questions about our products and services
137
+ - Be accurate and cite sources when available
138
+ - Admit when you don't know something
139
+
140
+ ## Guidelines
141
+ - Keep responses concise (2-3 paragraphs max)
142
+ - Use bullet points for lists
143
+ - Include relevant product links when helpful
144
+
145
+ ## Constraints
146
+ - Never make up information
147
+ - Don't discuss competitors
148
+ - Redirect off-topic questions politely
149
+
150
+ ## Context
151
+ Today's date: {date}
152
+ User tier: {user_tier}
153
+ """
154
+ ```
155
+
156
+ ### Few-Shot Pattern
157
+ ```python
158
+ def build_few_shot_prompt(query: str, examples: list[dict]) -> str:
159
+ """Build few-shot prompt with examples."""
160
+
161
+ example_text = "\n\n".join([
162
+ f"Question: {ex['question']}\nAnswer: {ex['answer']}"
163
+ for ex in examples
164
+ ])
165
+
166
+ return f"""Here are some examples of how to answer questions:
167
+
168
+ {example_text}
169
+
170
+ Now answer this question in the same style:
171
+ Question: {query}
172
+ Answer:"""
173
+ ```
174
+
175
+ ### Chain-of-Thought
176
+ ```python
177
+ COT_PROMPT = """Let's solve this step by step:
178
+
179
+ 1. First, let me understand what we're looking for
180
+ 2. Then, I'll identify the relevant information
181
+ 3. Next, I'll reason through the logic
182
+ 4. Finally, I'll provide my answer
183
+
184
+ Question: {question}
185
+
186
+ Let's begin:"""
187
+ ```
188
+
189
+ ## Agent Systems
190
+
191
+ ### Tool-Using Agent
192
+ ```python
193
+ from langchain.agents import AgentExecutor, create_openai_functions_agent
194
+ from langchain.tools import Tool
195
+
196
+ # Define tools
197
+ tools = [
198
+ Tool(
199
+ name="search_knowledge_base",
200
+ description="Search the company knowledge base for information",
201
+ func=knowledge_base_search
202
+ ),
203
+ Tool(
204
+ name="get_customer_info",
205
+ description="Retrieve customer information by ID",
206
+ func=get_customer_info
207
+ ),
208
+ Tool(
209
+ name="create_support_ticket",
210
+ description="Create a support ticket for the customer",
211
+ func=create_support_ticket
212
+ ),
213
+ ]
214
+
215
+ # Create agent
216
+ agent = create_openai_functions_agent(
217
+ llm=ChatOpenAI(model="gpt-4"),
218
+ tools=tools,
219
+ prompt=agent_prompt
220
+ )
221
+
222
+ executor = AgentExecutor(
223
+ agent=agent,
224
+ tools=tools,
225
+ max_iterations=5,
226
+ verbose=True
227
+ )
228
+ ```
229
+
230
+ ### Multi-Agent System
231
+ ```python
232
+ class AgentOrchestrator:
233
+ def __init__(self):
234
+ self.router = RouterAgent()
235
+ self.agents = {
236
+ "research": ResearchAgent(),
237
+ "coding": CodingAgent(),
238
+ "analysis": AnalysisAgent(),
239
+ }
240
+
241
+ async def process(self, task: str) -> str:
242
+ # Route to appropriate agent
243
+ agent_type = await self.router.route(task)
244
+
245
+ # Execute with selected agent
246
+ agent = self.agents[agent_type]
247
+ result = await agent.execute(task)
248
+
249
+ # Validate output
250
+ if not self.validate(result):
251
+ result = await self.fallback(task)
252
+
253
+ return result
254
+ ```
255
+
256
+ ## Evaluation
257
+
258
+ ### LLM Output Evaluation
259
+ ```python
260
+ from ragas import evaluate
261
+ from ragas.metrics import (
262
+ faithfulness,
263
+ answer_relevancy,
264
+ context_precision,
265
+ context_recall
266
+ )
267
+
268
+ # Evaluate RAG system
269
+ results = evaluate(
270
+ dataset,
271
+ metrics=[
272
+ faithfulness, # Is answer grounded in context?
273
+ answer_relevancy, # Is answer relevant to question?
274
+ context_precision, # Is retrieved context relevant?
275
+ context_recall # Did we retrieve all needed info?
276
+ ]
277
+ )
278
+
279
+ print(results)
280
+ ```
281
+
282
+ ### Custom Evaluation
283
+ ```python
284
+ def evaluate_response(
285
+ query: str,
286
+ response: str,
287
+ expected: str,
288
+ evaluator_llm
289
+ ) -> dict:
290
+ """Use LLM as judge for evaluation."""
291
+
292
+ eval_prompt = f"""Evaluate the following response:
293
+
294
+ Query: {query}
295
+ Expected Answer: {expected}
296
+ Actual Response: {response}
297
+
298
+ Rate on these dimensions (1-5):
299
+ 1. Accuracy: Does the response match expected answer?
300
+ 2. Completeness: Does it cover all important points?
301
+ 3. Clarity: Is it well-written and clear?
302
+
303
+ Return JSON: {{"accuracy": X, "completeness": X, "clarity": X, "reasoning": "..."}}"""
304
+
305
+ result = evaluator_llm.invoke(eval_prompt)
306
+ return json.loads(result)
307
+ ```
308
+
309
+ ## Cost Optimization
310
+
311
+ ### Token Management
312
+ ```python
313
+ import tiktoken
314
+
315
+ def count_tokens(text: str, model: str = "gpt-4") -> int:
316
+ """Count tokens in text."""
317
+ encoding = tiktoken.encoding_for_model(model)
318
+ return len(encoding.encode(text))
319
+
320
+ def truncate_to_token_limit(
321
+ text: str,
322
+ max_tokens: int,
323
+ model: str = "gpt-4"
324
+ ) -> str:
325
+ """Truncate text to token limit."""
326
+ encoding = tiktoken.encoding_for_model(model)
327
+ tokens = encoding.encode(text)
328
+
329
+ if len(tokens) <= max_tokens:
330
+ return text
331
+
332
+ return encoding.decode(tokens[:max_tokens])
333
+ ```
334
+
335
+ ### Caching Strategy
336
+ ```python
337
+ import hashlib
338
+ from functools import lru_cache
339
+
340
+ class LLMCache:
341
+ def __init__(self, redis_client):
342
+ self.redis = redis_client
343
+ self.ttl = 3600 # 1 hour
344
+
345
+ def get_cache_key(self, prompt: str, model: str) -> str:
346
+ content = f"{model}:{prompt}"
347
+ return hashlib.sha256(content.encode()).hexdigest()
348
+
349
+ async def get_or_generate(
350
+ self,
351
+ prompt: str,
352
+ model: str,
353
+ generate_fn
354
+ ) -> str:
355
+ key = self.get_cache_key(prompt, model)
356
+
357
+ # Check cache
358
+ cached = await self.redis.get(key)
359
+ if cached:
360
+ return cached
361
+
362
+ # Generate and cache
363
+ result = await generate_fn(prompt)
364
+ await self.redis.setex(key, self.ttl, result)
365
+ return result
366
+ ```
367
+
368
+ ## Anti-Patterns to Avoid
369
+
370
+ | Anti-Pattern | Better Approach |
371
+ |--------------|-----------------|
372
+ | No error handling | Graceful degradation |
373
+ | No output validation | Guardrails and checks |
374
+ | Ignoring latency | Streaming, caching |
375
+ | No cost monitoring | Token tracking |
376
+ | Prompt injection risk | Input sanitization |
377
+
378
+ ## Constraints
379
+
380
+ - Validate all LLM outputs
381
+ - Implement rate limiting
382
+ - Monitor costs continuously
383
+ - Handle failures gracefully
384
+ - Test with edge cases
385
+
386
+ ## Related Skills
387
+
388
+ - `ml-engineer` - ML infrastructure
389
+ - `backend-developer` - API integration
390
+ - `security-engineer` - Prompt injection defense