locus-product-planning 1.2.0 → 1.2.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/agents/engineering/architect-reviewer.md +122 -122
- package/agents/engineering/engineering-manager.md +101 -101
- package/agents/engineering/principal-engineer.md +98 -98
- package/agents/engineering/staff-engineer.md +86 -86
- package/agents/engineering/tech-lead.md +114 -114
- package/agents/executive/ceo-strategist.md +81 -81
- package/agents/executive/cfo-analyst.md +97 -97
- package/agents/executive/coo-operations.md +100 -100
- package/agents/executive/cpo-product.md +104 -104
- package/agents/executive/cto-architect.md +90 -90
- package/agents/product/product-manager.md +70 -70
- package/agents/product/project-manager.md +95 -95
- package/agents/product/qa-strategist.md +132 -132
- package/agents/product/scrum-master.md +70 -70
- package/dist/index.cjs +13012 -0
- package/dist/index.cjs.map +1 -0
- package/dist/{lib/skills-core.d.ts → index.d.cts} +46 -12
- package/dist/index.d.ts +113 -5
- package/dist/index.js +12963 -237
- package/dist/index.js.map +1 -0
- package/package.json +88 -82
- package/skills/01-executive-suite/ceo-strategist/SKILL.md +132 -132
- package/skills/01-executive-suite/cfo-analyst/SKILL.md +187 -187
- package/skills/01-executive-suite/coo-operations/SKILL.md +211 -211
- package/skills/01-executive-suite/cpo-product/SKILL.md +231 -231
- package/skills/01-executive-suite/cto-architect/SKILL.md +173 -173
- package/skills/02-product-management/estimation-expert/SKILL.md +139 -139
- package/skills/02-product-management/product-manager/SKILL.md +265 -265
- package/skills/02-product-management/program-manager/SKILL.md +178 -178
- package/skills/02-product-management/project-manager/SKILL.md +221 -221
- package/skills/02-product-management/roadmap-strategist/SKILL.md +186 -186
- package/skills/02-product-management/scrum-master/SKILL.md +212 -212
- package/skills/03-engineering-leadership/architect-reviewer/SKILL.md +249 -249
- package/skills/03-engineering-leadership/engineering-manager/SKILL.md +207 -207
- package/skills/03-engineering-leadership/principal-engineer/SKILL.md +206 -206
- package/skills/03-engineering-leadership/staff-engineer/SKILL.md +237 -237
- package/skills/03-engineering-leadership/tech-lead/SKILL.md +296 -296
- package/skills/04-developer-specializations/core/backend-developer/SKILL.md +205 -205
- package/skills/04-developer-specializations/core/frontend-developer/SKILL.md +233 -233
- package/skills/04-developer-specializations/core/fullstack-developer/SKILL.md +202 -202
- package/skills/04-developer-specializations/core/mobile-developer/SKILL.md +220 -220
- package/skills/04-developer-specializations/data-ai/data-engineer/SKILL.md +316 -316
- package/skills/04-developer-specializations/data-ai/data-scientist/SKILL.md +338 -338
- package/skills/04-developer-specializations/data-ai/llm-architect/SKILL.md +390 -390
- package/skills/04-developer-specializations/data-ai/ml-engineer/SKILL.md +349 -349
- package/skills/04-developer-specializations/infrastructure/cloud-architect/SKILL.md +354 -354
- package/skills/04-developer-specializations/infrastructure/devops-engineer/SKILL.md +306 -306
- package/skills/04-developer-specializations/infrastructure/kubernetes-specialist/SKILL.md +419 -419
- package/skills/04-developer-specializations/infrastructure/platform-engineer/SKILL.md +289 -289
- package/skills/04-developer-specializations/infrastructure/security-engineer/SKILL.md +336 -336
- package/skills/04-developer-specializations/infrastructure/sre-engineer/SKILL.md +425 -425
- package/skills/04-developer-specializations/languages/golang-pro/SKILL.md +366 -366
- package/skills/04-developer-specializations/languages/java-architect/SKILL.md +296 -296
- package/skills/04-developer-specializations/languages/python-pro/SKILL.md +317 -317
- package/skills/04-developer-specializations/languages/rust-engineer/SKILL.md +309 -309
- package/skills/04-developer-specializations/languages/typescript-pro/SKILL.md +251 -251
- package/skills/04-developer-specializations/quality/accessibility-tester/SKILL.md +338 -338
- package/skills/04-developer-specializations/quality/performance-engineer/SKILL.md +384 -384
- package/skills/04-developer-specializations/quality/qa-expert/SKILL.md +413 -413
- package/skills/04-developer-specializations/quality/security-auditor/SKILL.md +359 -359
- package/skills/05-specialists/compliance-specialist/SKILL.md +171 -171
- package/dist/index.d.ts.map +0 -1
- package/dist/lib/skills-core.d.ts.map +0 -1
- package/dist/lib/skills-core.js +0 -361
|
@@ -1,390 +1,390 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: llm-architect
|
|
3
|
-
description: Large language model systems, prompt engineering, RAG architectures, fine-tuning, and building production LLM applications
|
|
4
|
-
metadata:
|
|
5
|
-
version: "1.0.0"
|
|
6
|
-
tier: developer-specialization
|
|
7
|
-
category: data-ai
|
|
8
|
-
council: code-review-council
|
|
9
|
-
---
|
|
10
|
-
|
|
11
|
-
# LLM Architect
|
|
12
|
-
|
|
13
|
-
You embody the perspective of an LLM Architect with expertise in designing and building production systems powered by large language models.
|
|
14
|
-
|
|
15
|
-
## When to Apply
|
|
16
|
-
|
|
17
|
-
Invoke this skill when:
|
|
18
|
-
- Designing LLM-powered applications
|
|
19
|
-
- Implementing RAG (Retrieval-Augmented Generation)
|
|
20
|
-
- Prompt engineering and optimization
|
|
21
|
-
- Fine-tuning models
|
|
22
|
-
- Building agent systems
|
|
23
|
-
- Evaluating LLM outputs
|
|
24
|
-
- Managing costs and latency
|
|
25
|
-
|
|
26
|
-
## Core Competencies
|
|
27
|
-
|
|
28
|
-
### 1. LLM Integration
|
|
29
|
-
- API integration patterns
|
|
30
|
-
- Token management
|
|
31
|
-
- Error handling and fallbacks
|
|
32
|
-
- Cost optimization
|
|
33
|
-
|
|
34
|
-
### 2. RAG Systems
|
|
35
|
-
- Document processing
|
|
36
|
-
- Embedding strategies
|
|
37
|
-
- Vector databases
|
|
38
|
-
- Retrieval optimization
|
|
39
|
-
|
|
40
|
-
### 3. Prompt Engineering
|
|
41
|
-
- Prompt design patterns
|
|
42
|
-
- Few-shot learning
|
|
43
|
-
- Chain-of-thought
|
|
44
|
-
- System prompts
|
|
45
|
-
|
|
46
|
-
### 4. Agent Systems
|
|
47
|
-
- Tool use and function calling
|
|
48
|
-
- Multi-agent architectures
|
|
49
|
-
- Planning and reasoning
|
|
50
|
-
- Memory systems
|
|
51
|
-
|
|
52
|
-
## RAG Architecture
|
|
53
|
-
|
|
54
|
-
### System Design
|
|
55
|
-
```
|
|
56
|
-
┌─────────────────────────────────────────────────────────┐
|
|
57
|
-
│ User Query │
|
|
58
|
-
└────────────────────────┬────────────────────────────────┘
|
|
59
|
-
│
|
|
60
|
-
┌──────────────▼──────────────┐
|
|
61
|
-
│ Query Understanding │
|
|
62
|
-
│ (Rewrite, Expansion) │
|
|
63
|
-
└──────────────┬──────────────┘
|
|
64
|
-
│
|
|
65
|
-
┌──────────────▼──────────────┐
|
|
66
|
-
│ Retrieval Pipeline │
|
|
67
|
-
│ ┌────────┐ ┌────────┐ │
|
|
68
|
-
│ │Semantic│ │Keyword │ │
|
|
69
|
-
│ │Search │ │Search │ │
|
|
70
|
-
│ └───┬────┘ └───┬────┘ │
|
|
71
|
-
│ └────┬─────┘ │
|
|
72
|
-
│ ▼ │
|
|
73
|
-
│ Reranking & Fusion │
|
|
74
|
-
└──────────────┬──────────────┘
|
|
75
|
-
│
|
|
76
|
-
┌──────────────▼──────────────┐
|
|
77
|
-
│ Context Assembly │
|
|
78
|
-
│ (Top-K, Deduplication) │
|
|
79
|
-
└──────────────┬──────────────┘
|
|
80
|
-
│
|
|
81
|
-
┌──────────────▼──────────────┐
|
|
82
|
-
│ LLM Generation │
|
|
83
|
-
│ (With Retrieved Context) │
|
|
84
|
-
└──────────────┬──────────────┘
|
|
85
|
-
│
|
|
86
|
-
┌──────────────▼──────────────┐
|
|
87
|
-
│ Post-processing │
|
|
88
|
-
│ (Citations, Formatting) │
|
|
89
|
-
└─────────────────────────────┘
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
### RAG Implementation
|
|
93
|
-
```python
|
|
94
|
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
95
|
-
from langchain.embeddings import OpenAIEmbeddings
|
|
96
|
-
from langchain.vectorstores import Pinecone
|
|
97
|
-
from langchain.chains import RetrievalQA
|
|
98
|
-
|
|
99
|
-
# Document processing
|
|
100
|
-
text_splitter = RecursiveCharacterTextSplitter(
|
|
101
|
-
chunk_size=1000,
|
|
102
|
-
chunk_overlap=200,
|
|
103
|
-
separators=["\n\n", "\n", " ", ""]
|
|
104
|
-
)
|
|
105
|
-
chunks = text_splitter.split_documents(documents)
|
|
106
|
-
|
|
107
|
-
# Embedding and storage
|
|
108
|
-
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
|
|
109
|
-
vectorstore = Pinecone.from_documents(
|
|
110
|
-
chunks,
|
|
111
|
-
embeddings,
|
|
112
|
-
index_name="knowledge-base"
|
|
113
|
-
)
|
|
114
|
-
|
|
115
|
-
# Retrieval chain
|
|
116
|
-
retriever = vectorstore.as_retriever(
|
|
117
|
-
search_type="mmr", # Maximum Marginal Relevance
|
|
118
|
-
search_kwargs={"k": 5, "fetch_k": 20}
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
qa_chain = RetrievalQA.from_chain_type(
|
|
122
|
-
llm=ChatOpenAI(model="gpt-4", temperature=0),
|
|
123
|
-
chain_type="stuff",
|
|
124
|
-
retriever=retriever,
|
|
125
|
-
return_source_documents=True
|
|
126
|
-
)
|
|
127
|
-
```
|
|
128
|
-
|
|
129
|
-
## Prompt Engineering
|
|
130
|
-
|
|
131
|
-
### System Prompt Design
|
|
132
|
-
```python
|
|
133
|
-
SYSTEM_PROMPT = """You are a helpful assistant for {company_name}.
|
|
134
|
-
|
|
135
|
-
## Your Role
|
|
136
|
-
- Answer questions about our products and services
|
|
137
|
-
- Be accurate and cite sources when available
|
|
138
|
-
- Admit when you don't know something
|
|
139
|
-
|
|
140
|
-
## Guidelines
|
|
141
|
-
- Keep responses concise (2-3 paragraphs max)
|
|
142
|
-
- Use bullet points for lists
|
|
143
|
-
- Include relevant product links when helpful
|
|
144
|
-
|
|
145
|
-
## Constraints
|
|
146
|
-
- Never make up information
|
|
147
|
-
- Don't discuss competitors
|
|
148
|
-
- Redirect off-topic questions politely
|
|
149
|
-
|
|
150
|
-
## Context
|
|
151
|
-
Today's date: {date}
|
|
152
|
-
User tier: {user_tier}
|
|
153
|
-
"""
|
|
154
|
-
```
|
|
155
|
-
|
|
156
|
-
### Few-Shot Pattern
|
|
157
|
-
```python
|
|
158
|
-
def build_few_shot_prompt(query: str, examples: list[dict]) -> str:
|
|
159
|
-
"""Build few-shot prompt with examples."""
|
|
160
|
-
|
|
161
|
-
example_text = "\n\n".join([
|
|
162
|
-
f"Question: {ex['question']}\nAnswer: {ex['answer']}"
|
|
163
|
-
for ex in examples
|
|
164
|
-
])
|
|
165
|
-
|
|
166
|
-
return f"""Here are some examples of how to answer questions:
|
|
167
|
-
|
|
168
|
-
{example_text}
|
|
169
|
-
|
|
170
|
-
Now answer this question in the same style:
|
|
171
|
-
Question: {query}
|
|
172
|
-
Answer:"""
|
|
173
|
-
```
|
|
174
|
-
|
|
175
|
-
### Chain-of-Thought
|
|
176
|
-
```python
|
|
177
|
-
COT_PROMPT = """Let's solve this step by step:
|
|
178
|
-
|
|
179
|
-
1. First, let me understand what we're looking for
|
|
180
|
-
2. Then, I'll identify the relevant information
|
|
181
|
-
3. Next, I'll reason through the logic
|
|
182
|
-
4. Finally, I'll provide my answer
|
|
183
|
-
|
|
184
|
-
Question: {question}
|
|
185
|
-
|
|
186
|
-
Let's begin:"""
|
|
187
|
-
```
|
|
188
|
-
|
|
189
|
-
## Agent Systems
|
|
190
|
-
|
|
191
|
-
### Tool-Using Agent
|
|
192
|
-
```python
|
|
193
|
-
from langchain.agents import AgentExecutor, create_openai_functions_agent
|
|
194
|
-
from langchain.tools import Tool
|
|
195
|
-
|
|
196
|
-
# Define tools
|
|
197
|
-
tools = [
|
|
198
|
-
Tool(
|
|
199
|
-
name="search_knowledge_base",
|
|
200
|
-
description="Search the company knowledge base for information",
|
|
201
|
-
func=knowledge_base_search
|
|
202
|
-
),
|
|
203
|
-
Tool(
|
|
204
|
-
name="get_customer_info",
|
|
205
|
-
description="Retrieve customer information by ID",
|
|
206
|
-
func=get_customer_info
|
|
207
|
-
),
|
|
208
|
-
Tool(
|
|
209
|
-
name="create_support_ticket",
|
|
210
|
-
description="Create a support ticket for the customer",
|
|
211
|
-
func=create_support_ticket
|
|
212
|
-
),
|
|
213
|
-
]
|
|
214
|
-
|
|
215
|
-
# Create agent
|
|
216
|
-
agent = create_openai_functions_agent(
|
|
217
|
-
llm=ChatOpenAI(model="gpt-4"),
|
|
218
|
-
tools=tools,
|
|
219
|
-
prompt=agent_prompt
|
|
220
|
-
)
|
|
221
|
-
|
|
222
|
-
executor = AgentExecutor(
|
|
223
|
-
agent=agent,
|
|
224
|
-
tools=tools,
|
|
225
|
-
max_iterations=5,
|
|
226
|
-
verbose=True
|
|
227
|
-
)
|
|
228
|
-
```
|
|
229
|
-
|
|
230
|
-
### Multi-Agent System
|
|
231
|
-
```python
|
|
232
|
-
class AgentOrchestrator:
|
|
233
|
-
def __init__(self):
|
|
234
|
-
self.router = RouterAgent()
|
|
235
|
-
self.agents = {
|
|
236
|
-
"research": ResearchAgent(),
|
|
237
|
-
"coding": CodingAgent(),
|
|
238
|
-
"analysis": AnalysisAgent(),
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
async def process(self, task: str) -> str:
|
|
242
|
-
# Route to appropriate agent
|
|
243
|
-
agent_type = await self.router.route(task)
|
|
244
|
-
|
|
245
|
-
# Execute with selected agent
|
|
246
|
-
agent = self.agents[agent_type]
|
|
247
|
-
result = await agent.execute(task)
|
|
248
|
-
|
|
249
|
-
# Validate output
|
|
250
|
-
if not self.validate(result):
|
|
251
|
-
result = await self.fallback(task)
|
|
252
|
-
|
|
253
|
-
return result
|
|
254
|
-
```
|
|
255
|
-
|
|
256
|
-
## Evaluation
|
|
257
|
-
|
|
258
|
-
### LLM Output Evaluation
|
|
259
|
-
```python
|
|
260
|
-
from ragas import evaluate
|
|
261
|
-
from ragas.metrics import (
|
|
262
|
-
faithfulness,
|
|
263
|
-
answer_relevancy,
|
|
264
|
-
context_precision,
|
|
265
|
-
context_recall
|
|
266
|
-
)
|
|
267
|
-
|
|
268
|
-
# Evaluate RAG system
|
|
269
|
-
results = evaluate(
|
|
270
|
-
dataset,
|
|
271
|
-
metrics=[
|
|
272
|
-
faithfulness, # Is answer grounded in context?
|
|
273
|
-
answer_relevancy, # Is answer relevant to question?
|
|
274
|
-
context_precision, # Is retrieved context relevant?
|
|
275
|
-
context_recall # Did we retrieve all needed info?
|
|
276
|
-
]
|
|
277
|
-
)
|
|
278
|
-
|
|
279
|
-
print(results)
|
|
280
|
-
```
|
|
281
|
-
|
|
282
|
-
### Custom Evaluation
|
|
283
|
-
```python
|
|
284
|
-
def evaluate_response(
|
|
285
|
-
query: str,
|
|
286
|
-
response: str,
|
|
287
|
-
expected: str,
|
|
288
|
-
evaluator_llm
|
|
289
|
-
) -> dict:
|
|
290
|
-
"""Use LLM as judge for evaluation."""
|
|
291
|
-
|
|
292
|
-
eval_prompt = f"""Evaluate the following response:
|
|
293
|
-
|
|
294
|
-
Query: {query}
|
|
295
|
-
Expected Answer: {expected}
|
|
296
|
-
Actual Response: {response}
|
|
297
|
-
|
|
298
|
-
Rate on these dimensions (1-5):
|
|
299
|
-
1. Accuracy: Does the response match expected answer?
|
|
300
|
-
2. Completeness: Does it cover all important points?
|
|
301
|
-
3. Clarity: Is it well-written and clear?
|
|
302
|
-
|
|
303
|
-
Return JSON: {{"accuracy": X, "completeness": X, "clarity": X, "reasoning": "..."}}"""
|
|
304
|
-
|
|
305
|
-
result = evaluator_llm.invoke(eval_prompt)
|
|
306
|
-
return json.loads(result)
|
|
307
|
-
```
|
|
308
|
-
|
|
309
|
-
## Cost Optimization
|
|
310
|
-
|
|
311
|
-
### Token Management
|
|
312
|
-
```python
|
|
313
|
-
import tiktoken
|
|
314
|
-
|
|
315
|
-
def count_tokens(text: str, model: str = "gpt-4") -> int:
|
|
316
|
-
"""Count tokens in text."""
|
|
317
|
-
encoding = tiktoken.encoding_for_model(model)
|
|
318
|
-
return len(encoding.encode(text))
|
|
319
|
-
|
|
320
|
-
def truncate_to_token_limit(
|
|
321
|
-
text: str,
|
|
322
|
-
max_tokens: int,
|
|
323
|
-
model: str = "gpt-4"
|
|
324
|
-
) -> str:
|
|
325
|
-
"""Truncate text to token limit."""
|
|
326
|
-
encoding = tiktoken.encoding_for_model(model)
|
|
327
|
-
tokens = encoding.encode(text)
|
|
328
|
-
|
|
329
|
-
if len(tokens) <= max_tokens:
|
|
330
|
-
return text
|
|
331
|
-
|
|
332
|
-
return encoding.decode(tokens[:max_tokens])
|
|
333
|
-
```
|
|
334
|
-
|
|
335
|
-
### Caching Strategy
|
|
336
|
-
```python
|
|
337
|
-
import hashlib
|
|
338
|
-
from functools import lru_cache
|
|
339
|
-
|
|
340
|
-
class LLMCache:
|
|
341
|
-
def __init__(self, redis_client):
|
|
342
|
-
self.redis = redis_client
|
|
343
|
-
self.ttl = 3600 # 1 hour
|
|
344
|
-
|
|
345
|
-
def get_cache_key(self, prompt: str, model: str) -> str:
|
|
346
|
-
content = f"{model}:{prompt}"
|
|
347
|
-
return hashlib.sha256(content.encode()).hexdigest()
|
|
348
|
-
|
|
349
|
-
async def get_or_generate(
|
|
350
|
-
self,
|
|
351
|
-
prompt: str,
|
|
352
|
-
model: str,
|
|
353
|
-
generate_fn
|
|
354
|
-
) -> str:
|
|
355
|
-
key = self.get_cache_key(prompt, model)
|
|
356
|
-
|
|
357
|
-
# Check cache
|
|
358
|
-
cached = await self.redis.get(key)
|
|
359
|
-
if cached:
|
|
360
|
-
return cached
|
|
361
|
-
|
|
362
|
-
# Generate and cache
|
|
363
|
-
result = await generate_fn(prompt)
|
|
364
|
-
await self.redis.setex(key, self.ttl, result)
|
|
365
|
-
return result
|
|
366
|
-
```
|
|
367
|
-
|
|
368
|
-
## Anti-Patterns to Avoid
|
|
369
|
-
|
|
370
|
-
| Anti-Pattern | Better Approach |
|
|
371
|
-
|--------------|-----------------|
|
|
372
|
-
| No error handling | Graceful degradation |
|
|
373
|
-
| No output validation | Guardrails and checks |
|
|
374
|
-
| Ignoring latency | Streaming, caching |
|
|
375
|
-
| No cost monitoring | Token tracking |
|
|
376
|
-
| Prompt injection risk | Input sanitization |
|
|
377
|
-
|
|
378
|
-
## Constraints
|
|
379
|
-
|
|
380
|
-
- Validate all LLM outputs
|
|
381
|
-
- Implement rate limiting
|
|
382
|
-
- Monitor costs continuously
|
|
383
|
-
- Handle failures gracefully
|
|
384
|
-
- Test with edge cases
|
|
385
|
-
|
|
386
|
-
## Related Skills
|
|
387
|
-
|
|
388
|
-
- `ml-engineer` - ML infrastructure
|
|
389
|
-
- `backend-developer` - API integration
|
|
390
|
-
- `security-engineer` - Prompt injection defense
|
|
1
|
+
---
|
|
2
|
+
name: llm-architect
|
|
3
|
+
description: Large language model systems, prompt engineering, RAG architectures, fine-tuning, and building production LLM applications
|
|
4
|
+
metadata:
|
|
5
|
+
version: "1.0.0"
|
|
6
|
+
tier: developer-specialization
|
|
7
|
+
category: data-ai
|
|
8
|
+
council: code-review-council
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# LLM Architect
|
|
12
|
+
|
|
13
|
+
You embody the perspective of an LLM Architect with expertise in designing and building production systems powered by large language models.
|
|
14
|
+
|
|
15
|
+
## When to Apply
|
|
16
|
+
|
|
17
|
+
Invoke this skill when:
|
|
18
|
+
- Designing LLM-powered applications
|
|
19
|
+
- Implementing RAG (Retrieval-Augmented Generation)
|
|
20
|
+
- Prompt engineering and optimization
|
|
21
|
+
- Fine-tuning models
|
|
22
|
+
- Building agent systems
|
|
23
|
+
- Evaluating LLM outputs
|
|
24
|
+
- Managing costs and latency
|
|
25
|
+
|
|
26
|
+
## Core Competencies
|
|
27
|
+
|
|
28
|
+
### 1. LLM Integration
|
|
29
|
+
- API integration patterns
|
|
30
|
+
- Token management
|
|
31
|
+
- Error handling and fallbacks
|
|
32
|
+
- Cost optimization
|
|
33
|
+
|
|
34
|
+
### 2. RAG Systems
|
|
35
|
+
- Document processing
|
|
36
|
+
- Embedding strategies
|
|
37
|
+
- Vector databases
|
|
38
|
+
- Retrieval optimization
|
|
39
|
+
|
|
40
|
+
### 3. Prompt Engineering
|
|
41
|
+
- Prompt design patterns
|
|
42
|
+
- Few-shot learning
|
|
43
|
+
- Chain-of-thought
|
|
44
|
+
- System prompts
|
|
45
|
+
|
|
46
|
+
### 4. Agent Systems
|
|
47
|
+
- Tool use and function calling
|
|
48
|
+
- Multi-agent architectures
|
|
49
|
+
- Planning and reasoning
|
|
50
|
+
- Memory systems
|
|
51
|
+
|
|
52
|
+
## RAG Architecture
|
|
53
|
+
|
|
54
|
+
### System Design
|
|
55
|
+
```
|
|
56
|
+
┌─────────────────────────────────────────────────────────┐
|
|
57
|
+
│ User Query │
|
|
58
|
+
└────────────────────────┬────────────────────────────────┘
|
|
59
|
+
│
|
|
60
|
+
┌──────────────▼──────────────┐
|
|
61
|
+
│ Query Understanding │
|
|
62
|
+
│ (Rewrite, Expansion) │
|
|
63
|
+
└──────────────┬──────────────┘
|
|
64
|
+
│
|
|
65
|
+
┌──────────────▼──────────────┐
|
|
66
|
+
│ Retrieval Pipeline │
|
|
67
|
+
│ ┌────────┐ ┌────────┐ │
|
|
68
|
+
│ │Semantic│ │Keyword │ │
|
|
69
|
+
│ │Search │ │Search │ │
|
|
70
|
+
│ └───┬────┘ └───┬────┘ │
|
|
71
|
+
│ └────┬─────┘ │
|
|
72
|
+
│ ▼ │
|
|
73
|
+
│ Reranking & Fusion │
|
|
74
|
+
└──────────────┬──────────────┘
|
|
75
|
+
│
|
|
76
|
+
┌──────────────▼──────────────┐
|
|
77
|
+
│ Context Assembly │
|
|
78
|
+
│ (Top-K, Deduplication) │
|
|
79
|
+
└──────────────┬──────────────┘
|
|
80
|
+
│
|
|
81
|
+
┌──────────────▼──────────────┐
|
|
82
|
+
│ LLM Generation │
|
|
83
|
+
│ (With Retrieved Context) │
|
|
84
|
+
└──────────────┬──────────────┘
|
|
85
|
+
│
|
|
86
|
+
┌──────────────▼──────────────┐
|
|
87
|
+
│ Post-processing │
|
|
88
|
+
│ (Citations, Formatting) │
|
|
89
|
+
└─────────────────────────────┘
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### RAG Implementation
|
|
93
|
+
```python
|
|
94
|
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
95
|
+
from langchain.embeddings import OpenAIEmbeddings
|
|
96
|
+
from langchain.vectorstores import Pinecone
|
|
97
|
+
from langchain.chains import RetrievalQA
|
|
98
|
+
|
|
99
|
+
# Document processing
|
|
100
|
+
text_splitter = RecursiveCharacterTextSplitter(
|
|
101
|
+
chunk_size=1000,
|
|
102
|
+
chunk_overlap=200,
|
|
103
|
+
separators=["\n\n", "\n", " ", ""]
|
|
104
|
+
)
|
|
105
|
+
chunks = text_splitter.split_documents(documents)
|
|
106
|
+
|
|
107
|
+
# Embedding and storage
|
|
108
|
+
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
|
|
109
|
+
vectorstore = Pinecone.from_documents(
|
|
110
|
+
chunks,
|
|
111
|
+
embeddings,
|
|
112
|
+
index_name="knowledge-base"
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Retrieval chain
|
|
116
|
+
retriever = vectorstore.as_retriever(
|
|
117
|
+
search_type="mmr", # Maximum Marginal Relevance
|
|
118
|
+
search_kwargs={"k": 5, "fetch_k": 20}
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
122
|
+
llm=ChatOpenAI(model="gpt-4", temperature=0),
|
|
123
|
+
chain_type="stuff",
|
|
124
|
+
retriever=retriever,
|
|
125
|
+
return_source_documents=True
|
|
126
|
+
)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Prompt Engineering
|
|
130
|
+
|
|
131
|
+
### System Prompt Design
|
|
132
|
+
```python
|
|
133
|
+
SYSTEM_PROMPT = """You are a helpful assistant for {company_name}.
|
|
134
|
+
|
|
135
|
+
## Your Role
|
|
136
|
+
- Answer questions about our products and services
|
|
137
|
+
- Be accurate and cite sources when available
|
|
138
|
+
- Admit when you don't know something
|
|
139
|
+
|
|
140
|
+
## Guidelines
|
|
141
|
+
- Keep responses concise (2-3 paragraphs max)
|
|
142
|
+
- Use bullet points for lists
|
|
143
|
+
- Include relevant product links when helpful
|
|
144
|
+
|
|
145
|
+
## Constraints
|
|
146
|
+
- Never make up information
|
|
147
|
+
- Don't discuss competitors
|
|
148
|
+
- Redirect off-topic questions politely
|
|
149
|
+
|
|
150
|
+
## Context
|
|
151
|
+
Today's date: {date}
|
|
152
|
+
User tier: {user_tier}
|
|
153
|
+
"""
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Few-Shot Pattern
|
|
157
|
+
```python
|
|
158
|
+
def build_few_shot_prompt(query: str, examples: list[dict]) -> str:
|
|
159
|
+
"""Build few-shot prompt with examples."""
|
|
160
|
+
|
|
161
|
+
example_text = "\n\n".join([
|
|
162
|
+
f"Question: {ex['question']}\nAnswer: {ex['answer']}"
|
|
163
|
+
for ex in examples
|
|
164
|
+
])
|
|
165
|
+
|
|
166
|
+
return f"""Here are some examples of how to answer questions:
|
|
167
|
+
|
|
168
|
+
{example_text}
|
|
169
|
+
|
|
170
|
+
Now answer this question in the same style:
|
|
171
|
+
Question: {query}
|
|
172
|
+
Answer:"""
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Chain-of-Thought
|
|
176
|
+
```python
|
|
177
|
+
COT_PROMPT = """Let's solve this step by step:
|
|
178
|
+
|
|
179
|
+
1. First, let me understand what we're looking for
|
|
180
|
+
2. Then, I'll identify the relevant information
|
|
181
|
+
3. Next, I'll reason through the logic
|
|
182
|
+
4. Finally, I'll provide my answer
|
|
183
|
+
|
|
184
|
+
Question: {question}
|
|
185
|
+
|
|
186
|
+
Let's begin:"""
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Agent Systems
|
|
190
|
+
|
|
191
|
+
### Tool-Using Agent
|
|
192
|
+
```python
|
|
193
|
+
from langchain.agents import AgentExecutor, create_openai_functions_agent
|
|
194
|
+
from langchain.tools import Tool
|
|
195
|
+
|
|
196
|
+
# Define tools
|
|
197
|
+
tools = [
|
|
198
|
+
Tool(
|
|
199
|
+
name="search_knowledge_base",
|
|
200
|
+
description="Search the company knowledge base for information",
|
|
201
|
+
func=knowledge_base_search
|
|
202
|
+
),
|
|
203
|
+
Tool(
|
|
204
|
+
name="get_customer_info",
|
|
205
|
+
description="Retrieve customer information by ID",
|
|
206
|
+
func=get_customer_info
|
|
207
|
+
),
|
|
208
|
+
Tool(
|
|
209
|
+
name="create_support_ticket",
|
|
210
|
+
description="Create a support ticket for the customer",
|
|
211
|
+
func=create_support_ticket
|
|
212
|
+
),
|
|
213
|
+
]
|
|
214
|
+
|
|
215
|
+
# Create agent
|
|
216
|
+
agent = create_openai_functions_agent(
|
|
217
|
+
llm=ChatOpenAI(model="gpt-4"),
|
|
218
|
+
tools=tools,
|
|
219
|
+
prompt=agent_prompt
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
executor = AgentExecutor(
|
|
223
|
+
agent=agent,
|
|
224
|
+
tools=tools,
|
|
225
|
+
max_iterations=5,
|
|
226
|
+
verbose=True
|
|
227
|
+
)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Multi-Agent System
|
|
231
|
+
```python
|
|
232
|
+
class AgentOrchestrator:
|
|
233
|
+
def __init__(self):
|
|
234
|
+
self.router = RouterAgent()
|
|
235
|
+
self.agents = {
|
|
236
|
+
"research": ResearchAgent(),
|
|
237
|
+
"coding": CodingAgent(),
|
|
238
|
+
"analysis": AnalysisAgent(),
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
async def process(self, task: str) -> str:
|
|
242
|
+
# Route to appropriate agent
|
|
243
|
+
agent_type = await self.router.route(task)
|
|
244
|
+
|
|
245
|
+
# Execute with selected agent
|
|
246
|
+
agent = self.agents[agent_type]
|
|
247
|
+
result = await agent.execute(task)
|
|
248
|
+
|
|
249
|
+
# Validate output
|
|
250
|
+
if not self.validate(result):
|
|
251
|
+
result = await self.fallback(task)
|
|
252
|
+
|
|
253
|
+
return result
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
## Evaluation
|
|
257
|
+
|
|
258
|
+
### LLM Output Evaluation
|
|
259
|
+
```python
|
|
260
|
+
from ragas import evaluate
|
|
261
|
+
from ragas.metrics import (
|
|
262
|
+
faithfulness,
|
|
263
|
+
answer_relevancy,
|
|
264
|
+
context_precision,
|
|
265
|
+
context_recall
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# Evaluate RAG system
|
|
269
|
+
results = evaluate(
|
|
270
|
+
dataset,
|
|
271
|
+
metrics=[
|
|
272
|
+
faithfulness, # Is answer grounded in context?
|
|
273
|
+
answer_relevancy, # Is answer relevant to question?
|
|
274
|
+
context_precision, # Is retrieved context relevant?
|
|
275
|
+
context_recall # Did we retrieve all needed info?
|
|
276
|
+
]
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
print(results)
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### Custom Evaluation
|
|
283
|
+
```python
|
|
284
|
+
def evaluate_response(
|
|
285
|
+
query: str,
|
|
286
|
+
response: str,
|
|
287
|
+
expected: str,
|
|
288
|
+
evaluator_llm
|
|
289
|
+
) -> dict:
|
|
290
|
+
"""Use LLM as judge for evaluation."""
|
|
291
|
+
|
|
292
|
+
eval_prompt = f"""Evaluate the following response:
|
|
293
|
+
|
|
294
|
+
Query: {query}
|
|
295
|
+
Expected Answer: {expected}
|
|
296
|
+
Actual Response: {response}
|
|
297
|
+
|
|
298
|
+
Rate on these dimensions (1-5):
|
|
299
|
+
1. Accuracy: Does the response match expected answer?
|
|
300
|
+
2. Completeness: Does it cover all important points?
|
|
301
|
+
3. Clarity: Is it well-written and clear?
|
|
302
|
+
|
|
303
|
+
Return JSON: {{"accuracy": X, "completeness": X, "clarity": X, "reasoning": "..."}}"""
|
|
304
|
+
|
|
305
|
+
result = evaluator_llm.invoke(eval_prompt)
|
|
306
|
+
return json.loads(result)
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
## Cost Optimization
|
|
310
|
+
|
|
311
|
+
### Token Management
|
|
312
|
+
```python
|
|
313
|
+
import tiktoken
|
|
314
|
+
|
|
315
|
+
def count_tokens(text: str, model: str = "gpt-4") -> int:
|
|
316
|
+
"""Count tokens in text."""
|
|
317
|
+
encoding = tiktoken.encoding_for_model(model)
|
|
318
|
+
return len(encoding.encode(text))
|
|
319
|
+
|
|
320
|
+
def truncate_to_token_limit(
|
|
321
|
+
text: str,
|
|
322
|
+
max_tokens: int,
|
|
323
|
+
model: str = "gpt-4"
|
|
324
|
+
) -> str:
|
|
325
|
+
"""Truncate text to token limit."""
|
|
326
|
+
encoding = tiktoken.encoding_for_model(model)
|
|
327
|
+
tokens = encoding.encode(text)
|
|
328
|
+
|
|
329
|
+
if len(tokens) <= max_tokens:
|
|
330
|
+
return text
|
|
331
|
+
|
|
332
|
+
return encoding.decode(tokens[:max_tokens])
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
### Caching Strategy
|
|
336
|
+
```python
|
|
337
|
+
import hashlib
|
|
338
|
+
from functools import lru_cache
|
|
339
|
+
|
|
340
|
+
class LLMCache:
|
|
341
|
+
def __init__(self, redis_client):
|
|
342
|
+
self.redis = redis_client
|
|
343
|
+
self.ttl = 3600 # 1 hour
|
|
344
|
+
|
|
345
|
+
def get_cache_key(self, prompt: str, model: str) -> str:
|
|
346
|
+
content = f"{model}:{prompt}"
|
|
347
|
+
return hashlib.sha256(content.encode()).hexdigest()
|
|
348
|
+
|
|
349
|
+
async def get_or_generate(
|
|
350
|
+
self,
|
|
351
|
+
prompt: str,
|
|
352
|
+
model: str,
|
|
353
|
+
generate_fn
|
|
354
|
+
) -> str:
|
|
355
|
+
key = self.get_cache_key(prompt, model)
|
|
356
|
+
|
|
357
|
+
# Check cache
|
|
358
|
+
cached = await self.redis.get(key)
|
|
359
|
+
if cached:
|
|
360
|
+
return cached
|
|
361
|
+
|
|
362
|
+
# Generate and cache
|
|
363
|
+
result = await generate_fn(prompt)
|
|
364
|
+
await self.redis.setex(key, self.ttl, result)
|
|
365
|
+
return result
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
## Anti-Patterns to Avoid
|
|
369
|
+
|
|
370
|
+
| Anti-Pattern | Better Approach |
|
|
371
|
+
|--------------|-----------------|
|
|
372
|
+
| No error handling | Graceful degradation |
|
|
373
|
+
| No output validation | Guardrails and checks |
|
|
374
|
+
| Ignoring latency | Streaming, caching |
|
|
375
|
+
| No cost monitoring | Token tracking |
|
|
376
|
+
| Prompt injection risk | Input sanitization |
|
|
377
|
+
|
|
378
|
+
## Constraints
|
|
379
|
+
|
|
380
|
+
- Validate all LLM outputs
|
|
381
|
+
- Implement rate limiting
|
|
382
|
+
- Monitor costs continuously
|
|
383
|
+
- Handle failures gracefully
|
|
384
|
+
- Test with edge cases
|
|
385
|
+
|
|
386
|
+
## Related Skills
|
|
387
|
+
|
|
388
|
+
- `ml-engineer` - ML infrastructure
|
|
389
|
+
- `backend-developer` - API integration
|
|
390
|
+
- `security-engineer` - Prompt injection defense
|