@namch/agent-assistant 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -522
- package/agents/backend-engineer.md +0 -8
- package/agents/brainstormer.md +0 -6
- package/agents/business-analyst.md +0 -5
- package/agents/database-architect.md +0 -6
- package/agents/debugger.md +0 -6
- package/agents/designer.md +0 -5
- package/agents/devops-engineer.md +0 -7
- package/agents/docs-manager.md +0 -6
- package/agents/frontend-engineer.md +0 -7
- package/agents/game-engineer.md +0 -7
- package/agents/mobile-engineer.md +0 -7
- package/agents/performance-engineer.md +0 -7
- package/agents/planner.md +0 -6
- package/agents/project-manager.md +0 -6
- package/agents/researcher.md +0 -5
- package/agents/reviewer.md +0 -6
- package/agents/scouter.md +0 -6
- package/agents/security-engineer.md +0 -7
- package/agents/tech-lead.md +0 -7
- package/agents/tester.md +0 -5
- package/cli/README.md +19 -10
- package/documents/business/business-features.md +1 -1
- package/documents/business/business-prd.md +4 -4
- package/documents/knowledge-architecture.md +1 -1
- package/documents/knowledge-domain.md +1 -1
- package/documents/knowledge-overview.md +14 -29
- package/documents/knowledge-source-base.md +14 -14
- package/package.json +1 -1
- package/rules/QUICK-REFERENCE.md +4 -1
- package/rules/SKILL-DISCOVERY.md +37 -14
- package/skills/active-directory-attacks/SKILL.md +383 -0
- package/skills/active-directory-attacks/references/advanced-attacks.md +382 -0
- package/skills/agent-evaluation/SKILL.md +64 -0
- package/skills/agent-memory-mcp/SKILL.md +82 -0
- package/skills/agent-memory-systems/SKILL.md +67 -0
- package/skills/agent-tool-builder/SKILL.md +53 -0
- package/skills/ai-agents-architect/SKILL.md +90 -0
- package/skills/ai-product/SKILL.md +54 -0
- package/skills/ai-wrapper-product/SKILL.md +273 -0
- package/skills/api-documentation-generator/SKILL.md +484 -0
- package/skills/api-fuzzing-bug-bounty/SKILL.md +433 -0
- package/skills/api-security-best-practices/SKILL.md +907 -0
- package/skills/autonomous-agent-patterns/SKILL.md +761 -0
- package/skills/autonomous-agents/SKILL.md +68 -0
- package/skills/aws-penetration-testing/SKILL.md +405 -0
- package/skills/aws-penetration-testing/references/advanced-aws-pentesting.md +469 -0
- package/skills/azure-functions/SKILL.md +42 -0
- package/skills/backend-dev-guidelines/SKILL.md +342 -0
- package/skills/backend-dev-guidelines/resources/architecture-overview.md +451 -0
- package/skills/backend-dev-guidelines/resources/async-and-errors.md +307 -0
- package/skills/backend-dev-guidelines/resources/complete-examples.md +638 -0
- package/skills/backend-dev-guidelines/resources/configuration.md +275 -0
- package/skills/backend-dev-guidelines/resources/database-patterns.md +224 -0
- package/skills/backend-dev-guidelines/resources/middleware-guide.md +213 -0
- package/skills/backend-dev-guidelines/resources/routing-and-controllers.md +756 -0
- package/skills/backend-dev-guidelines/resources/sentry-and-monitoring.md +336 -0
- package/skills/backend-dev-guidelines/resources/services-and-repositories.md +789 -0
- package/skills/backend-dev-guidelines/resources/testing-guide.md +235 -0
- package/skills/backend-dev-guidelines/resources/validation-patterns.md +754 -0
- package/skills/broken-authentication/SKILL.md +476 -0
- package/skills/bullmq-specialist/SKILL.md +57 -0
- package/skills/bun-development/SKILL.md +691 -0
- package/skills/burp-suite-testing/SKILL.md +380 -0
- package/skills/cloud-penetration-testing/SKILL.md +501 -0
- package/skills/cloud-penetration-testing/references/advanced-cloud-scripts.md +318 -0
- package/skills/computer-use-agents/SKILL.md +315 -0
- package/skills/content-creator/SKILL.md +248 -0
- package/skills/content-creator/assets/content_calendar_template.md +99 -0
- package/skills/content-creator/references/brand_guidelines.md +199 -0
- package/skills/content-creator/references/content_frameworks.md +534 -0
- package/skills/content-creator/references/social_media_optimization.md +317 -0
- package/skills/content-creator/scripts/brand_voice_analyzer.py +185 -0
- package/skills/content-creator/scripts/seo_optimizer.py +419 -0
- package/skills/context-window-management/SKILL.md +53 -0
- package/skills/conversation-memory/SKILL.md +61 -0
- package/skills/copy-editing/SKILL.md +439 -0
- package/skills/copywriting/SKILL.md +225 -0
- package/skills/crewai/SKILL.md +243 -0
- package/skills/discord-bot-architect/SKILL.md +277 -0
- package/skills/dispatching-parallel-agents/SKILL.md +180 -0
- package/skills/email-sequence/SKILL.md +925 -0
- package/skills/email-systems/SKILL.md +54 -0
- package/skills/ethical-hacking-methodology/SKILL.md +466 -0
- package/skills/executing-plans/SKILL.md +76 -0
- package/skills/file-path-traversal/SKILL.md +486 -0
- package/skills/finishing-a-development-branch/SKILL.md +200 -0
- package/skills/frontend-dev-guidelines/SKILL.md +359 -0
- package/skills/frontend-dev-guidelines/resources/common-patterns.md +331 -0
- package/skills/frontend-dev-guidelines/resources/complete-examples.md +872 -0
- package/skills/frontend-dev-guidelines/resources/component-patterns.md +502 -0
- package/skills/frontend-dev-guidelines/resources/data-fetching.md +767 -0
- package/skills/frontend-dev-guidelines/resources/file-organization.md +502 -0
- package/skills/frontend-dev-guidelines/resources/loading-and-error-states.md +501 -0
- package/skills/frontend-dev-guidelines/resources/performance.md +406 -0
- package/skills/frontend-dev-guidelines/resources/routing-guide.md +364 -0
- package/skills/frontend-dev-guidelines/resources/styling-guide.md +428 -0
- package/skills/frontend-dev-guidelines/resources/typescript-standards.md +418 -0
- package/skills/gcp-cloud-run/SKILL.md +288 -0
- package/skills/git-pushing/SKILL.md +33 -0
- package/skills/git-pushing/scripts/smart_commit.sh +19 -0
- package/skills/github-workflow-automation/SKILL.md +846 -0
- package/skills/html-injection-testing/SKILL.md +498 -0
- package/skills/idor-testing/SKILL.md +442 -0
- package/skills/inngest/SKILL.md +55 -0
- package/skills/javascript-mastery/SKILL.md +645 -0
- package/skills/kaizen/SKILL.md +730 -0
- package/skills/langfuse/SKILL.md +238 -0
- package/skills/langgraph/SKILL.md +287 -0
- package/skills/linux-privilege-escalation/SKILL.md +504 -0
- package/skills/llm-app-patterns/SKILL.md +760 -0
- package/skills/metasploit-framework/SKILL.md +478 -0
- package/skills/multi-agent-brainstorming/SKILL.md +256 -0
- package/skills/neon-postgres/SKILL.md +56 -0
- package/skills/nextjs-supabase-auth/SKILL.md +56 -0
- package/skills/nosql-expert/SKILL.md +111 -0
- package/skills/pentest-checklist/SKILL.md +334 -0
- package/skills/pentest-commands/SKILL.md +438 -0
- package/skills/plaid-fintech/SKILL.md +50 -0
- package/skills/planning-with-files/SKILL.md +211 -0
- package/skills/planning-with-files/examples.md +202 -0
- package/skills/planning-with-files/reference.md +218 -0
- package/skills/planning-with-files/scripts/check-complete.sh +44 -0
- package/skills/planning-with-files/scripts/init-session.sh +120 -0
- package/skills/planning-with-files/templates/findings.md +95 -0
- package/skills/planning-with-files/templates/progress.md +114 -0
- package/skills/planning-with-files/templates/task_plan.md +132 -0
- package/skills/privilege-escalation-methods/SKILL.md +333 -0
- package/skills/production-code-audit/SKILL.md +540 -0
- package/skills/prompt-caching/SKILL.md +61 -0
- package/skills/prompt-engineering/SKILL.md +171 -0
- package/skills/prompt-library/SKILL.md +322 -0
- package/skills/rag-engineer/SKILL.md +90 -0
- package/skills/rag-implementation/SKILL.md +63 -0
- package/skills/react-ui-patterns/SKILL.md +289 -0
- package/skills/red-team-tools/SKILL.md +310 -0
- package/skills/scanning-tools/SKILL.md +589 -0
- package/skills/shodan-reconnaissance/SKILL.md +503 -0
- package/skills/slack-bot-builder/SKILL.md +264 -0
- package/skills/smtp-penetration-testing/SKILL.md +500 -0
- package/skills/social-content/SKILL.md +807 -0
- package/skills/software-architecture/SKILL.md +75 -0
- package/skills/sql-injection-testing/SKILL.md +448 -0
- package/skills/sqlmap-database-pentesting/SKILL.md +400 -0
- package/skills/ssh-penetration-testing/SKILL.md +488 -0
- package/skills/stripe-integration/SKILL.md +69 -0
- package/skills/subagent-driven-development/SKILL.md +240 -0
- package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +20 -0
- package/skills/subagent-driven-development/implementer-prompt.md +78 -0
- package/skills/subagent-driven-development/spec-reviewer-prompt.md +61 -0
- package/skills/tavily-web/SKILL.md +36 -0
- package/skills/telegram-bot-builder/SKILL.md +254 -0
- package/skills/test-driven-development/SKILL.md +371 -0
- package/skills/test-driven-development/testing-anti-patterns.md +299 -0
- package/skills/test-fixing/SKILL.md +119 -0
- package/skills/top-web-vulnerabilities/SKILL.md +543 -0
- package/skills/trigger-dev/SKILL.md +67 -0
- package/skills/twilio-communications/SKILL.md +295 -0
- package/skills/upstash-qstash/SKILL.md +68 -0
- package/skills/verification-before-completion/SKILL.md +139 -0
- package/skills/voice-agents/SKILL.md +68 -0
- package/skills/voice-ai-development/SKILL.md +302 -0
- package/skills/windows-privilege-escalation/SKILL.md +496 -0
- package/skills/wireshark-analysis/SKILL.md +497 -0
- package/skills/wordpress-penetration-testing/SKILL.md +485 -0
- package/skills/workflow-automation/SKILL.md +68 -0
- package/skills/xss-html-injection/SKILL.md +499 -0
- package/skills/zapier-make-patterns/SKILL.md +67 -0
package/skills/llm-app-patterns/SKILL.md
@@ -0,0 +1,760 @@
---
name: llm-app-patterns
description: "Production-ready patterns for building LLM applications. Covers RAG pipelines, agent architectures, prompt IDEs, and LLMOps monitoring. Use when designing AI applications, implementing RAG, building agents, or setting up LLM observability."
---

# 🤖 LLM Application Patterns

> Production-ready patterns for building LLM applications, inspired by [Dify](https://github.com/langgenius/dify) and industry best practices.

## When to Use This Skill

Use this skill when:

- Designing LLM-powered applications
- Implementing RAG (Retrieval-Augmented Generation)
- Building AI agents with tools
- Setting up LLMOps monitoring
- Choosing between agent architectures

---

## 1. RAG Pipeline Architecture

### Overview

RAG (Retrieval-Augmented Generation) grounds LLM responses in your data.

```
┌─────────────┐     ┌─────────────┐     ┌─────────────┐
│   Ingest    │────▶│  Retrieve   │────▶│  Generate   │
│  Documents  │     │   Context   │     │  Response   │
└─────────────┘     └─────────────┘     └─────────────┘
       │                   │                   │
       ▼                   ▼                   ▼
 ┌─────────┐         ┌───────────┐       ┌───────────┐
 │ Chunking│         │  Vector   │       │   LLM     │
 │Embedding│         │  Search   │       │ + Context │
 └─────────┘         └───────────┘       └───────────┘
```

### 1.1 Document Ingestion

```python
# Chunking strategies
class ChunkingStrategy:
    # Fixed-size chunks (simple but may break context)
    FIXED_SIZE = "fixed_size"  # e.g., 512 tokens

    # Semantic chunking (preserves meaning)
    SEMANTIC = "semantic"  # Split on paragraphs/sections

    # Recursive splitting (tries multiple separators)
    RECURSIVE = "recursive"  # ["\n\n", "\n", " ", ""]

    # Document-aware (respects structure)
    DOCUMENT_AWARE = "document_aware"  # Headers, lists, etc.

# Recommended settings
CHUNK_CONFIG = {
    "chunk_size": 512,      # tokens
    "chunk_overlap": 50,    # token overlap between chunks
    "separators": ["\n\n", "\n", ". ", " "],
}
```
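
The recursive strategy is listed above but not shown. Here is a minimal sketch, assuming character counts stand in for tokens; the function name and fallback logic are illustrative, not tied to any specific library:

```python
# Minimal sketch of RECURSIVE chunking; character-based sizes for simplicity,
# a production version would count tokens and reuse CHUNK_CONFIG above
def recursive_chunk(text: str, chunk_size: int = 512,
                    separators: tuple[str, ...] = ("\n\n", "\n", ". ", " ")) -> list[str]:
    if len(text) <= chunk_size:
        return [text]
    # Try the coarsest separator first, fall back to finer ones
    for sep in separators:
        parts = text.split(sep)
        if len(parts) > 1:
            chunks, current = [], ""
            for part in parts:
                candidate = f"{current}{sep}{part}" if current else part
                if len(candidate) > chunk_size and current:
                    chunks.append(current)
                    current = part
                else:
                    current = candidate
            if current:
                chunks.append(current)
            # Recurse into any piece that is still too large
            return [c for chunk in chunks
                      for c in recursive_chunk(chunk, chunk_size, separators)]
    # No separator worked: hard split
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
```

LangChain's `RecursiveCharacterTextSplitter` implements the same idea with a configurable length function for token-aware sizing.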

### 1.2 Embedding & Storage

```python
# Vector database selection
VECTOR_DB_OPTIONS = {
    "pinecone": {
        "use_case": "Production, managed service",
        "scale": "Billions of vectors",
        "features": ["Hybrid search", "Metadata filtering"]
    },
    "weaviate": {
        "use_case": "Self-hosted, multi-modal",
        "scale": "Millions of vectors",
        "features": ["GraphQL API", "Modules"]
    },
    "chromadb": {
        "use_case": "Development, prototyping",
        "scale": "Thousands of vectors",
        "features": ["Simple API", "In-memory option"]
    },
    "pgvector": {
        "use_case": "Existing Postgres infrastructure",
        "scale": "Millions of vectors",
        "features": ["SQL integration", "ACID compliance"]
    }
}

# Embedding model selection
EMBEDDING_MODELS = {
    "openai/text-embedding-3-small": {
        "dimensions": 1536,
        "cost": "$0.02/1M tokens",
        "quality": "Good for most use cases"
    },
    "openai/text-embedding-3-large": {
        "dimensions": 3072,
        "cost": "$0.13/1M tokens",
        "quality": "Best for complex queries"
    },
    "local/bge-large": {
        "dimensions": 1024,
        "cost": "Free (compute only)",
        "quality": "Comparable to OpenAI small"
    }
}
```
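
As a concrete example of the "development, prototyping" row, a minimal ingest-and-query round trip with ChromaDB. The collection name and documents are made up; ChromaDB embeds with its default embedding function unless you pass your own:

```python
# Minimal sketch: ingest and query with ChromaDB (prototyping option above)
import chromadb

client = chromadb.Client()  # in-memory; use chromadb.PersistentClient(path=...) to persist
collection = client.create_collection("docs")

# Ingest: documents are embedded with ChromaDB's default embedding function
collection.add(
    ids=["doc-1", "doc-2"],
    documents=[
        "RAG grounds LLM answers in retrieved context.",
        "Chunk overlap preserves continuity across chunk boundaries.",
    ],
    metadatas=[{"source": "notes.md"}, {"source": "notes.md"}],
)

# Query: top-k semantic search
results = collection.query(query_texts=["How does RAG reduce hallucination?"], n_results=2)
print(results["documents"][0])
```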

### 1.3 Retrieval Strategies

```python
# Basic semantic search
def semantic_search(query: str, top_k: int = 5):
    query_embedding = embed(query)
    results = vector_db.similarity_search(
        query_embedding,
        top_k=top_k
    )
    return results

# Hybrid search (semantic + keyword)
def hybrid_search(query: str, top_k: int = 5, alpha: float = 0.5):
    """
    alpha=1.0: Pure semantic
    alpha=0.0: Pure keyword (BM25)
    alpha=0.5: Balanced
    """
    semantic_results = semantic_search(query, top_k)
    keyword_results = bm25_search(query)

    # Reciprocal Rank Fusion
    return rrf_merge(semantic_results, keyword_results, alpha)

# Multi-query retrieval
def multi_query_retrieval(query: str):
    """Generate multiple query variations for better recall"""
    queries = llm.generate_query_variations(query, n=3)
    all_results = []
    for q in queries:
        all_results.extend(semantic_search(q))
    return deduplicate(all_results)

# Contextual compression
def compressed_retrieval(query: str):
    """Retrieve then compress to relevant parts only"""
    docs = semantic_search(query, top_k=10)
    compressed = llm.extract_relevant_parts(docs, query)
    return compressed
```
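
`rrf_merge` is referenced above but not defined. A minimal sketch of Reciprocal Rank Fusion, assuming each result exposes a stable `id`; using `alpha` to weight the semantic list is an illustrative choice (plain RRF omits the weighting):

```python
# Sketch of rrf_merge: Reciprocal Rank Fusion with an alpha weight on the
# semantic ranking; assumes each result has a stable `id` attribute
def rrf_merge(semantic_results, keyword_results, alpha: float = 0.5, k: int = 60):
    scores, by_id = {}, {}
    for weight, results in ((alpha, semantic_results), (1 - alpha, keyword_results)):
        for rank, doc in enumerate(results):
            by_id[doc.id] = doc
            scores[doc.id] = scores.get(doc.id, 0.0) + weight / (k + rank + 1)
    ranked = sorted(scores, key=scores.get, reverse=True)
    return [by_id[doc_id] for doc_id in ranked]
```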

### 1.4 Generation with Context

```python
RAG_PROMPT_TEMPLATE = """
Answer the user's question based ONLY on the following context.
If the context doesn't contain enough information, say "I don't have enough information to answer that."

Context:
{context}

Question: {question}

Answer:"""

def generate_with_rag(question: str):
    # Retrieve
    context_docs = hybrid_search(question, top_k=5)
    context = "\n\n".join([doc.content for doc in context_docs])

    # Generate
    prompt = RAG_PROMPT_TEMPLATE.format(
        context=context,
        question=question
    )

    response = llm.generate(prompt)

    # Return with citations
    return {
        "answer": response,
        "sources": [doc.metadata for doc in context_docs]
    }
```

---

## 2. Agent Architectures

### 2.1 ReAct Pattern (Reasoning + Acting)

```
Thought: I need to search for information about X
Action: search("X")
Observation: [search results]
Thought: Based on the results, I should...
Action: calculate(...)
Observation: [calculation result]
Thought: I now have enough information
Action: final_answer("The answer is...")
```

```python
REACT_PROMPT = """
You are an AI assistant that can use tools to answer questions.

Available tools:
{tools_description}

Use this format:
Thought: [your reasoning about what to do next]
Action: [tool_name(arguments)]
Observation: [tool result - this will be filled in]
... (repeat Thought/Action/Observation as needed)
Thought: I have enough information to answer
Final Answer: [your final response]

Question: {question}
"""

class ReActAgent:
    def __init__(self, tools: list, llm):
        self.tools = {t.name: t for t in tools}
        self.llm = llm
        self.max_iterations = 10

    def run(self, question: str) -> str:
        prompt = REACT_PROMPT.format(
            tools_description=self._format_tools(),
            question=question
        )

        for _ in range(self.max_iterations):
            response = self.llm.generate(prompt)

            if "Final Answer:" in response:
                return self._extract_final_answer(response)

            action = self._parse_action(response)
            observation = self._execute_tool(action)
            # Append the model's reasoning and the tool result so the next
            # iteration sees the full Thought/Action/Observation history
            prompt += f"\n{response}\nObservation: {observation}\n"

        return "Max iterations reached"
```
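
The `_parse_action` and `_execute_tool` helpers are left abstract above. One way to fill them in, assuming the model follows the `Action: tool_name(arguments)` format and each tool exposes a `run()` method (both are assumptions of this sketch; real parsers need to be more forgiving):

```python
import re

# Possible bodies for the helpers; these would live as methods on ReActAgent above
def _parse_action(self, response: str) -> tuple[str, str]:
    """Pull tool name and raw argument string from the last Action: line."""
    matches = re.findall(r"Action:\s*(\w+)\((.*)\)", response)
    if not matches:
        raise ValueError(f"No Action found in: {response!r}")
    tool_name, raw_args = matches[-1]
    return tool_name, raw_args.strip()

def _execute_tool(self, action: tuple[str, str]) -> str:
    tool_name, raw_args = action
    tool = self.tools.get(tool_name)
    if tool is None:
        return f"Unknown tool: {tool_name}"  # fed back to the model as an Observation
    try:
        return str(tool.run(raw_args))
    except Exception as exc:  # surface tool failures instead of crashing the loop
        return f"Tool error: {exc}"
```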

### 2.2 Function Calling Pattern

```python
# Define tools as functions with schemas
TOOLS = [
    {
        "name": "search_web",
        "description": "Search the web for current information",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query"
                }
            },
            "required": ["query"]
        }
    },
    {
        "name": "calculate",
        "description": "Perform mathematical calculations",
        "parameters": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Math expression to evaluate"
                }
            },
            "required": ["expression"]
        }
    }
]

class FunctionCallingAgent:
    def run(self, question: str) -> str:
        messages = [{"role": "user", "content": question}]

        while True:
            response = self.llm.chat(
                messages=messages,
                tools=TOOLS,
                tool_choice="auto"
            )

            if response.tool_calls:
                # Keep the assistant's tool-call message in the history,
                # then append one tool result per call
                messages.append({
                    "role": "assistant",
                    "content": response.content,
                    "tool_calls": response.tool_calls
                })
                for tool_call in response.tool_calls:
                    result = self._execute_tool(
                        tool_call.name,
                        tool_call.arguments
                    )
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "content": str(result)
                    })
            else:
                return response.content
```
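
The `_execute_tool` dispatch can stay a plain lookup table. A minimal sketch, assuming `search_web` and `calculate` are implemented elsewhere and that tool arguments arrive as a JSON string, as OpenAI-style APIs return them (both are assumptions here):

```python
import json

# Sketch of the dispatch used by FunctionCallingAgent above;
# the handler functions are assumed to exist elsewhere
TOOL_HANDLERS = {
    "search_web": lambda args: search_web(args["query"]),
    "calculate": lambda args: calculate(args["expression"]),
}

def _execute_tool(self, name: str, arguments: str):
    args = json.loads(arguments) if isinstance(arguments, str) else arguments
    handler = TOOL_HANDLERS.get(name)
    if handler is None:
        return f"Unknown tool: {name}"
    return handler(args)
```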

### 2.3 Plan-and-Execute Pattern

```python
class PlanAndExecuteAgent:
    """
    1. Create a plan (list of steps)
    2. Execute each step
    3. Replan if needed
    """

    def run(self, task: str) -> str:
        # Planning phase
        plan = self.planner.create_plan(task)
        # Returns: ["Step 1: ...", "Step 2: ...", ...]

        results = []
        step_index = 0
        while step_index < len(plan):
            # Execute the next step
            result = self.executor.execute(plan[step_index], context=results)
            results.append(result)
            step_index += 1

            # Check if replan needed: keep completed steps, replace the rest
            if self._needs_replan(task, results):
                plan = plan[:step_index] + self.planner.replan(
                    task,
                    completed=results,
                    remaining=plan[step_index:]
                )

        # Synthesize final answer
        return self.synthesizer.summarize(task, results)
```

### 2.4 Multi-Agent Collaboration

```python
class AgentTeam:
    """
    Specialized agents collaborating on complex tasks
    """

    def __init__(self):
        self.agents = {
            "researcher": ResearchAgent(),
            "analyst": AnalystAgent(),
            "writer": WriterAgent(),
            "critic": CriticAgent()
        }
        self.coordinator = CoordinatorAgent()

    def solve(self, task: str) -> str:
        # Coordinator assigns subtasks
        assignments = self.coordinator.decompose(task)

        results = {}
        for assignment in assignments:
            agent = self.agents[assignment.agent]
            result = agent.execute(
                assignment.subtask,
                context=results
            )
            results[assignment.id] = result

        # Critic reviews
        critique = self.agents["critic"].review(results)

        if critique.needs_revision:
            # Iterate with feedback
            return self.solve_with_feedback(task, results, critique)

        return self.coordinator.synthesize(results)
```

---

## 3. Prompt IDE Patterns

### 3.1 Prompt Templates with Variables

```python
class PromptTemplate:
    def __init__(self, template: str, variables: list[str]):
        self.template = template
        self.variables = variables

    def format(self, **kwargs) -> str:
        # Validate all variables provided
        missing = set(self.variables) - set(kwargs.keys())
        if missing:
            raise ValueError(f"Missing variables: {missing}")

        return self.template.format(**kwargs)

    def with_examples(self, examples: list[dict]) -> str:
        """Add few-shot examples"""
        example_text = "\n\n".join([
            f"Input: {ex['input']}\nOutput: {ex['output']}"
            for ex in examples
        ])
        return f"{example_text}\n\n{self.template}"

# Usage
summarizer = PromptTemplate(
    template="Summarize the following text in {style} style:\n\n{text}",
    variables=["style", "text"]
)

prompt = summarizer.format(
    style="professional",
    text="Long article content..."
)
```

### 3.2 Prompt Versioning & A/B Testing

```python
import hashlib
from datetime import datetime

class PromptRegistry:
    def __init__(self, db):
        self.db = db

    def register(self, name: str, template: str, version: str):
        """Store prompt with version"""
        self.db.save({
            "name": name,
            "template": template,
            "version": version,
            "created_at": datetime.now(),
            "metrics": {}
        })

    def get(self, name: str, version: str = "latest") -> str:
        """Retrieve specific version"""
        return self.db.get(name, version)

    def ab_test(self, name: str, user_id: str) -> str:
        """Return variant based on user bucket"""
        variants = self.db.get_all_versions(name)
        # Stable bucketing: Python's built-in hash() is salted per process
        bucket = int(hashlib.sha256(user_id.encode()).hexdigest(), 16) % len(variants)
        return variants[bucket]

    def record_outcome(self, prompt_id: str, outcome: dict):
        """Track prompt performance"""
        self.db.update_metrics(prompt_id, outcome)
```
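
A possible usage flow for the registry above; the `db` backend, the `prompt_id` format, and the metric names are placeholders:

```python
# Possible usage of PromptRegistry; db backend and metric names are placeholders
registry = PromptRegistry(db=my_prompt_store)

registry.register(
    name="summarizer",
    template="Summarize the following text in {style} style:\n\n{text}",
    version="v2",
)

# Serve a variant per user, then feed outcomes back for comparison
template = registry.ab_test("summarizer", user_id="user-123")
registry.record_outcome("summarizer:v2", {"thumbs_up": 1, "latency_ms": 850})
```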

### 3.3 Prompt Chaining

```python
class PromptChain:
    """
    Chain prompts together, passing output as input to next
    """

    def __init__(self, steps: list[dict]):
        self.steps = steps

    def run(self, initial_input: str) -> dict:
        context = {"input": initial_input}
        results = []

        for step in self.steps:
            prompt = step["prompt"].format(**context)
            output = llm.generate(prompt)

            # Parse output if needed
            if step.get("parser"):
                output = step["parser"](output)

            context[step["output_key"]] = output
            results.append({
                "step": step["name"],
                "output": output
            })

        return {
            "final_output": context[self.steps[-1]["output_key"]],
            "intermediate_results": results
        }

# Example: Research → Analyze → Summarize
chain = PromptChain([
    {
        "name": "research",
        "prompt": "Research the topic: {input}",
        "output_key": "research"
    },
    {
        "name": "analyze",
        "prompt": "Analyze these findings:\n{research}",
        "output_key": "analysis"
    },
    {
        "name": "summarize",
        "prompt": "Summarize this analysis in 3 bullet points:\n{analysis}",
        "output_key": "summary"
    }
])
```
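
Running the example chain then looks like this (the topic string is arbitrary):

```python
# Running the example chain above; the topic is arbitrary
result = chain.run("vector databases for RAG")
print(result["final_output"])  # the 3-bullet summary
for step in result["intermediate_results"]:
    print(step["step"], "->", len(str(step["output"])), "chars")
```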

---

## 4. LLMOps & Observability

### 4.1 Metrics to Track

```python
LLM_METRICS = {
    # Performance
    "latency_p50": "50th percentile response time",
    "latency_p99": "99th percentile response time",
    "tokens_per_second": "Generation speed",

    # Quality
    "user_satisfaction": "Thumbs up/down ratio",
    "task_completion": "% tasks completed successfully",
    "hallucination_rate": "% responses with factual errors",

    # Cost
    "cost_per_request": "Average $ per API call",
    "tokens_per_request": "Average tokens used",
    "cache_hit_rate": "% requests served from cache",

    # Reliability
    "error_rate": "% failed requests",
    "timeout_rate": "% requests that timed out",
    "retry_rate": "% requests needing retry"
}
```

### 4.2 Logging & Tracing

```python
import json
import logging
from datetime import datetime

from opentelemetry import trace

tracer = trace.get_tracer(__name__)

class LLMLogger:
    def log_request(self, request_id: str, data: dict):
        """Log LLM request for debugging and analysis"""
        log_entry = {
            "request_id": request_id,
            "timestamp": datetime.now().isoformat(),
            "model": data["model"],
            "prompt": data["prompt"][:500],  # Truncate for storage
            "prompt_tokens": data["prompt_tokens"],
            "temperature": data.get("temperature", 1.0),
            "user_id": data.get("user_id"),
        }
        logging.info(f"LLM_REQUEST: {json.dumps(log_entry)}")

    def log_response(self, request_id: str, data: dict):
        """Log LLM response"""
        log_entry = {
            "request_id": request_id,
            "completion_tokens": data["completion_tokens"],
            "total_tokens": data["total_tokens"],
            "latency_ms": data["latency_ms"],
            "finish_reason": data["finish_reason"],
            "cost_usd": self._calculate_cost(data),
        }
        logging.info(f"LLM_RESPONSE: {json.dumps(log_entry)}")

# Distributed tracing
@tracer.start_as_current_span("llm_call")
def call_llm(prompt: str) -> str:
    span = trace.get_current_span()
    span.set_attribute("prompt.length", len(prompt))

    response = llm.generate(prompt)

    span.set_attribute("response.length", len(response.content))
    span.set_attribute("tokens.total", response.usage.total_tokens)

    return response.content
```
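
`_calculate_cost` is referenced but not shown. A minimal sketch with a hand-maintained price table; the per-token rates below are placeholders, and the sketch assumes the response payload also carries the model name and token counts:

```python
# Sketch of _calculate_cost; would live on LLMLogger above.
# The per-1M-token prices are placeholders, keep this table in sync with your provider.
PRICE_PER_1M_TOKENS = {
    # model: (input_usd, output_usd)
    "gpt-4-turbo": (10.00, 30.00),
    "gpt-3.5-turbo": (0.50, 1.50),
}

def _calculate_cost(self, data: dict) -> float:
    input_price, output_price = PRICE_PER_1M_TOKENS.get(data.get("model"), (0.0, 0.0))
    prompt_tokens = data.get("prompt_tokens",
                             data["total_tokens"] - data["completion_tokens"])
    return round(
        prompt_tokens * input_price / 1_000_000
        + data["completion_tokens"] * output_price / 1_000_000,
        6,
    )
```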

### 4.3 Evaluation Framework

```python
class LLMEvaluator:
    """
    Evaluate LLM outputs for quality
    """

    def evaluate_response(self,
                          question: str,
                          response: str,
                          ground_truth: str = None) -> dict:
        scores = {}

        # Relevance: Does it answer the question?
        scores["relevance"] = self._score_relevance(question, response)

        # Coherence: Is it well-structured?
        scores["coherence"] = self._score_coherence(response)

        # Groundedness: Is it based on provided context?
        scores["groundedness"] = self._score_groundedness(response)

        # Accuracy: Does it match ground truth?
        if ground_truth:
            scores["accuracy"] = self._score_accuracy(response, ground_truth)

        # Safety: Is it free of harmful content?
        scores["safety"] = self._score_safety(response)

        return scores

    def run_benchmark(self, test_cases: list[dict]) -> dict:
        """Run evaluation on test set"""
        results = []
        for case in test_cases:
            response = llm.generate(case["prompt"])
            scores = self.evaluate_response(
                question=case["prompt"],
                response=response,
                ground_truth=case.get("expected")
            )
            results.append(scores)

        return self._aggregate_scores(results)
```
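
The individual scorers can be rule-based or LLM-as-judge. A sketch of `_score_relevance` using the judge approach; the prompt wording and the 1-5 scale are illustrative choices, not a fixed standard:

```python
# Sketch of an LLM-as-judge relevance scorer; would live on LLMEvaluator above
JUDGE_PROMPT = """Rate from 1 (irrelevant) to 5 (fully answers the question)
how well the response addresses the question. Reply with a single digit.

Question: {question}
Response: {response}

Rating:"""

def _score_relevance(self, question: str, response: str) -> float:
    raw = llm.generate(
        JUDGE_PROMPT.format(question=question, response=response),
        temperature=0,  # deterministic judging
    )
    digits = [c for c in raw if c.isdigit()]
    rating = int(digits[0]) if digits else 1
    return rating / 5.0  # normalize to 0-1
```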

---

## 5. Production Patterns

### 5.1 Caching Strategy

```python
import hashlib
import json

class LLMCache:
    def __init__(self, redis_client, ttl_seconds=3600):
        self.redis = redis_client
        self.ttl = ttl_seconds

    def _cache_key(self, prompt: str, model: str, **kwargs) -> str:
        """Generate deterministic cache key"""
        content = f"{model}:{prompt}:{json.dumps(kwargs, sort_keys=True)}"
        return hashlib.sha256(content.encode()).hexdigest()

    def get_or_generate(self, prompt: str, model: str, **kwargs) -> str:
        key = self._cache_key(prompt, model, **kwargs)

        # Check cache
        cached = self.redis.get(key)
        if cached:
            return cached.decode()

        # Generate
        response = llm.generate(prompt, model=model, **kwargs)

        # Cache (only cache deterministic outputs)
        if kwargs.get("temperature", 1.0) == 0:
            self.redis.setex(key, self.ttl, response)

        return response
```

### 5.2 Rate Limiting & Retry

```python
import time
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential

class RateLimiter:
    def __init__(self, requests_per_minute: int):
        self.rpm = requests_per_minute
        self.timestamps = []

    def acquire(self):
        """Wait if rate limit would be exceeded"""
        now = time.time()

        # Remove old timestamps
        self.timestamps = [t for t in self.timestamps if now - t < 60]

        if len(self.timestamps) >= self.rpm:
            sleep_time = 60 - (now - self.timestamps[0])
            time.sleep(sleep_time)

        self.timestamps.append(time.time())

# Retry with exponential backoff
def _is_retryable(exc: BaseException) -> bool:
    """Retry rate limits and 5xx server errors; fail fast on client errors."""
    if isinstance(exc, RateLimitError):
        return True
    return isinstance(exc, APIError) and exc.status_code >= 500

@retry(
    wait=wait_exponential(multiplier=1, min=4, max=60),
    stop=stop_after_attempt(5),
    retry=retry_if_exception(_is_retryable),
)
def call_llm_with_retry(prompt: str) -> str:
    return llm.generate(prompt)
```
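
Combining the two pieces above, a request path might look like this; the 60 RPM budget is an arbitrary example:

```python
# Example wiring of the limiter with the retrying call (60 RPM is arbitrary)
limiter = RateLimiter(requests_per_minute=60)

def safe_generate(prompt: str) -> str:
    limiter.acquire()                    # stay under the provider's rate limit
    return call_llm_with_retry(prompt)   # back off and retry transient failures
```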

### 5.3 Fallback Strategy

```python
class LLMWithFallback:
    def __init__(self, primary: str, fallbacks: list[str]):
        self.primary = primary
        self.fallbacks = fallbacks

    def generate(self, prompt: str, **kwargs) -> str:
        models = [self.primary] + self.fallbacks

        for model in models:
            try:
                return llm.generate(prompt, model=model, **kwargs)
            except (RateLimitError, APIError) as e:
                logging.warning(f"Model {model} failed: {e}")
                continue

        raise AllModelsFailedError("All models exhausted")

# Usage
llm_client = LLMWithFallback(
    primary="gpt-4-turbo",
    fallbacks=["gpt-3.5-turbo", "claude-3-sonnet"]
)
```

---

## Architecture Decision Matrix

| Pattern              | Use When         | Complexity | Cost      |
| :------------------- | :--------------- | :--------- | :-------- |
| **Simple RAG**       | FAQ, docs search | Low        | Low       |
| **Hybrid RAG**       | Mixed queries    | Medium     | Medium    |
| **ReAct Agent**      | Multi-step tasks | Medium     | Medium    |
| **Function Calling** | Structured tools | Low        | Low       |
| **Plan-Execute**     | Complex tasks    | High       | High      |
| **Multi-Agent**      | Research tasks   | Very High  | Very High |

---

## Resources

- [Dify Platform](https://github.com/langgenius/dify)
- [LangChain Docs](https://python.langchain.com/)
- [LlamaIndex](https://www.llamaindex.ai/)
- [Anthropic Cookbook](https://github.com/anthropics/anthropic-cookbook)