red64-cli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/parseArgs.d.ts.map +1 -1
- package/dist/cli/parseArgs.js +5 -0
- package/dist/cli/parseArgs.js.map +1 -1
- package/dist/components/init/CompleteStep.d.ts.map +1 -1
- package/dist/components/init/CompleteStep.js +2 -2
- package/dist/components/init/CompleteStep.js.map +1 -1
- package/dist/components/init/TestCheckStep.d.ts +16 -0
- package/dist/components/init/TestCheckStep.d.ts.map +1 -0
- package/dist/components/init/TestCheckStep.js +120 -0
- package/dist/components/init/TestCheckStep.js.map +1 -0
- package/dist/components/init/index.d.ts +1 -0
- package/dist/components/init/index.d.ts.map +1 -1
- package/dist/components/init/index.js +1 -0
- package/dist/components/init/index.js.map +1 -1
- package/dist/components/init/types.d.ts +9 -0
- package/dist/components/init/types.d.ts.map +1 -1
- package/dist/components/screens/InitScreen.d.ts.map +1 -1
- package/dist/components/screens/InitScreen.js +69 -6
- package/dist/components/screens/InitScreen.js.map +1 -1
- package/dist/components/screens/StartScreen.d.ts.map +1 -1
- package/dist/components/screens/StartScreen.js +89 -3
- package/dist/components/screens/StartScreen.js.map +1 -1
- package/dist/services/ConfigService.d.ts +1 -0
- package/dist/services/ConfigService.d.ts.map +1 -1
- package/dist/services/ConfigService.js.map +1 -1
- package/dist/services/ProjectDetector.d.ts +28 -0
- package/dist/services/ProjectDetector.d.ts.map +1 -0
- package/dist/services/ProjectDetector.js +236 -0
- package/dist/services/ProjectDetector.js.map +1 -0
- package/dist/services/TestRunner.d.ts +46 -0
- package/dist/services/TestRunner.d.ts.map +1 -0
- package/dist/services/TestRunner.js +85 -0
- package/dist/services/TestRunner.js.map +1 -0
- package/dist/services/index.d.ts +2 -0
- package/dist/services/index.d.ts.map +1 -1
- package/dist/services/index.js +2 -0
- package/dist/services/index.js.map +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js.map +1 -1
- package/framework/agents/claude/.claude/agents/red64/spec-impl.md +131 -2
- package/framework/agents/claude/.claude/commands/red64/spec-impl.md +24 -0
- package/framework/agents/codex/.codex/agents/red64/spec-impl.md +131 -2
- package/framework/agents/codex/.codex/commands/red64/spec-impl.md +24 -0
- package/framework/stacks/generic/feedback.md +80 -0
- package/framework/stacks/nextjs/accessibility.md +437 -0
- package/framework/stacks/nextjs/api.md +431 -0
- package/framework/stacks/nextjs/coding-style.md +282 -0
- package/framework/stacks/nextjs/commenting.md +226 -0
- package/framework/stacks/nextjs/components.md +411 -0
- package/framework/stacks/nextjs/conventions.md +333 -0
- package/framework/stacks/nextjs/css.md +310 -0
- package/framework/stacks/nextjs/error-handling.md +442 -0
- package/framework/stacks/nextjs/feedback.md +124 -0
- package/framework/stacks/nextjs/migrations.md +332 -0
- package/framework/stacks/nextjs/models.md +362 -0
- package/framework/stacks/nextjs/queries.md +410 -0
- package/framework/stacks/nextjs/responsive.md +338 -0
- package/framework/stacks/nextjs/tech-stack.md +177 -0
- package/framework/stacks/nextjs/test-writing.md +475 -0
- package/framework/stacks/nextjs/validation.md +467 -0
- package/framework/stacks/python/api.md +468 -0
- package/framework/stacks/python/authentication.md +342 -0
- package/framework/stacks/python/code-quality.md +283 -0
- package/framework/stacks/python/code-refactoring.md +315 -0
- package/framework/stacks/python/coding-style.md +462 -0
- package/framework/stacks/python/conventions.md +399 -0
- package/framework/stacks/python/error-handling.md +512 -0
- package/framework/stacks/python/feedback.md +92 -0
- package/framework/stacks/python/implement-ai-llm.md +468 -0
- package/framework/stacks/python/migrations.md +388 -0
- package/framework/stacks/python/models.md +399 -0
- package/framework/stacks/python/python.md +232 -0
- package/framework/stacks/python/queries.md +451 -0
- package/framework/stacks/python/structure.md +245 -58
- package/framework/stacks/python/tech.md +92 -35
- package/framework/stacks/python/testing.md +380 -0
- package/framework/stacks/python/validation.md +471 -0
- package/framework/stacks/rails/authentication.md +176 -0
- package/framework/stacks/rails/code-quality.md +287 -0
- package/framework/stacks/rails/code-refactoring.md +299 -0
- package/framework/stacks/rails/feedback.md +130 -0
- package/framework/stacks/rails/implement-ai-llm-with-rubyllm.md +342 -0
- package/framework/stacks/rails/rails.md +301 -0
- package/framework/stacks/rails/rails8-best-practices.md +498 -0
- package/framework/stacks/rails/rails8-css.md +573 -0
- package/framework/stacks/rails/structure.md +140 -0
- package/framework/stacks/rails/tech.md +108 -0
- package/framework/stacks/react/code-quality.md +521 -0
- package/framework/stacks/react/components.md +625 -0
- package/framework/stacks/react/data-fetching.md +586 -0
- package/framework/stacks/react/feedback.md +110 -0
- package/framework/stacks/react/forms.md +694 -0
- package/framework/stacks/react/performance.md +640 -0
- package/framework/stacks/react/product.md +22 -9
- package/framework/stacks/react/state-management.md +472 -0
- package/framework/stacks/react/structure.md +351 -44
- package/framework/stacks/react/tech.md +219 -30
- package/framework/stacks/react/testing.md +690 -0
- package/package.json +1 -1
- package/framework/stacks/node/product.md +0 -27
- package/framework/stacks/node/structure.md +0 -82
- package/framework/stacks/node/tech.md +0 -63

@@ -0,0 +1,468 @@
# AI/LLM Implementation in Python

Project memory for implementing AI-powered features using litellm, OpenAI SDK, and Pydantic structured outputs.

---

## Overview

Python AI integration uses litellm as a unified multi-provider interface, with Pydantic for structured outputs. The architecture separates configuration, client management, and domain-specific services.

---

## Architecture Layers

### 1. Configuration

```python
# app/config.py
from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    # LLM providers
    openai_api_key: str = ""
    anthropic_api_key: str = ""

    # Defaults
    default_llm_model: str = "claude-sonnet-4-20250514"
    extraction_model: str = "claude-haiku-3-5-20241022"
    embedding_model: str = "text-embedding-3-small"

    # Limits
    llm_request_timeout: int = 120
    llm_max_retries: int = 3
```

### 2. Client Layer (litellm)

```python
# app/services/llm_client.py
import litellm
from app.config import settings

# Configure globally
litellm.api_key = settings.openai_api_key
litellm.set_verbose = False

async def completion(
    prompt: str,
    model: str | None = None,
    system_prompt: str | None = None,
    temperature: float = 0.7,
    max_tokens: int = 4096,
) -> str:
    """Unified completion call across providers."""
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    response = await litellm.acompletion(
        model=model or settings.default_llm_model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        timeout=settings.llm_request_timeout,
        num_retries=settings.llm_max_retries,
    )
    return response.choices[0].message.content
```

### 3. Alternative: OpenAI SDK Directly

```python
from openai import AsyncOpenAI

client = AsyncOpenAI(api_key=settings.openai_api_key)

async def completion(prompt: str, model: str = "gpt-4o") -> str:
    response = await client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
```

---

## Model Selection Strategy

| Use Case | Model | Rationale |
|----------|-------|-----------|
| **Extraction** (entities, structured data) | Claude Haiku / GPT-4o-mini | Fast, cost-effective |
| **Generation** (ideas, content) | Claude Sonnet / GPT-4o | Higher quality |
| **Embeddings** | text-embedding-3-small | Cost-effective vectors |
| **Complex reasoning** | Claude Opus / o1 | Multi-step analysis |

```python
# Constants avoid magic strings
EXTRACTION_MODEL = "claude-haiku-3-5-20241022"
GENERATION_MODEL = "claude-sonnet-4-20250514"
EMBEDDING_MODEL = "text-embedding-3-small"
```

---

## Structured Outputs with Pydantic

### Pattern: Type-Safe LLM Responses

```python
from pydantic import BaseModel, Field

class ExtractedIdea(BaseModel):
    title: str = Field(description="Clear, engaging title")
    summary: str = Field(description="2-3 sentence summary")
    source_indices: list[int] = Field(description="Indices of supporting sources")
    confidence: float = Field(ge=0.0, le=1.0, description="Confidence score")

class IdeaExtractionResult(BaseModel):
    ideas: list[ExtractedIdea]
    overall_theme: str
```

### Using with OpenAI Structured Outputs

```python
from openai import AsyncOpenAI

client = AsyncOpenAI()

async def extract_ideas(content: str) -> IdeaExtractionResult:
    response = await client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": EXTRACTION_SYSTEM_PROMPT},
            {"role": "user", "content": content},
        ],
        response_format=IdeaExtractionResult,
    )
    return response.choices[0].message.parsed
```

### Using with litellm (JSON mode)

```python
import json

async def extract_ideas(content: str) -> IdeaExtractionResult:
    raw = await completion(
        prompt=f"Extract ideas from:\n\n{content}",
        system_prompt=EXTRACTION_SYSTEM_PROMPT,
        model=EXTRACTION_MODEL,
    )
    parsed = json.loads(extract_json(raw))
    return IdeaExtractionResult.model_validate(parsed)
```

---

## Prompt Engineering Patterns

### System Prompts as Constants

```python
EXTRACTION_SYSTEM_PROMPT = """\
You are an expert content analyst. Extract structured information from the provided content.

Rules:
1. Be precise and factual - only extract what is explicitly stated
2. Provide confidence scores based on evidence strength
3. Return valid JSON matching the requested schema exactly
"""

GENERATION_SYSTEM_PROMPT = """\
You are an expert content strategist. Generate compelling content ideas based on source material.

Rules:
1. Each idea should be actionable and specific
2. Reference source material by index
3. Maintain the original tone and audience
"""
```

### Prompt Templates

```python
from string import Template

IDEA_PROMPT = Template("""\
Analyze the following $source_count source(s) and generate content ideas.

Sources:
$sources

Generate $idea_count unique content ideas as a JSON array with objects containing:
- title: string
- summary: string (2-3 sentences)
- source_indices: list of integers
- confidence: float (0.0 to 1.0)

Respond with ONLY valid JSON, no markdown formatting.
""")

# Usage
prompt = IDEA_PROMPT.substitute(
    source_count=len(sources),
    sources=formatted_sources,
    idea_count=5,
)
```

---

## Streaming

### Async Streaming with litellm

```python
from collections.abc import AsyncIterator

async def stream_completion(
    prompt: str,
    model: str | None = None,
    system_prompt: str | None = None,
) -> AsyncIterator[str]:
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    response = await litellm.acompletion(
        model=model or settings.default_llm_model,
        messages=messages,
        stream=True,
    )

    async for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta:
            yield delta
```

### FastAPI Streaming Endpoint

```python
from fastapi.responses import StreamingResponse

@router.post("/generate/stream")
async def generate_stream(
    request: GenerateRequest,
    current_user: User = Depends(get_current_user),
):
    async def event_stream():
        async for chunk in stream_completion(
            prompt=request.prompt,
            system_prompt=GENERATION_SYSTEM_PROMPT,
        ):
            yield f"data: {json.dumps({'content': chunk})}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(event_stream(), media_type="text/event-stream")
```

---

## Error Handling

### Error Hierarchy

```python
class LLMError(Exception):
    """Base LLM error."""

class LLMConfigError(LLMError):
    """Missing API key or invalid configuration."""

class LLMAuthError(LLMError):
    """Invalid or expired API key."""

class LLMRateLimitError(LLMError):
    """Rate limit exceeded."""

class LLMTimeoutError(LLMError):
    """Request timed out."""

class LLMParseError(LLMError):
    """Failed to parse LLM response."""
```

### Error Mapping

```python
import litellm

async def safe_completion(prompt: str, **kwargs) -> str:
    try:
        return await completion(prompt, **kwargs)
    except litellm.AuthenticationError as e:
        raise LLMAuthError(f"Invalid API key: {e}") from e
    except litellm.RateLimitError as e:
        raise LLMRateLimitError(f"Rate limit exceeded: {e}") from e
    except litellm.Timeout as e:
        raise LLMTimeoutError(f"Request timed out: {e}") from e
    except litellm.APIError as e:
        raise LLMError(f"API error: {e}") from e
```

### Retry with Tenacity

```python
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=10),
    retry=retry_if_exception_type(LLMRateLimitError),
)
async def completion_with_retry(prompt: str, **kwargs) -> str:
    return await safe_completion(prompt, **kwargs)
```

---

## JSON Response Parsing

```python
import json
import re

def extract_json(content: str) -> str:
    """Extract JSON from potential markdown code fences."""
    content = content.strip()
    # Remove markdown code fences
    match = re.search(r"```(?:json)?\s*\n?(.*?)\n?\s*```", content, re.DOTALL)
    if match:
        return match.group(1).strip()
    return content

def parse_llm_json(content: str, model: type[BaseModel]) -> BaseModel:
    """Parse LLM response into Pydantic model with graceful fallback."""
    try:
        raw = extract_json(content)
        data = json.loads(raw)
        return model.model_validate(data)
    except (json.JSONDecodeError, ValueError) as e:
        raise LLMParseError(f"Failed to parse response: {e}") from e
```

---

## Content Truncation

```python
MAX_CONTENT_TOKENS = 8000
CHARS_PER_TOKEN = 4  # Rough estimate

def truncate_for_llm(content: str, max_tokens: int = MAX_CONTENT_TOKENS) -> str:
    max_chars = max_tokens * CHARS_PER_TOKEN
    if len(content) <= max_chars:
        return content
    return content[:max_chars] + "\n\n[Content truncated for processing]"
```
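
The four-characters-per-token figure is only a rough budget. If exact counts matter (hard context limits, per-token billing), a tokenizer can be swapped in. A minimal sketch with a hypothetical `truncate_by_tokens` helper, assuming the `tiktoken` package is available and that `cl100k_base` is an acceptable encoding for the target model:

```python
import tiktoken

def truncate_by_tokens(content: str, max_tokens: int = MAX_CONTENT_TOKENS) -> str:
    # Encoding choice is an assumption; pick one that matches the target model.
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(content)
    if len(tokens) <= max_tokens:
        return content
    return enc.decode(tokens[:max_tokens]) + "\n\n[Content truncated for processing]"
```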

---

## Domain Services Pattern

```python
class IdeaGenerationService:
    """Generate content ideas from source material."""

    def __init__(self, llm_client: Callable = safe_completion) -> None:
        self._complete = llm_client

    async def generate(
        self,
        sources: list[Source],
        count: int = 5,
    ) -> IdeaExtractionResult:
        formatted = self._format_sources(sources)
        prompt = IDEA_PROMPT.substitute(
            source_count=len(sources),
            sources=formatted,
            idea_count=count,
        )
        raw = await self._complete(
            prompt=prompt,
            system_prompt=EXTRACTION_SYSTEM_PROMPT,
            model=GENERATION_MODEL,
        )
        return parse_llm_json(raw, IdeaExtractionResult)

    def _format_sources(self, sources: list[Source]) -> str:
        return "\n\n".join(
            f"[{i}] {s.title}\n{truncate_for_llm(s.content)}"
            for i, s in enumerate(sources)
        )
```
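
Because the completion function is injected, the tenacity-wrapped `completion_with_retry` from the error-handling section can replace the default `safe_completion` without touching the service itself. A usage sketch (assumes `sources` is already loaded):

```python
# Wire the retrying client into the service via constructor injection
service = IdeaGenerationService(llm_client=completion_with_retry)
result = await service.generate(sources=sources, count=5)
```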

---

## Testing LLM Services

### Mock Completions

```python
from unittest.mock import AsyncMock

async def test_idea_generation():
    mock_response = json.dumps({
        "ideas": [{"title": "Test Idea", "summary": "A test.", "source_indices": [0], "confidence": 0.9}],
        "overall_theme": "Testing",
    })

    service = IdeaGenerationService(llm_client=AsyncMock(return_value=mock_response))
    result = await service.generate(sources=[mock_source])

    assert len(result.ideas) == 1
    assert result.ideas[0].title == "Test Idea"

async def test_handles_parse_error():
    service = IdeaGenerationService(llm_client=AsyncMock(return_value="not json"))

    with pytest.raises(LLMParseError):
        await service.generate(sources=[mock_source])

async def test_handles_rate_limit():
    service = IdeaGenerationService(
        llm_client=AsyncMock(side_effect=LLMRateLimitError("too many requests"))
    )

    with pytest.raises(LLMRateLimitError):
        await service.generate(sources=[mock_source])
```

---

## Credential Management

Store API keys in environment variables, never in code:

```bash
# .env (never committed)
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
```

Access via pydantic-settings:
```python
settings.openai_api_key  # Loaded from OPENAI_API_KEY env var
```
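
How those variables reach the `Settings` fields depends on its model config; a minimal sketch, assuming pydantic-settings v2 and a `.env` file at the project root:

```python
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    # Field names match env vars case-insensitively: openai_api_key <- OPENAI_API_KEY
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    openai_api_key: str = ""
    anthropic_api_key: str = ""

settings = Settings()
```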

---

## Quick Reference

### Adding a New LLM Service

1. Define Pydantic response model in `app/schemas/`
2. Create service class in `app/services/` with injected `llm_client`
3. Build prompt with explicit output schema
4. Parse response with `parse_llm_json()`
5. Map errors to domain-specific types
6. Test with mocked `llm_client`
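
A skeletal pass over those steps, reusing the helpers defined above; the `SummaryResult` schema and `SummaryService` class are hypothetical names, not part of the package:

```python
# app/schemas/summary.py (hypothetical)
from pydantic import BaseModel

class SummaryResult(BaseModel):
    summary: str
    key_points: list[str]

# app/services/summary_service.py (hypothetical)
class SummaryService:
    def __init__(self, llm_client: Callable = safe_completion) -> None:
        self._complete = llm_client

    async def summarize(self, text: str) -> SummaryResult:
        raw = await self._complete(
            prompt=f"Summarize as JSON with 'summary' and 'key_points':\n\n{truncate_for_llm(text)}",
            system_prompt=EXTRACTION_SYSTEM_PROMPT,
            model=EXTRACTION_MODEL,
        )
        return parse_llm_json(raw, SummaryResult)  # raises LLMParseError on bad output

# Tests then inject an AsyncMock, exactly as in the testing section above.
```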

---

_Focus on patterns, not exhaustive API documentation. See litellm/openai docs for full API reference._