claude-code-pack 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -0
- package/assets/statusline-command.sh +116 -0
- package/bin/claude-pack.mjs +48 -0
- package/claude-pack.config.json +37 -0
- package/package.json +31 -0
- package/skills/cloud-devops/SKILL.md +235 -0
- package/skills/fastapi/SKILL.md +436 -0
- package/skills/fastapi/references/dependencies.md +142 -0
- package/skills/fastapi/references/other-tools.md +76 -0
- package/skills/fastapi/references/streaming.md +105 -0
- package/skills/senior-ml-engineer/SKILL.md +304 -0
- package/skills/senior-ml-engineer/references/llm_integration_guide.md +317 -0
- package/skills/senior-ml-engineer/references/mlops_production_patterns.md +265 -0
- package/skills/senior-ml-engineer/references/rag_system_architecture.md +371 -0
- package/skills/senior-ml-engineer/scripts/ml_monitoring_suite.py +100 -0
- package/skills/senior-ml-engineer/scripts/model_deployment_pipeline.py +100 -0
- package/skills/senior-ml-engineer/scripts/rag_system_builder.py +100 -0
- package/skills/technical-writer/technical-writer/SKILL.md +351 -0
- package/src/install.mjs +391 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
# LLM Integration Guide
|
|
2
|
+
|
|
3
|
+
Production patterns for integrating Large Language Models into applications.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Table of Contents
|
|
8
|
+
|
|
9
|
+
- [API Integration Patterns](#api-integration-patterns)
|
|
10
|
+
- [Prompt Engineering](#prompt-engineering)
|
|
11
|
+
- [Token Optimization](#token-optimization)
|
|
12
|
+
- [Cost Management](#cost-management)
|
|
13
|
+
- [Error Handling](#error-handling)
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## API Integration Patterns
|
|
18
|
+
|
|
19
|
+
### Provider Abstraction Layer
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from abc import ABC, abstractmethod
from typing import List, Dict, Any

class LLMProvider(ABC):
    """Abstract base class for LLM providers.

    Concrete providers must implement BOTH `complete` (single-prompt
    completion) and `chat` (message-list conversation); the ABC machinery
    refuses to instantiate a subclass that leaves either one out.
    """

    @abstractmethod
    def complete(self, prompt: str, **kwargs) -> str:
        """Return the completion text for a single prompt."""

    @abstractmethod
    def chat(self, messages: List[Dict], **kwargs) -> str:
        """Return the assistant reply for a list of chat messages."""

class OpenAIProvider(LLMProvider):
    """OpenAI-backed provider."""

    def __init__(self, api_key: str, model: str = "gpt-4"):
        # NOTE(review): assumes `OpenAI` is imported from the openai SDK
        # elsewhere in the surrounding example — confirm when embedding.
        self.client = OpenAI(api_key=api_key)
        self.model = model

    def complete(self, prompt: str, **kwargs) -> str:
        response = self.client.completions.create(
            model=self.model,
            prompt=prompt,
            **kwargs
        )
        return response.choices[0].text

    def chat(self, messages: List[Dict], **kwargs) -> str:
        # Fix: the original example omitted this abstract method, so
        # OpenAIProvider could not be instantiated (TypeError).
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            **kwargs
        )
        return response.choices[0].message.content

class AnthropicProvider(LLMProvider):
    """Anthropic-backed provider."""

    def __init__(self, api_key: str, model: str = "claude-3-opus"):
        # NOTE(review): assumes `Anthropic` is imported from the anthropic
        # SDK elsewhere in the surrounding example — confirm when embedding.
        self.client = Anthropic(api_key=api_key)
        self.model = model

    def complete(self, prompt: str, **kwargs) -> str:
        # Fix: originally missing, leaving the class abstract. Anthropic has
        # no legacy completions API, so delegate to the chat endpoint with a
        # single user message.
        return self.chat([{"role": "user", "content": prompt}], **kwargs)

    def chat(self, messages: List[Dict], **kwargs) -> str:
        response = self.client.messages.create(
            model=self.model,
            messages=messages,
            **kwargs
        )
        return response.content[0].text
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Retry and Fallback Strategy
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
import time
|
|
67
|
+
from tenacity import retry, stop_after_attempt, wait_exponential
|
|
68
|
+
|
|
69
|
+
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=10),
)
def call_llm_with_retry(provider: LLMProvider, prompt: str) -> str:
    """Call the provider, retrying up to 3 times with exponential
    backoff (1s floor, 10s ceiling) on any raised exception."""
    return provider.complete(prompt)

def call_with_fallback(primary: LLMProvider, fallback: LLMProvider, prompt: str) -> str:
    """Route the prompt to *primary*; if its retries are exhausted,
    retry the same prompt against *fallback* instead."""
    try:
        return call_llm_with_retry(primary, prompt)
    except Exception as e:
        # Primary failed even after backoff — log and switch providers.
        logger.warning(f"Primary provider failed: {e}, using fallback")
        return call_llm_with_retry(fallback, prompt)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Prompt Engineering
|
|
93
|
+
|
|
94
|
+
### Prompt Templates
|
|
95
|
+
|
|
96
|
+
| Pattern | Use Case | Structure |
|
|
97
|
+
|---------|----------|-----------|
|
|
98
|
+
| Zero-shot | Simple tasks | Task description + input |
|
|
99
|
+
| Few-shot | Complex tasks | Examples + task + input |
|
|
100
|
+
| Chain-of-thought | Reasoning | "Think step by step" + task |
|
|
101
|
+
| Role-based | Specialized output | System role + task |
|
|
102
|
+
|
|
103
|
+
### Few-Shot Template
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
FEW_SHOT_TEMPLATE = """
You are a sentiment classifier. Classify the sentiment as positive, negative, or neutral.

Examples:
Input: "This product is amazing, I love it!"
Output: positive

Input: "Terrible experience, waste of money."
Output: negative

Input: "The product arrived on time."
Output: neutral

Now classify:
Input: "{user_input}"
Output:"""

def classify_sentiment(text: str, provider: LLMProvider) -> str:
    """Label *text* as positive/negative/neutral via few-shot prompting.

    temperature=0 keeps the label deterministic; max_tokens=10 caps the
    reply to a single-word label. The raw reply is trimmed and
    lower-cased so callers get a canonical label string.
    """
    filled_prompt = FEW_SHOT_TEMPLATE.format(user_input=text)
    raw_label = provider.complete(filled_prompt, max_tokens=10, temperature=0)
    return raw_label.strip().lower()
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### System Prompts for Consistency
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
SYSTEM_PROMPT = """You are a helpful assistant that answers questions about our product.

Guidelines:
- Be concise and direct
- Use bullet points for lists
- If unsure, say "I don't have that information"
- Never make up information
- Keep responses under 200 words

Product context:
{product_context}
"""

def create_chat_messages(user_query: str, context: str) -> List[Dict]:
    """Build the two-message chat payload: a system message carrying the
    guidelines plus product context, followed by the user's question."""
    system_message = {
        "role": "system",
        "content": SYSTEM_PROMPT.format(product_context=context),
    }
    user_message = {"role": "user", "content": user_query}
    return [system_message, user_message]
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## Token Optimization
|
|
155
|
+
|
|
156
|
+
### Token Counting
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
import tiktoken
|
|
160
|
+
|
|
161
|
+
def count_tokens(text: str, model: str = "gpt-4") -> int:
    """Count tokens for *text* using the tokenizer that matches *model*."""
    return len(tiktoken.encoding_for_model(model).encode(text))

def truncate_to_token_limit(text: str, max_tokens: int, model: str = "gpt-4") -> str:
    """Return *text* unchanged if it fits in *max_tokens*; otherwise
    decode and return only the first *max_tokens* tokens."""
    encoding = tiktoken.encoding_for_model(model)
    token_ids = encoding.encode(text)
    if len(token_ids) > max_tokens:
        # Over budget: keep the allowed token prefix and turn it back
        # into a string (may land mid-word — tokens, not characters).
        return encoding.decode(token_ids[:max_tokens])
    return text
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Context Window Management
|
|
178
|
+
|
|
179
|
+
| Model | Context Window | Effective Limit |
|
|
180
|
+
|-------|----------------|-----------------|
|
|
181
|
+
| GPT-4 | 8,192 | ~6,000 (leave room for response) |
|
|
182
|
+
| GPT-4-32k | 32,768 | ~28,000 |
|
|
183
|
+
| Claude 3 | 200,000 | ~180,000 |
|
|
184
|
+
| Llama 3 | 8,192 | ~6,000 |
|
|
185
|
+
|
|
186
|
+
### Chunking Strategy
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> list[str]:
    """Split *text* into chunks of at most *chunk_size* characters, each
    overlapping the previous one by *overlap* characters.

    Fixes two defects in the original:
    - overlap >= chunk_size made the start index never advance
      (infinite loop); now rejected with ValueError.
    - when the final window ran past the end of the text but
      ``end - overlap`` was still inside it, a duplicate tail chunk was
      emitted; the loop now stops right after the chunk that reaches
      the end.

    Returns [] for empty input.
    Raises ValueError on non-positive chunk_size or invalid overlap.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must be non-negative and smaller than chunk_size")

    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            break  # this chunk already covers the tail — done
        start = end - overlap

    return chunks
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## Cost Management
|
|
206
|
+
|
|
207
|
+
### Cost Calculation
|
|
208
|
+
|
|
209
|
+
| Provider | Input Cost | Output Cost | Example (1K tokens) |
|
|
210
|
+
|----------|------------|-------------|---------------------|
|
|
211
|
+
| GPT-4 | $0.03/1K | $0.06/1K | $0.09 |
|
|
212
|
+
| GPT-3.5 | $0.0005/1K | $0.0015/1K | $0.002 |
|
|
213
|
+
| Claude 3 Opus | $0.015/1K | $0.075/1K | $0.09 |
|
|
214
|
+
| Claude 3 Haiku | $0.00025/1K | $0.00125/1K | $0.0015 |
|
|
215
|
+
|
|
216
|
+
### Cost Tracking
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
from dataclasses import dataclass
|
|
220
|
+
from typing import Optional
|
|
221
|
+
|
|
222
|
+
@dataclass
class LLMUsage:
    """Token usage and dollar-cost record for a single LLM call."""
    input_tokens: int   # prompt-side tokens
    output_tokens: int  # completion-side tokens
    model: str          # model identifier the call was billed against
    cost: float         # dollar cost derived from the token counts

def calculate_cost(
    input_tokens: int,
    output_tokens: int,
    model: str
) -> float:
    """Return the dollar cost of a call given its token counts.

    Prices are per 1K tokens; an unrecognized model falls back to a
    default rate ($0.01 in / $0.03 out per 1K).
    """
    PRICING = {
        "gpt-4": {"input": 0.03, "output": 0.06},
        "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
        "claude-3-opus": {"input": 0.015, "output": 0.075},
    }
    prices = PRICING.get(model, {"input": 0.01, "output": 0.03})
    per_side = (
        (input_tokens / 1000) * prices["input"],
        (output_tokens / 1000) * prices["output"],
    )
    return sum(per_side)
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
### Cost Optimization Strategies
|
|
250
|
+
|
|
251
|
+
1. **Use smaller models for simple tasks** - GPT-3.5 for classification, GPT-4 for reasoning
|
|
252
|
+
2. **Cache common responses** - Store results for repeated queries
|
|
253
|
+
3. **Batch requests** - Combine multiple items in single prompt
|
|
254
|
+
4. **Truncate context** - Only include relevant information
|
|
255
|
+
5. **Set max_tokens limit** - Prevent runaway responses
|
|
256
|
+
|
|
257
|
+
---
|
|
258
|
+
|
|
259
|
+
## Error Handling
|
|
260
|
+
|
|
261
|
+
### Common Error Types
|
|
262
|
+
|
|
263
|
+
| Error | Cause | Handling |
|
|
264
|
+
|-------|-------|----------|
|
|
265
|
+
| RateLimitError | Too many requests | Exponential backoff |
|
|
266
|
+
| InvalidRequestError | Bad input | Validate before sending |
|
|
267
|
+
| AuthenticationError | Invalid API key | Check credentials |
|
|
268
|
+
| ServiceUnavailable | Provider down | Fallback to alternative |
|
|
269
|
+
| ContextLengthExceeded | Input too long | Truncate or chunk |
|
|
270
|
+
|
|
271
|
+
### Error Handling Pattern
|
|
272
|
+
|
|
273
|
+
```python
|
|
274
|
+
from openai import RateLimitError, APIError
|
|
275
|
+
|
|
276
|
+
def safe_llm_call(provider: "LLMProvider", prompt: str, max_retries: int = 3) -> str:
    """Safely call LLM with comprehensive error handling.

    Retries on rate limits with exponential backoff (1s, 2s, 4s, ...)
    and on 5xx server errors with a fixed 1s pause; 4xx API errors are
    re-raised immediately since retrying cannot fix a bad request.

    Fix over the original: no sleep is performed after the FINAL failed
    attempt — the old code paused up to 2**(max_retries-1) seconds
    before raising, delaying the caller for nothing.

    Raises:
        Exception: once all attempts are exhausted.
    """
    for attempt in range(max_retries):
        try:
            return provider.complete(prompt)

        except RateLimitError:
            if attempt == max_retries - 1:
                break  # out of attempts — don't sleep, just fail below
            wait_time = 2 ** attempt
            logger.warning(f"Rate limited, waiting {wait_time}s")
            time.sleep(wait_time)

        except APIError as e:
            if e.status_code < 500:
                raise  # client-side error: retrying won't help
            if attempt == max_retries - 1:
                break  # out of attempts — fail below without sleeping
            logger.warning(f"Server error: {e}, retrying...")
            time.sleep(1)

    raise Exception(f"Failed after {max_retries} attempts")
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
### Response Validation
|
|
298
|
+
|
|
299
|
+
```python
|
|
300
|
+
import json
|
|
301
|
+
from pydantic import BaseModel, ValidationError
|
|
302
|
+
|
|
303
|
+
class StructuredResponse(BaseModel):
    """Schema the LLM's JSON answer must conform to."""
    answer: str
    confidence: float
    sources: List[str]

def parse_structured_response(response: str) -> StructuredResponse:
    """Parse *response* as JSON and validate it against StructuredResponse.

    Raises:
        ValueError: when the text is not JSON, or when the JSON does not
            match the schema (missing/extra fields, wrong types).
    """
    try:
        payload = json.loads(response)
    except json.JSONDecodeError:
        raise ValueError("Response is not valid JSON")
    try:
        return StructuredResponse(**payload)
    except ValidationError as e:
        raise ValueError(f"Response validation failed: {e}")
|
|
317
|
+
```
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
# MLOps Production Patterns
|
|
2
|
+
|
|
3
|
+
Production ML infrastructure patterns for model deployment, monitoring, and lifecycle management.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Table of Contents
|
|
8
|
+
|
|
9
|
+
- [Model Deployment Pipeline](#model-deployment-pipeline)
|
|
10
|
+
- [Feature Store Architecture](#feature-store-architecture)
|
|
11
|
+
- [Model Monitoring](#model-monitoring)
|
|
12
|
+
- [A/B Testing Infrastructure](#ab-testing-infrastructure)
|
|
13
|
+
- [Automated Retraining](#automated-retraining)
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Model Deployment Pipeline
|
|
18
|
+
|
|
19
|
+
### Deployment Workflow
|
|
20
|
+
|
|
21
|
+
1. Export trained model to standardized format (ONNX, TorchScript, SavedModel)
|
|
22
|
+
2. Package model with dependencies in Docker container
|
|
23
|
+
3. Deploy to staging environment
|
|
24
|
+
4. Run integration tests against staging
|
|
25
|
+
5. Deploy canary (5% traffic) to production
|
|
26
|
+
6. Monitor latency and error rates for 1 hour
|
|
27
|
+
7. Promote to full production if metrics pass
|
|
28
|
+
8. **Validation:** p95 latency < 100ms, error rate < 0.1%
|
|
29
|
+
|
|
30
|
+
### Container Structure
|
|
31
|
+
|
|
32
|
+
```dockerfile
|
|
33
|
+
FROM python:3.11-slim

# Fix: CMD references "src.server:app", which is only importable when the
# working directory is /app (the base image defaults to /).
WORKDIR /app

# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy model artifacts
COPY model/ /app/model/
COPY src/ /app/src/

# Health check endpoint
# Fix: python:3.11-slim does not ship curl, so the original curl-based
# HEALTHCHECK always failed; probe with the Python stdlib instead.
HEALTHCHECK CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')" || exit 1

EXPOSE 8080
CMD ["uvicorn", "src.server:app", "--host", "0.0.0.0", "--port", "8080"]
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Model Serving Options
|
|
51
|
+
|
|
52
|
+
| Option | Latency | Throughput | Use Case |
|
|
53
|
+
|--------|---------|------------|----------|
|
|
54
|
+
| FastAPI + Uvicorn | Low | Medium | REST APIs, small models |
|
|
55
|
+
| Triton Inference Server | Very Low | Very High | GPU inference, batching |
|
|
56
|
+
| TensorFlow Serving | Low | High | TensorFlow models |
|
|
57
|
+
| TorchServe | Low | High | PyTorch models |
|
|
58
|
+
| Ray Serve | Medium | High | Complex pipelines, multi-model |
|
|
59
|
+
|
|
60
|
+
### Kubernetes Deployment
|
|
61
|
+
|
|
62
|
+
```yaml
|
|
63
|
+
apiVersion: apps/v1
kind: Deployment
metadata:
  name: model-serving
spec:
  replicas: 3
  selector:
    matchLabels:
      app: model-serving
  template:
    # Fix: the pod template had no labels, but spec.selector must match
    # the template's labels or the API server rejects the Deployment.
    metadata:
      labels:
        app: model-serving
    spec:
      containers:
      - name: model
        image: model:v1.0.0
        resources:
          requests:
            memory: "2Gi"
            cpu: "1"
          limits:
            memory: "4Gi"
            cpu: "2"
        readinessProbe:
          httpGet:
            path: /health
            port: 8080
          initialDelaySeconds: 10
          periodSeconds: 5
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Feature Store Architecture
|
|
95
|
+
|
|
96
|
+
### Feature Store Components
|
|
97
|
+
|
|
98
|
+
| Component | Purpose | Tools |
|
|
99
|
+
|-----------|---------|-------|
|
|
100
|
+
| Offline Store | Training data, batch features | BigQuery, Snowflake, S3 |
|
|
101
|
+
| Online Store | Low-latency serving | Redis, DynamoDB, Feast |
|
|
102
|
+
| Feature Registry | Metadata, lineage | Feast, Tecton, Hopsworks |
|
|
103
|
+
| Transformation | Feature engineering | Spark, Flink, dbt |
|
|
104
|
+
|
|
105
|
+
### Feature Pipeline Workflow
|
|
106
|
+
|
|
107
|
+
1. Define feature schema in registry
|
|
108
|
+
2. Implement transformation logic (SQL or Python)
|
|
109
|
+
3. Backfill historical features to offline store
|
|
110
|
+
4. Schedule incremental updates
|
|
111
|
+
5. Materialize to online store for serving
|
|
112
|
+
6. Monitor feature freshness and quality
|
|
113
|
+
7. **Validation:** Feature values within expected ranges, no nulls in required fields
|
|
114
|
+
|
|
115
|
+
### Feature Definition Example
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from datetime import timedelta

# Fix: the original example used ValueType and timedelta without importing
# them, so it failed with NameError when run verbatim.
from feast import Entity, Feature, FeatureView, FileSource, ValueType

user = Entity(name="user_id", value_type=ValueType.INT64)

user_features = FeatureView(
    name="user_features",
    entities=["user_id"],
    ttl=timedelta(days=1),  # feature values older than a day are stale
    features=[
        Feature(name="purchase_count_30d", dtype=ValueType.INT64),
        Feature(name="avg_order_value", dtype=ValueType.FLOAT),
        Feature(name="days_since_last_purchase", dtype=ValueType.INT64),
    ],
    online=True,  # materialize to the online store for low-latency serving
    source=FileSource(path="data/user_features.parquet"),
)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Model Monitoring
|
|
139
|
+
|
|
140
|
+
### Monitoring Dimensions
|
|
141
|
+
|
|
142
|
+
| Dimension | Metrics | Alert Threshold |
|
|
143
|
+
|-----------|---------|-----------------|
|
|
144
|
+
| Latency | p50, p95, p99 | p95 > 100ms |
|
|
145
|
+
| Throughput | requests/sec | < 80% baseline |
|
|
146
|
+
| Errors | error rate, 5xx count | > 0.1% |
|
|
147
|
+
| Data Drift | PSI, KS statistic | PSI > 0.2 |
|
|
148
|
+
| Model Drift | accuracy, AUC decay | > 5% drop |
|
|
149
|
+
|
|
150
|
+
### Data Drift Detection
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from scipy.stats import ks_2samp
|
|
154
|
+
import numpy as np
|
|
155
|
+
|
|
156
|
+
def detect_drift(reference: np.array, current: np.array, threshold: float = 0.05):
    """Detect distribution drift between two samples.

    Runs a two-sample Kolmogorov-Smirnov test: a p-value below
    *threshold* means the samples are unlikely to share a distribution,
    i.e. drift. Returns a dict with the decision and the raw statistics.
    """
    ks_statistic, p_value = ks_2samp(reference, current)
    return {
        "drift_detected": p_value < threshold,
        "ks_statistic": ks_statistic,
        "p_value": p_value,
        "threshold": threshold,
    }
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Monitoring Dashboard Metrics
|
|
171
|
+
|
|
172
|
+
**Infrastructure:**
|
|
173
|
+
- Request latency (p50, p95, p99)
|
|
174
|
+
- Requests per second
|
|
175
|
+
- Error rate by type
|
|
176
|
+
- CPU/memory utilization
|
|
177
|
+
- GPU utilization (if applicable)
|
|
178
|
+
|
|
179
|
+
**Model Performance:**
|
|
180
|
+
- Prediction distribution
|
|
181
|
+
- Feature value distributions
|
|
182
|
+
- Model output confidence
|
|
183
|
+
- Ground truth vs predictions (when available)
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## A/B Testing Infrastructure
|
|
188
|
+
|
|
189
|
+
### Experiment Workflow
|
|
190
|
+
|
|
191
|
+
1. Define experiment hypothesis and success metrics
|
|
192
|
+
2. Calculate required sample size for statistical power
|
|
193
|
+
3. Configure traffic split (control vs treatment)
|
|
194
|
+
4. Deploy treatment model alongside control
|
|
195
|
+
5. Route traffic based on user/session hash
|
|
196
|
+
6. Collect metrics for both variants
|
|
197
|
+
7. Run statistical significance test
|
|
198
|
+
8. **Validation:** p-value < 0.05, minimum sample size reached
|
|
199
|
+
|
|
200
|
+
### Traffic Splitting
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
import hashlib
|
|
204
|
+
|
|
205
|
+
def get_variant(user_id: str, experiment: str, control_pct: float = 0.5) -> str:
    """Assign a user to "control" or "treatment" deterministically.

    Hashes user_id together with the experiment name so a given user
    keeps the same variant within an experiment but may land in a
    different bucket for other experiments.
    """
    digest = hashlib.md5(f"{user_id}:{experiment}".encode()).hexdigest()
    bucket = (int(digest, 16) % 100) / 100.0
    if bucket < control_pct:
        return "control"
    return "treatment"
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Metrics Collection
|
|
215
|
+
|
|
216
|
+
| Metric Type | Examples | Collection Method |
|
|
217
|
+
|-------------|----------|-------------------|
|
|
218
|
+
| Primary | Conversion rate, revenue | Event logging |
|
|
219
|
+
| Secondary | Latency, engagement | Request logs |
|
|
220
|
+
| Guardrail | Error rate, crashes | Monitoring system |
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Automated Retraining
|
|
225
|
+
|
|
226
|
+
### Retraining Triggers
|
|
227
|
+
|
|
228
|
+
| Trigger | Detection Method | Action |
|
|
229
|
+
|---------|------------------|--------|
|
|
230
|
+
| Scheduled | Cron (weekly/monthly) | Full retrain |
|
|
231
|
+
| Performance drop | Accuracy < threshold | Immediate retrain |
|
|
232
|
+
| Data drift | PSI > 0.2 | Evaluate, then retrain |
|
|
233
|
+
| New data volume | X new samples | Incremental update |
|
|
234
|
+
|
|
235
|
+
### Retraining Pipeline
|
|
236
|
+
|
|
237
|
+
1. Trigger detection (schedule, drift, performance)
|
|
238
|
+
2. Fetch latest training data from feature store
|
|
239
|
+
3. Run training job with hyperparameter config
|
|
240
|
+
4. Evaluate model on holdout set
|
|
241
|
+
5. Compare against production model
|
|
242
|
+
6. If improved: register new model version
|
|
243
|
+
7. Deploy to staging for validation
|
|
244
|
+
8. Promote to production via canary
|
|
245
|
+
9. **Validation:** New model outperforms baseline on key metrics
|
|
246
|
+
|
|
247
|
+
### MLflow Model Registry Integration
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
import mlflow
|
|
251
|
+
|
|
252
|
+
def register_model(model, metrics: dict, model_name: str):
    """Log *model* and its evaluation *metrics* to MLflow, then promote
    the logged artifact into the model registry under *model_name*."""
    with mlflow.start_run():
        # Record evaluation metrics alongside the artifact.
        for metric_name, metric_value in metrics.items():
            mlflow.log_metric(metric_name, metric_value)

        # Persist the sklearn model as a run artifact named "model".
        mlflow.sklearn.log_model(model, "model")

        # Point the registry at the artifact we just logged in this run.
        run_id = mlflow.active_run().info.run_id
        mlflow.register_model(f"runs:/{run_id}/model", model_name)
|
|
265
|
+
```
|