agentic-team-templates 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/README.md +280 -0
  2. package/bin/cli.js +5 -0
  3. package/package.json +47 -0
  4. package/src/index.js +521 -0
  5. package/templates/_shared/code-quality.md +162 -0
  6. package/templates/_shared/communication.md +114 -0
  7. package/templates/_shared/core-principles.md +62 -0
  8. package/templates/_shared/git-workflow.md +165 -0
  9. package/templates/_shared/security-fundamentals.md +173 -0
  10. package/templates/blockchain/.cursorrules/defi-patterns.md +520 -0
  11. package/templates/blockchain/.cursorrules/gas-optimization.md +339 -0
  12. package/templates/blockchain/.cursorrules/overview.md +130 -0
  13. package/templates/blockchain/.cursorrules/security.md +318 -0
  14. package/templates/blockchain/.cursorrules/smart-contracts.md +364 -0
  15. package/templates/blockchain/.cursorrules/testing.md +415 -0
  16. package/templates/blockchain/.cursorrules/web3-integration.md +538 -0
  17. package/templates/blockchain/CLAUDE.md +389 -0
  18. package/templates/cli-tools/.cursorrules/architecture.md +412 -0
  19. package/templates/cli-tools/.cursorrules/arguments.md +406 -0
  20. package/templates/cli-tools/.cursorrules/distribution.md +546 -0
  21. package/templates/cli-tools/.cursorrules/error-handling.md +455 -0
  22. package/templates/cli-tools/.cursorrules/overview.md +136 -0
  23. package/templates/cli-tools/.cursorrules/testing.md +537 -0
  24. package/templates/cli-tools/.cursorrules/user-experience.md +545 -0
  25. package/templates/cli-tools/CLAUDE.md +356 -0
  26. package/templates/data-engineering/.cursorrules/data-modeling.md +367 -0
  27. package/templates/data-engineering/.cursorrules/data-quality.md +455 -0
  28. package/templates/data-engineering/.cursorrules/overview.md +85 -0
  29. package/templates/data-engineering/.cursorrules/performance.md +339 -0
  30. package/templates/data-engineering/.cursorrules/pipeline-design.md +280 -0
  31. package/templates/data-engineering/.cursorrules/security.md +460 -0
  32. package/templates/data-engineering/.cursorrules/testing.md +452 -0
  33. package/templates/data-engineering/CLAUDE.md +974 -0
  34. package/templates/devops-sre/.cursorrules/capacity-planning.md +653 -0
  35. package/templates/devops-sre/.cursorrules/change-management.md +584 -0
  36. package/templates/devops-sre/.cursorrules/chaos-engineering.md +651 -0
  37. package/templates/devops-sre/.cursorrules/disaster-recovery.md +641 -0
  38. package/templates/devops-sre/.cursorrules/incident-management.md +565 -0
  39. package/templates/devops-sre/.cursorrules/observability.md +714 -0
  40. package/templates/devops-sre/.cursorrules/overview.md +230 -0
  41. package/templates/devops-sre/.cursorrules/postmortems.md +588 -0
  42. package/templates/devops-sre/.cursorrules/runbooks.md +760 -0
  43. package/templates/devops-sre/.cursorrules/slo-sli.md +617 -0
  44. package/templates/devops-sre/.cursorrules/toil-reduction.md +567 -0
  45. package/templates/devops-sre/CLAUDE.md +1007 -0
  46. package/templates/documentation/.cursorrules/adr.md +277 -0
  47. package/templates/documentation/.cursorrules/api-documentation.md +411 -0
  48. package/templates/documentation/.cursorrules/code-comments.md +253 -0
  49. package/templates/documentation/.cursorrules/maintenance.md +260 -0
  50. package/templates/documentation/.cursorrules/overview.md +82 -0
  51. package/templates/documentation/.cursorrules/readme-standards.md +306 -0
  52. package/templates/documentation/CLAUDE.md +120 -0
  53. package/templates/fullstack/.cursorrules/api-contracts.md +331 -0
  54. package/templates/fullstack/.cursorrules/architecture.md +298 -0
  55. package/templates/fullstack/.cursorrules/overview.md +109 -0
  56. package/templates/fullstack/.cursorrules/shared-types.md +348 -0
  57. package/templates/fullstack/.cursorrules/testing.md +386 -0
  58. package/templates/fullstack/CLAUDE.md +349 -0
  59. package/templates/ml-ai/.cursorrules/data-engineering.md +483 -0
  60. package/templates/ml-ai/.cursorrules/deployment.md +601 -0
  61. package/templates/ml-ai/.cursorrules/model-development.md +538 -0
  62. package/templates/ml-ai/.cursorrules/monitoring.md +658 -0
  63. package/templates/ml-ai/.cursorrules/overview.md +131 -0
  64. package/templates/ml-ai/.cursorrules/security.md +637 -0
  65. package/templates/ml-ai/.cursorrules/testing.md +678 -0
  66. package/templates/ml-ai/CLAUDE.md +1136 -0
  67. package/templates/mobile/.cursorrules/navigation.md +246 -0
  68. package/templates/mobile/.cursorrules/offline-first.md +302 -0
  69. package/templates/mobile/.cursorrules/overview.md +71 -0
  70. package/templates/mobile/.cursorrules/performance.md +345 -0
  71. package/templates/mobile/.cursorrules/testing.md +339 -0
  72. package/templates/mobile/CLAUDE.md +233 -0
  73. package/templates/platform-engineering/.cursorrules/ci-cd.md +778 -0
  74. package/templates/platform-engineering/.cursorrules/developer-experience.md +632 -0
  75. package/templates/platform-engineering/.cursorrules/infrastructure-as-code.md +600 -0
  76. package/templates/platform-engineering/.cursorrules/kubernetes.md +710 -0
  77. package/templates/platform-engineering/.cursorrules/observability.md +747 -0
  78. package/templates/platform-engineering/.cursorrules/overview.md +215 -0
  79. package/templates/platform-engineering/.cursorrules/security.md +855 -0
  80. package/templates/platform-engineering/.cursorrules/testing.md +878 -0
  81. package/templates/platform-engineering/CLAUDE.md +850 -0
  82. package/templates/utility-agent/.cursorrules/action-control.md +284 -0
  83. package/templates/utility-agent/.cursorrules/context-management.md +186 -0
  84. package/templates/utility-agent/.cursorrules/hallucination-prevention.md +253 -0
  85. package/templates/utility-agent/.cursorrules/overview.md +78 -0
  86. package/templates/utility-agent/.cursorrules/token-optimization.md +369 -0
  87. package/templates/utility-agent/CLAUDE.md +513 -0
  88. package/templates/web-backend/.cursorrules/api-design.md +255 -0
  89. package/templates/web-backend/.cursorrules/authentication.md +309 -0
  90. package/templates/web-backend/.cursorrules/database-patterns.md +298 -0
  91. package/templates/web-backend/.cursorrules/error-handling.md +366 -0
  92. package/templates/web-backend/.cursorrules/overview.md +69 -0
  93. package/templates/web-backend/.cursorrules/security.md +358 -0
  94. package/templates/web-backend/.cursorrules/testing.md +395 -0
  95. package/templates/web-backend/CLAUDE.md +366 -0
  96. package/templates/web-frontend/.cursorrules/accessibility.md +296 -0
  97. package/templates/web-frontend/.cursorrules/component-patterns.md +204 -0
  98. package/templates/web-frontend/.cursorrules/overview.md +72 -0
  99. package/templates/web-frontend/.cursorrules/performance.md +325 -0
  100. package/templates/web-frontend/.cursorrules/state-management.md +227 -0
  101. package/templates/web-frontend/.cursorrules/styling.md +271 -0
  102. package/templates/web-frontend/.cursorrules/testing.md +311 -0
  103. package/templates/web-frontend/CLAUDE.md +399 -0
@@ -0,0 +1,601 @@
1
+ # Model Deployment
2
+
3
+ Guidelines for deploying machine learning models to production, including serving patterns, scaling strategies, and infrastructure configuration.
4
+
5
+ ## Deployment Patterns
6
+
7
+ ### Real-Time Inference
8
+
9
+ For low-latency, synchronous predictions:
10
+
11
+ ```yaml
12
+ # kserve/inference-service.yaml
13
+ apiVersion: serving.kserve.io/v1beta1
14
+ kind: InferenceService
15
+ metadata:
16
+ name: fraud-detector
17
+ annotations:
18
+ serving.kserve.io/deploymentMode: Serverless
19
+ spec:
20
+ predictor:
21
+ model:
22
+ modelFormat:
23
+ name: mlflow
24
+ storageUri: s3://models/fraud-detector/v1
25
+ resources:
26
+ limits:
27
+ cpu: "2"
28
+ memory: 4Gi
29
+ nvidia.com/gpu: "1"
30
+ requests:
31
+ cpu: "1"
32
+ memory: 2Gi
33
+ minReplicas: 2
34
+ maxReplicas: 10
35
+ scaleTarget: 100
36
+ scaleMetric: concurrency
37
+ ```
38
+
39
+ ### Batch Inference
40
+
41
+ For high-throughput, asynchronous predictions:
42
+
43
+ ```python
44
+ from prefect import flow, task
45
+ from prefect.tasks import task_input_hash
46
+
47
+ @task(
48
+ cache_key_fn=task_input_hash,
49
+ cache_expiration=timedelta(hours=1),
50
+ retries=3,
51
+ retry_delay_seconds=60,
52
+ )
53
+ def run_batch_inference(
54
+ data_path: str,
55
+ model_uri: str,
56
+ output_path: str,
57
+ batch_size: int = 10000,
58
+ ) -> str:
59
+ """Run batch inference on a dataset."""
60
+
61
+ model = mlflow.pyfunc.load_model(model_uri)
62
+
63
+ # Process in chunks to manage memory
64
+ parquet_file = pq.ParquetFile(data_path)  # pyarrow: pd.read_parquet has no chunksize parameter
+ chunks = (batch.to_pandas() for batch in parquet_file.iter_batches(batch_size=batch_size))
65
+
66
+ results = []
67
+ for chunk in chunks:
68
+ predictions = model.predict(chunk)
69
+ chunk["prediction"] = predictions
70
+ chunk["model_version"] = model_uri.split("/")[-1]
71
+ chunk["inference_timestamp"] = datetime.utcnow()
72
+ results.append(chunk)
73
+
74
+ # Write results
75
+ output_df = pd.concat(results)
76
+ output_df.to_parquet(output_path, index=False)
77
+
78
+ return output_path
79
+
80
+ @flow(name="daily-batch-inference")
81
+ def batch_inference_pipeline(date: str):
82
+ """Daily batch inference pipeline."""
83
+ data_path = f"s3://data/features/{date}/"
84
+ model_uri = "models:/fraud-detector/Production"
85
+ output_path = f"s3://predictions/{date}/predictions.parquet"
86
+
87
+ run_batch_inference(data_path, model_uri, output_path)
88
+ ```
89
+
90
+ ### Streaming Inference
91
+
92
+ For real-time event processing:
93
+
94
+ ```python
95
+ from kafka import KafkaConsumer, KafkaProducer
96
+ import json
97
+
98
+ class StreamingPredictor:
99
+ """Process predictions from a Kafka stream."""
100
+
101
+ def __init__(self, model_uri: str, input_topic: str, output_topic: str):
102
+ self.model = mlflow.pyfunc.load_model(model_uri)
103
+ self.consumer = KafkaConsumer(
104
+ input_topic,
105
+ bootstrap_servers=["kafka:9092"],
106
+ value_deserializer=lambda m: json.loads(m.decode("utf-8")),
107
+ group_id="inference-group",
108
+ auto_offset_reset="latest",
109
+ )
110
+ self.producer = KafkaProducer(
111
+ bootstrap_servers=["kafka:9092"],
112
+ value_serializer=lambda m: json.dumps(m).encode("utf-8"),
113
+ )
114
+ self.output_topic = output_topic
+ self.model_version = model_uri.split("/")[-1]
115
+
116
+ def run(self):
117
+ """Process messages continuously."""
118
+ for message in self.consumer:
119
+ try:
120
+ features = message.value
121
+ prediction = self.model.predict(pd.DataFrame([features]))[0]
122
+
123
+ result = {
124
+ "input": features,
125
+ "prediction": float(prediction),
126
+ "timestamp": datetime.utcnow().isoformat(),
127
+ "model_version": self.model_version,
128
+ }
129
+
130
+ self.producer.send(self.output_topic, result)
131
+
132
+ except Exception as e:
133
+ logger.error(f"Prediction failed: {e}", extra={"input": features})
134
+ ```
135
+
136
+ ## Custom Predictors
137
+
138
+ ### KServe Custom Predictor
139
+
140
+ ```python
141
+ from kserve import Model, ModelServer
142
+ from kserve.errors import ModelMissingError
143
+ import torch
144
+
145
+ class CustomPredictor(Model):
146
+ """Custom KServe predictor with preprocessing."""
147
+
148
+ def __init__(self, name: str):
149
+ super().__init__(name)
150
+ self.model = None
151
+ self.transformer = None
152
+ self.ready = False
153
+
154
+ def load(self) -> bool:
155
+ """Load model and artifacts."""
156
+ model_path = os.environ.get("MODEL_PATH", "/mnt/models")
157
+
158
+ # Load model
159
+ self.model = torch.jit.load(f"{model_path}/model.pt")
160
+ self.model.eval()
161
+
162
+ # Load preprocessing
163
+ self.transformer = FeatureTransformer.load(f"{model_path}/transformer.pkl")
164
+
165
+ # Load config
166
+ with open(f"{model_path}/config.yaml") as f:
167
+ self.config = yaml.safe_load(f)
168
+
169
+ self.ready = True
170
+ return self.ready
171
+
172
+ def preprocess(self, inputs: dict, headers: dict = None) -> torch.Tensor:
173
+ """Preprocess input data."""
174
+ df = pd.DataFrame(inputs["instances"])
175
+
176
+ # Validate
177
+ validated = self.validate_input(df)
178
+
179
+ # Transform
180
+ features = self.transformer.transform(validated)
181
+
182
+ return torch.tensor(features.values, dtype=torch.float32)
183
+
184
+ def predict(self, inputs: torch.Tensor, headers: dict = None) -> dict:
185
+ """Run inference."""
186
+ if not self.ready:
187
+ raise ModelMissingError(self.name)
188
+
189
+ with torch.no_grad():
190
+ logits = self.model(inputs)
191
+ probabilities = torch.sigmoid(logits).numpy()
192
+
193
+ return probabilities
194
+
195
+ def postprocess(self, outputs: np.ndarray, headers: dict = None) -> dict:
196
+ """Postprocess predictions."""
197
+ return {
198
+ "predictions": outputs.tolist(),
199
+ "model_version": os.environ.get("MODEL_VERSION", "unknown"),
200
+ "threshold": self.config.get("threshold", 0.5),
201
+ }
202
+
203
+ def validate_input(self, df: pd.DataFrame) -> pd.DataFrame:
204
+ """Validate input against schema."""
205
+ required_cols = self.config["required_features"]
206
+ missing = set(required_cols) - set(df.columns)
207
+ if missing:
208
+ raise ValueError(f"Missing required features: {missing}")
209
+
210
+ return df[required_cols]
211
+
212
+ if __name__ == "__main__":
213
+ model = CustomPredictor("fraud-detector")
214
+ ModelServer().start([model])
215
+ ```
216
+
217
+ ### FastAPI Serving
218
+
219
+ ```python
220
+ from fastapi import FastAPI, HTTPException
221
+ from pydantic import BaseModel, Field
222
+ import uvicorn
223
+
224
+ app = FastAPI(title="ML Model API", version="1.0.0")
225
+
226
+ class PredictionRequest(BaseModel):
227
+ features: dict[str, float]
228
+ request_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
229
+
230
+ class PredictionResponse(BaseModel):
231
+ prediction: float
232
+ probability: float
233
+ model_version: str
234
+ request_id: str
235
+ latency_ms: float
236
+
237
+ # Load model at startup
238
+ @app.on_event("startup")
239
+ async def load_model():
240
+ global model, transformer
241
+ model = mlflow.pyfunc.load_model("models:/fraud-detector/Production")
242
+ transformer = FeatureTransformer.load("transformer.pkl")
243
+
244
+ @app.post("/predict", response_model=PredictionResponse)
245
+ async def predict(request: PredictionRequest):
246
+ start_time = time.time()
247
+
248
+ try:
249
+ # Transform
250
+ df = pd.DataFrame([request.features])
251
+ features = transformer.transform(df)
252
+
253
+ # Predict
254
+ probability = model.predict(features)[0]
255
+ prediction = int(probability >= THRESHOLD)
256
+
257
+ latency_ms = (time.time() - start_time) * 1000
258
+
259
+ return PredictionResponse(
260
+ prediction=prediction,
261
+ probability=float(probability),
262
+ model_version=MODEL_VERSION,
263
+ request_id=request.request_id,
264
+ latency_ms=latency_ms,
265
+ )
266
+
267
+ except Exception as e:
268
+ logger.error(f"Prediction failed: {e}")
269
+ raise HTTPException(status_code=500, detail=str(e))
270
+
271
+ @app.get("/health")
272
+ async def health():
273
+ return {"status": "healthy", "model_loaded": model is not None}
274
+
275
+ if __name__ == "__main__":
276
+ uvicorn.run(app, host="0.0.0.0", port=8080)
277
+ ```
278
+
279
+ ## Containerization
280
+
281
+ ### Dockerfile
282
+
283
+ ```dockerfile
284
+ # Multi-stage build for smaller image
285
+ FROM python:3.11-slim as builder
286
+
287
+ WORKDIR /app
288
+
289
+ # Install build dependencies
290
+ RUN apt-get update && apt-get install -y --no-install-recommends \
291
+ build-essential \
292
+ && rm -rf /var/lib/apt/lists/*
293
+
294
+ # Install Python dependencies
295
+ COPY requirements.txt .
296
+ RUN pip wheel --no-cache-dir --wheel-dir /app/wheels -r requirements.txt
297
+
298
+ # Production image
299
+ FROM python:3.11-slim
300
+
301
+ WORKDIR /app
302
+
303
+ # Install runtime dependencies only
304
+ RUN apt-get update && apt-get install -y --no-install-recommends \
305
+ curl \
+ libgomp1 \
306
+ && rm -rf /var/lib/apt/lists/*
307
+
308
+ # Copy wheels and install
309
+ COPY --from=builder /app/wheels /wheels
310
+ RUN pip install --no-cache /wheels/*
311
+
312
+ # Copy application code
313
+ COPY src/ ./src/
314
+ COPY configs/ ./configs/
315
+
316
+ # Non-root user for security
317
+ RUN useradd -m -u 1000 appuser
318
+ USER appuser
319
+
320
+ # Health check
321
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
322
+ CMD curl -f http://localhost:8080/health || exit 1
323
+
324
+ EXPOSE 8080
325
+
326
+ CMD ["python", "-m", "src.serve"]
327
+ ```
328
+
329
+ ### GPU Support
330
+
331
+ ```dockerfile
332
+ FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04
333
+
334
+ # Install Python
335
+ RUN apt-get update && apt-get install -y python3.11 python3-pip
336
+
337
+ # Install PyTorch with CUDA
338
+ RUN pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cu121
339
+
340
+ # ... rest of Dockerfile
341
+ ```
342
+
343
+ ## Scaling Strategies
344
+
345
+ ### Horizontal Pod Autoscaler
346
+
347
+ ```yaml
348
+ apiVersion: autoscaling/v2
349
+ kind: HorizontalPodAutoscaler
350
+ metadata:
351
+ name: model-server-hpa
352
+ spec:
353
+ scaleTargetRef:
354
+ apiVersion: apps/v1
355
+ kind: Deployment
356
+ name: model-server
357
+ minReplicas: 2
358
+ maxReplicas: 20
359
+ metrics:
360
+ - type: Resource
361
+ resource:
362
+ name: cpu
363
+ target:
364
+ type: Utilization
365
+ averageUtilization: 70
366
+ - type: Pods
367
+ pods:
368
+ metric:
369
+ name: requests_per_second
370
+ target:
371
+ type: AverageValue
372
+ averageValue: "100"
373
+ behavior:
374
+ scaleDown:
375
+ stabilizationWindowSeconds: 300
376
+ policies:
377
+ - type: Percent
378
+ value: 10
379
+ periodSeconds: 60
380
+ scaleUp:
381
+ stabilizationWindowSeconds: 0
382
+ policies:
383
+ - type: Percent
384
+ value: 100
385
+ periodSeconds: 15
386
+ ```
387
+
388
+ ### GPU Scheduling
389
+
390
+ ```yaml
391
+ apiVersion: v1
392
+ kind: Pod
393
+ metadata:
394
+ name: gpu-inference
395
+ spec:
396
+ containers:
397
+ - name: model-server
398
+ image: model-server:latest
399
+ resources:
400
+ limits:
401
+ nvidia.com/gpu: 1
402
+ memory: "16Gi"
403
+ requests:
404
+ nvidia.com/gpu: 1
405
+ memory: "8Gi"
406
+ nodeSelector:
407
+ accelerator: nvidia-tesla-t4
408
+ tolerations:
409
+ - key: nvidia.com/gpu
410
+ operator: Exists
411
+ effect: NoSchedule
412
+ ```
413
+
414
+ ## Model Loading Patterns
415
+
416
+ ### Lazy Loading
417
+
418
+ ```python
419
+ class LazyModel:
420
+ """Load model on first request."""
421
+
422
+ def __init__(self, model_uri: str):
423
+ self.model_uri = model_uri
424
+ self._model = None
425
+
426
+ @property
427
+ def model(self):
428
+ if self._model is None:
429
+ self._model = mlflow.pyfunc.load_model(self.model_uri)
430
+ return self._model
431
+
432
+ def predict(self, features):
433
+ return self.model.predict(features)
434
+ ```
435
+
436
+ ### Model Caching
437
+
438
+ ```python
439
+ from functools import lru_cache
440
+
441
+ @lru_cache(maxsize=3)
442
+ def load_model(model_uri: str):
443
+ """Cache loaded models."""
444
+ return mlflow.pyfunc.load_model(model_uri)
445
+
446
+ class ModelManager:
447
+ """Manage multiple model versions."""
448
+
449
+ def __init__(self):
450
+ self.models: dict[str, Any] = {}
451
+ self.default_version = "production"
452
+
453
+ def load_version(self, version: str) -> None:
454
+ """Load a specific model version."""
455
+ model_uri = f"models:/fraud-detector/{version}"
456
+ self.models[version] = mlflow.pyfunc.load_model(model_uri)
457
+
458
+ def predict(self, features, version: str = None) -> np.ndarray:
459
+ """Predict using specified or default version."""
460
+ version = version or self.default_version
461
+
462
+ if version not in self.models:
463
+ self.load_version(version)
464
+
465
+ return self.models[version].predict(features)
466
+ ```
467
+
468
+ ## A/B Testing
469
+
470
+ ### Traffic Splitting
471
+
472
+ ```yaml
473
+ # Istio VirtualService for traffic splitting
474
+ apiVersion: networking.istio.io/v1beta1
475
+ kind: VirtualService
476
+ metadata:
477
+ name: model-routing
478
+ spec:
479
+ hosts:
480
+ - model-service
481
+ http:
482
+ - match:
483
+ - headers:
484
+ x-model-version:
485
+ exact: "v2"
486
+ route:
487
+ - destination:
488
+ host: model-service-v2
489
+ - route:
490
+ - destination:
491
+ host: model-service-v1
492
+ weight: 90
493
+ - destination:
494
+ host: model-service-v2
495
+ weight: 10
496
+ ```
497
+
498
+ ### Shadow Deployment
499
+
500
+ ```python
501
+ class ShadowPredictor:
502
+ """Run predictions against shadow model for comparison."""
503
+
504
+ def __init__(self, primary_model, shadow_model):
505
+ self.primary = primary_model
506
+ self.shadow = shadow_model
507
+
508
+ async def predict(self, features):
509
+ # Run primary prediction (blocking)
510
+ primary_result = self.primary.predict(features)
511
+
512
+ # Run shadow prediction (non-blocking)
513
+ asyncio.create_task(self._shadow_predict(features, primary_result))
514
+
515
+ return primary_result
516
+
517
+ async def _shadow_predict(self, features, primary_result):
518
+ """Compare shadow predictions asynchronously."""
519
+ try:
520
+ shadow_result = self.shadow.predict(features)
521
+
522
+ # Log comparison
523
+ logger.info(
524
+ "shadow_comparison",
525
+ primary=primary_result,
526
+ shadow=shadow_result,
527
+ match=np.allclose(primary_result, shadow_result, rtol=0.01),
528
+ )
529
+ except Exception as e:
530
+ logger.error(f"Shadow prediction failed: {e}")
531
+ ```
532
+
533
+ ## Rollback Procedures
534
+
535
+ ```python
536
+ class ModelDeployer:
537
+ """Manage model deployments with rollback capability."""
538
+
539
+ def __init__(self, client: MlflowClient):
540
+ self.client = client
541
+
542
+ def deploy(self, model_name: str, version: str) -> None:
543
+ """Deploy a model version to production."""
544
+ # Record current production version for rollback
545
+ current_prod = self.client.get_latest_versions(model_name, stages=["Production"])
546
+ if current_prod:
547
+ self._record_rollback_version(model_name, current_prod[0].version)
548
+
549
+ # Transition to production
550
+ self.client.transition_model_version_stage(
551
+ name=model_name,
552
+ version=version,
553
+ stage="Production",
554
+ )
555
+
556
+ def rollback(self, model_name: str) -> str:
557
+ """Rollback to previous production version."""
558
+ previous_version = self._get_rollback_version(model_name)
559
+
560
+ if not previous_version:
561
+ raise ValueError("No rollback version available")
562
+
563
+ # Archive current
564
+ current_prod = self.client.get_latest_versions(model_name, stages=["Production"])
565
+ for v in current_prod:
566
+ self.client.transition_model_version_stage(
567
+ name=model_name,
568
+ version=v.version,
569
+ stage="Archived",
570
+ )
571
+
572
+ # Restore previous
573
+ self.client.transition_model_version_stage(
574
+ name=model_name,
575
+ version=previous_version,
576
+ stage="Production",
577
+ )
578
+
579
+ return previous_version
580
+ ```
581
+
582
+ ## Best Practices
583
+
584
+ ### Pre-Deployment Checklist
585
+
586
+ - [ ] Model validated on holdout test set
587
+ - [ ] Inference latency meets SLA
588
+ - [ ] Memory footprint acceptable
589
+ - [ ] Load testing completed
590
+ - [ ] Rollback procedure documented
591
+ - [ ] Monitoring configured
592
+ - [ ] Feature transformer included
593
+
594
+ ### Post-Deployment Checklist
595
+
596
+ - [ ] Health checks passing
597
+ - [ ] Predictions flowing to monitoring
598
+ - [ ] Alerts configured
599
+ - [ ] A/B test metrics tracking
600
+ - [ ] Shadow comparison (if applicable)
601
+ - [ ] Documentation updated