omgkit 2.19.3 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/README.md +537 -338
  2. package/package.json +2 -2
  3. package/plugin/agents/ai-architect-agent.md +282 -0
  4. package/plugin/agents/data-scientist-agent.md +221 -0
  5. package/plugin/agents/experiment-analyst-agent.md +318 -0
  6. package/plugin/agents/ml-engineer-agent.md +165 -0
  7. package/plugin/agents/mlops-engineer-agent.md +324 -0
  8. package/plugin/agents/model-optimizer-agent.md +287 -0
  9. package/plugin/agents/production-engineer-agent.md +360 -0
  10. package/plugin/agents/research-scientist-agent.md +274 -0
  11. package/plugin/commands/omgdata/augment.md +86 -0
  12. package/plugin/commands/omgdata/collect.md +81 -0
  13. package/plugin/commands/omgdata/label.md +83 -0
  14. package/plugin/commands/omgdata/split.md +83 -0
  15. package/plugin/commands/omgdata/validate.md +76 -0
  16. package/plugin/commands/omgdata/version.md +85 -0
  17. package/plugin/commands/omgdeploy/ab.md +94 -0
  18. package/plugin/commands/omgdeploy/cloud.md +89 -0
  19. package/plugin/commands/omgdeploy/edge.md +93 -0
  20. package/plugin/commands/omgdeploy/package.md +91 -0
  21. package/plugin/commands/omgdeploy/serve.md +92 -0
  22. package/plugin/commands/omgfeature/embed.md +93 -0
  23. package/plugin/commands/omgfeature/extract.md +93 -0
  24. package/plugin/commands/omgfeature/select.md +85 -0
  25. package/plugin/commands/omgfeature/store.md +97 -0
  26. package/plugin/commands/omgml/init.md +60 -0
  27. package/plugin/commands/omgml/status.md +82 -0
  28. package/plugin/commands/omgops/drift.md +87 -0
  29. package/plugin/commands/omgops/monitor.md +99 -0
  30. package/plugin/commands/omgops/pipeline.md +102 -0
  31. package/plugin/commands/omgops/registry.md +109 -0
  32. package/plugin/commands/omgops/retrain.md +91 -0
  33. package/plugin/commands/omgoptim/distill.md +90 -0
  34. package/plugin/commands/omgoptim/profile.md +92 -0
  35. package/plugin/commands/omgoptim/prune.md +81 -0
  36. package/plugin/commands/omgoptim/quantize.md +83 -0
  37. package/plugin/commands/omgtrain/baseline.md +78 -0
  38. package/plugin/commands/omgtrain/compare.md +99 -0
  39. package/plugin/commands/omgtrain/evaluate.md +85 -0
  40. package/plugin/commands/omgtrain/train.md +81 -0
  41. package/plugin/commands/omgtrain/tune.md +89 -0
  42. package/plugin/registry.yaml +252 -2
  43. package/plugin/skills/ml-systems/SKILL.md +65 -0
  44. package/plugin/skills/ml-systems/ai-accelerators/SKILL.md +342 -0
  45. package/plugin/skills/ml-systems/data-eng/SKILL.md +126 -0
  46. package/plugin/skills/ml-systems/deep-learning-primer/SKILL.md +143 -0
  47. package/plugin/skills/ml-systems/deployment-paradigms/SKILL.md +148 -0
  48. package/plugin/skills/ml-systems/dnn-architectures/SKILL.md +128 -0
  49. package/plugin/skills/ml-systems/edge-deployment/SKILL.md +366 -0
  50. package/plugin/skills/ml-systems/efficient-ai/SKILL.md +316 -0
  51. package/plugin/skills/ml-systems/feature-engineering/SKILL.md +151 -0
  52. package/plugin/skills/ml-systems/ml-frameworks/SKILL.md +187 -0
  53. package/plugin/skills/ml-systems/ml-serving-optimization/SKILL.md +371 -0
  54. package/plugin/skills/ml-systems/ml-systems-fundamentals/SKILL.md +103 -0
  55. package/plugin/skills/ml-systems/ml-workflow/SKILL.md +162 -0
  56. package/plugin/skills/ml-systems/mlops/SKILL.md +386 -0
  57. package/plugin/skills/ml-systems/model-deployment/SKILL.md +350 -0
  58. package/plugin/skills/ml-systems/model-dev/SKILL.md +160 -0
  59. package/plugin/skills/ml-systems/model-optimization/SKILL.md +339 -0
  60. package/plugin/skills/ml-systems/robust-ai/SKILL.md +395 -0
  61. package/plugin/skills/ml-systems/training-data/SKILL.md +152 -0
  62. package/plugin/workflows/ml-systems/data-preparation-workflow.md +276 -0
  63. package/plugin/workflows/ml-systems/edge-deployment-workflow.md +413 -0
  64. package/plugin/workflows/ml-systems/full-ml-lifecycle-workflow.md +405 -0
  65. package/plugin/workflows/ml-systems/hyperparameter-tuning-workflow.md +352 -0
  66. package/plugin/workflows/ml-systems/mlops-pipeline-workflow.md +384 -0
  67. package/plugin/workflows/ml-systems/model-deployment-workflow.md +392 -0
  68. package/plugin/workflows/ml-systems/model-development-workflow.md +218 -0
  69. package/plugin/workflows/ml-systems/model-evaluation-workflow.md +416 -0
  70. package/plugin/workflows/ml-systems/model-optimization-workflow.md +390 -0
  71. package/plugin/workflows/ml-systems/monitoring-drift-workflow.md +446 -0
  72. package/plugin/workflows/ml-systems/retraining-workflow.md +401 -0
  73. package/plugin/workflows/ml-systems/training-pipeline-workflow.md +382 -0
@@ -0,0 +1,350 @@
1
+ ---
2
+ name: model-deployment
3
+ description: Model deployment strategies including serving infrastructure, containerization, model packaging, versioning, and production deployment patterns.
4
+ ---
5
+
6
+ # Model Deployment
7
+
8
+ Deploying ML models to production.
9
+
10
+ ## Deployment Architecture
11
+
12
+ ```
13
+ ┌─────────────────────────────────────────────────────────────┐
14
+ │                   ML DEPLOYMENT PATTERNS                    │
15
+ ├─────────────────────────────────────────────────────────────┤
16
+ │                                                             │
17
+ │  BATCH INFERENCE      REAL-TIME            STREAMING        │
18
+ │  ───────────────      ─────────            ─────────        │
19
+ │  Spark/Airflow        REST/gRPC            Kafka/Flink      │
20
+ │  High throughput      Low latency          Continuous       │
21
+ │  Scheduled runs       On-demand            Event-driven     │
22
+ │                                                             │
23
+ │  EMBEDDED             EDGE                 SERVERLESS       │
24
+ │  ────────             ────                 ──────────       │
25
+ │  Mobile SDK           IoT devices          AWS Lambda       │
26
+ │  On-device            Local inference      Auto-scaling     │
27
+ │  Offline capable      Bandwidth limited    Pay per request  │
28
+ │                                                             │
29
+ └─────────────────────────────────────────────────────────────┘
30
+ ```
31
+
32
+ ## Model Serving Frameworks
33
+
34
+ ### TorchServe
35
+ ```python
36
+ # Handler for TorchServe
37
+ from ts.torch_handler.base_handler import BaseHandler
38
+ import torch
39
+
40
class ModelHandler(BaseHandler):
    """TorchServe handler that serves a TorchScript model stored as ``model.pt``.

    The request flow is ``preprocess`` -> ``inference`` -> ``postprocess``;
    ``initialize`` runs once to load the model.
    """

    def initialize(self, context):
        """Load the TorchScript model from the model directory.

        Args:
            context: TorchServe context. ``context.manifest`` and
                ``context.system_properties["model_dir"]`` are read.
        """
        self.manifest = context.manifest
        model_dir = context.system_properties.get("model_dir")
        # Choose the device first and deserialize straight onto it. Without
        # map_location a model saved from a GPU cannot be loaded on a
        # CPU-only host, because loading happens before any .to() call.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = torch.jit.load(f"{model_dir}/model.pt", map_location=self.device)
        self.model.eval()

    def preprocess(self, data):
        """Turn a batch of request rows into one stacked tensor on the serving device.

        Args:
            data: Iterable of dict-like rows; the payload is taken from the
                ``"data"`` key, falling back to ``"body"`` when it is absent.

        Returns:
            A tensor with one leading entry per row, on ``self.device``.
        """
        inputs = []
        for row in data:
            input_data = row.get("data")
            # Fall back only when "data" is missing: an `or` here would also
            # discard legitimate falsy payloads such as 0 or 0.0.
            if input_data is None:
                input_data = row.get("body")
            inputs.append(torch.tensor(input_data))
        return torch.stack(inputs).to(self.device)

    def inference(self, data):
        """Run the model on ``data`` with gradient tracking disabled."""
        with torch.no_grad():
            return self.model(data)

    def postprocess(self, inference_output):
        """Convert the output tensor to nested Python lists for the response."""
        return inference_output.tolist()
62
+
63
+ # Package model
64
+ # torch-model-archiver --model-name model --version 1.0 \
65
+ # --serialized-file model.pt --handler handler.py
66
+ ```
67
+
68
+ ### TensorFlow Serving
69
+ ```python
70
import tensorflow as tf

# Save model in SavedModel format. The trailing "1" is the numbered version
# directory that TensorFlow Serving looks for under the model's base path.
tf.saved_model.save(model, "saved_model/1")

# Serve with Docker
# docker run -p 8501:8501 \
#   -v /path/to/saved_model:/models/model \
#   -e MODEL_NAME=model \
#   tensorflow/serving

# Client request
import requests
import json

data = {"instances": [[1.0, 2.0, 3.0]]}
response = requests.post(
    "http://localhost:8501/v1/models/model:predict",
    json=data,
    # requests applies no timeout by default, so an unresponsive server
    # would otherwise block this call forever.
    timeout=10,
)
# Fail loudly on 4xx/5xx instead of raising a confusing KeyError on the
# missing "predictions" field of an error payload.
response.raise_for_status()
predictions = response.json()["predictions"]
91
+ ```
92
+
93
+ ### Triton Inference Server
94
+ ```python
95
+ # Model repository structure
96
+ # models/
97
+ # model_name/
98
+ # config.pbtxt
99
+ # 1/
100
+ # model.onnx
101
+
102
# config.pbtxt
# Because max_batch_size > 0, Triton adds the batch dimension itself, so
# `dims` lists only the per-request shape (784 in, 10 out). Writing
# [ -1, 784 ] would declare a second variable dimension and reject the
# [batch, 784] tensors sent by the client.
"""
name: "my_model"
platform: "onnxruntime_onnx"
max_batch_size: 64
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 784 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 10 ]
  }
]
instance_group [
  { count: 2, kind: KIND_GPU }
]
dynamic_batching {
  preferred_batch_size: [ 16, 32 ]
  max_queue_delay_microseconds: 100
}
"""
129
+
130
+ # Python client
131
+ import tritonclient.grpc as grpcclient
132
+
133
# Open a gRPC connection to the inference server.
client = grpcclient.InferenceServerClient("localhost:8001")

# Describe the single FP32 input of shape [1, 784] and attach the payload.
# `input_data` is defined elsewhere; presumably a float32 NumPy array of that
# shape - confirm against the caller.
infer_input = grpcclient.InferInput("input", [1, 784], "FP32")
infer_input.set_data_from_numpy(input_data)
inputs = [infer_input]

# Request the "output" tensor and run inference against "my_model".
outputs = [grpcclient.InferRequestedOutput("output")]
result = client.infer("my_model", inputs, outputs=outputs)
138
+ ```
139
+
140
+ ## Containerization
141
+
142
+ ### Docker for ML
143
+ ```dockerfile
144
# Multi-stage build for production
FROM python:3.10-slim AS builder

WORKDIR /app
COPY requirements.txt .
RUN pip install --user --no-cache-dir -r requirements.txt

FROM python:3.10-slim

# Non-root user for security
RUN useradd -m -u 1000 appuser
USER appuser

WORKDIR /app
# COPY does not honour USER, so hand the installed packages to the runtime
# user explicitly instead of leaving them owned by root.
COPY --from=builder --chown=appuser:appuser /root/.local /home/appuser/.local
COPY --chown=appuser:appuser . .

ENV PATH=/home/appuser/.local/bin:$PATH
ENV MODEL_PATH=/app/models/model.pt

# python:3.10-slim does not ship curl, so a curl-based check would always fail
# and mark the container unhealthy. Probe with the interpreter instead:
# urlopen raises (non-zero exit) on connection errors and HTTP error statuses.
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)" || exit 1

EXPOSE 8000
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
169
+ ```
170
+
171
+ ### Kubernetes Deployment
172
+ ```yaml
173
+ apiVersion: apps/v1
174
+ kind: Deployment
175
+ metadata:
176
+ name: ml-model
177
+ spec:
178
+ replicas: 3
179
+ selector:
180
+ matchLabels:
181
+ app: ml-model
182
+ template:
183
+ metadata:
184
+ labels:
185
+ app: ml-model
186
+ spec:
187
+ containers:
188
+ - name: model
189
+ image: ml-model:v1.0
190
+ resources:
191
+ requests:
192
+ memory: "2Gi"
193
+ cpu: "1"
194
+ nvidia.com/gpu: 1
195
+ limits:
196
+ memory: "4Gi"
197
+ cpu: "2"
198
+ nvidia.com/gpu: 1
199
+ ports:
200
+ - containerPort: 8000
201
+ livenessProbe:
202
+ httpGet:
203
+ path: /health
204
+ port: 8000
205
+ initialDelaySeconds: 30
206
+ periodSeconds: 10
207
+ readinessProbe:
208
+ httpGet:
209
+ path: /ready
210
+ port: 8000
211
+ initialDelaySeconds: 5
212
+ periodSeconds: 5
213
+ env:
214
+ - name: MODEL_VERSION
215
+ value: "1.0"
216
+ ---
217
+ apiVersion: v1
218
+ kind: Service
219
+ metadata:
220
+ name: ml-model-service
221
+ spec:
222
+ selector:
223
+ app: ml-model
224
+ ports:
225
+ - port: 80
226
+ targetPort: 8000
227
+ type: LoadBalancer
228
+ ---
229
+ apiVersion: autoscaling/v2
230
+ kind: HorizontalPodAutoscaler
231
+ metadata:
232
+ name: ml-model-hpa
233
+ spec:
234
+ scaleTargetRef:
235
+ apiVersion: apps/v1
236
+ kind: Deployment
237
+ name: ml-model
238
+ minReplicas: 2
239
+ maxReplicas: 10
240
+ metrics:
241
+ - type: Resource
242
+ resource:
243
+ name: cpu
244
+ target:
245
+ type: Utilization
246
+ averageUtilization: 70
247
+ ```
248
+
249
+ ## FastAPI Model Server
250
+
251
+ ```python
252
+ from fastapi import FastAPI, HTTPException
253
+ from pydantic import BaseModel
254
+ import torch
255
+ import numpy as np
256
+
257
import os

app = FastAPI(title="ML Model API", version="1.0")

# Deployment-time configuration. MODEL_PATH and MODEL_VERSION are the
# variables set by the Dockerfile and Kubernetes manifest in this guide; the
# defaults keep the previously hard-coded values.
MODEL_PATH = os.environ.get("MODEL_PATH", "model.pt")
MODEL_VERSION = os.environ.get("MODEL_VERSION", "1.0")

# Assigned by load_model() at startup. Bound to None up front so the
# `model is None` checks below are a real test rather than a NameError.
model = None


class PredictionRequest(BaseModel):
    """Request body for a single prediction: one flat feature vector."""

    features: list[float]


class PredictionResponse(BaseModel):
    """Response body: predicted class index, its softmax probability, and the model version."""

    prediction: int
    confidence: float
    model_version: str


# Load model on startup
@app.on_event("startup")
async def load_model():
    """Load the TorchScript model once, before the app starts serving."""
    global model
    # Request tensors are created on the CPU, so pin the model there too; a
    # GPU-saved model would otherwise fail with a device mismatch.
    model = torch.jit.load(MODEL_PATH, map_location="cpu")
    model.eval()


@app.get("/health")
async def health():
    """Liveness probe: succeeds whenever the process can answer requests."""
    return {"status": "healthy"}


@app.get("/ready")
async def ready():
    """Readiness probe: 503 until the model has been loaded."""
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    return {"status": "ready"}


@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
    """Classify one feature vector.

    Raises:
        HTTPException: 503 if the model is not loaded, 500 if inference fails.
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    # NOTE: inference is synchronous and blocks the event loop while it runs.
    try:
        input_tensor = torch.tensor([request.features])
        with torch.no_grad():
            output = model(input_tensor)
            probs = torch.softmax(output, dim=1)
            prediction = output.argmax(dim=1).item()
            confidence = probs[0][prediction].item()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    return PredictionResponse(
        prediction=prediction,
        confidence=confidence,
        model_version=MODEL_VERSION,
    )


@app.post("/batch_predict")
async def batch_predict(requests: list[PredictionRequest]):
    """Classify several feature vectors in one forward pass.

    Returns:
        ``{"predictions": [...]}`` with one class index per request, in order.

    Raises:
        HTTPException: 503 if the model is not loaded, 500 if inference fails
            (for example when the feature vectors differ in length).
    """
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    # An empty batch would build a shapeless tensor the model cannot consume.
    if not requests:
        return {"predictions": []}
    try:
        inputs = torch.tensor([r.features for r in requests])
        with torch.no_grad():
            outputs = model(inputs)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"predictions": outputs.argmax(dim=1).tolist()}
308
+ ```
309
+
310
+ ## Model Versioning
311
+
312
+ ```python
313
+ import mlflow
314
+
315
# Register model version
# Logs `model` inside a new run and registers it under "production_model".
with mlflow.start_run():
    mlflow.sklearn.log_model(model, "model", registered_model_name="production_model")

# Transition to production
# NOTE(review): version 3 is hard-coded; the version created by the run above
# is not looked up here - confirm this is the intended version.
# NOTE(review): registry stages appear to be deprecated in recent MLflow
# releases in favour of aliases - confirm against the installed version.
client = mlflow.tracking.MlflowClient()
client.transition_model_version_stage(
    name="production_model",
    version=3,
    stage="Production"
)

# Load production model
# Rebinds `model` to the generic pyfunc wrapper loaded from the registry URI.
model = mlflow.pyfunc.load_model("models:/production_model/Production")
329
+
330
+ # Canary deployment
331
def route_request(request, canary_percentage=10):
    """Send roughly ``canary_percentage`` percent of requests to the canary model.

    Args:
        request: Payload passed unchanged to the chosen model's ``predict``.
        canary_percentage: Share of traffic, in percent, routed to
            ``canary_model``; the rest goes to ``production_model``.

    Returns:
        Whatever the selected model's ``predict`` returns.
    """
    from random import random as draw

    # One uniform draw in [0, 1) decides the route for this request.
    use_canary = draw() < canary_percentage / 100
    chosen = canary_model if use_canary else production_model
    return chosen.predict(request)
336
+ ```
337
+
338
+ ## Commands
339
+ - `/omgdeploy:package` - Package model
340
+ - `/omgdeploy:serve` - Serve model
341
+ - `/omgdeploy:cloud` - Cloud deployment
342
+ - `/omgops:registry` - Model registry
343
+
344
+ ## Best Practices
345
+
346
+ 1. Use health and readiness probes
347
+ 2. Implement graceful shutdown
348
+ 3. Version models explicitly
349
+ 4. Monitor inference latency
350
+ 5. Use canary deployments for safety
@@ -0,0 +1,160 @@
1
+ ---
2
+ name: model-development
3
+ description: Model development practices including model selection, training pipelines, hyperparameter tuning, evaluation, and model registration.
4
+ ---
5
+
6
+ # Model Development
7
+
8
+ Building and training ML models effectively.
9
+
10
+ ## Model Selection
11
+
12
+ ```python
13
+ from sklearn.model_selection import cross_val_score
14
+
15
# Candidate classifiers, keyed by the label used in the report below.
models = dict(
    logistic=LogisticRegression(),
    random_forest=RandomForestClassifier(),
    xgboost=XGBClassifier(),
    lightgbm=LGBMClassifier(),
    catboost=CatBoostClassifier(verbose=False),
)

# Score every candidate with 5-fold cross-validated macro-F1 and keep the
# mean and spread so the candidates can be compared afterwards.
results = {}
for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=5, scoring="f1_macro")
    mean_f1, std_f1 = scores.mean(), scores.std()
    results[name] = {"mean": mean_f1, "std": std_f1}
    print(f"{name}: {mean_f1:.3f} (+/- {std_f1:.3f})")
31
+ ```
32
+
33
+ ## Training Pipeline
34
+
35
+ ```python
36
+ import torch
37
+ import torch.nn as nn
38
+ from torch.utils.data import DataLoader
39
+
40
class TrainingPipeline:
    """Minimal train/evaluate loop for a PyTorch classifier.

    Args:
        model: Module to train; it is moved to ``device``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        device: Device every batch is moved to.
        max_grad_norm: Gradient-norm clipping threshold applied before each
            optimizer step; ``None`` disables clipping.
    """

    def __init__(self, model, optimizer, criterion, device, max_grad_norm=1.0):
        self.model = model.to(device)
        self.optimizer = optimizer
        self.criterion = criterion
        self.device = device
        self.max_grad_norm = max_grad_norm

    def train_epoch(self, dataloader):
        """Run one optimisation pass over ``dataloader``.

        Args:
            dataloader: Iterable of batches whose first two items are the
                inputs and the targets.

        Returns:
            The mean per-batch loss as a float.

        Raises:
            ZeroDivisionError: If ``dataloader`` yields no batches.
        """
        self.model.train()
        total_loss = 0.0
        # Count batches while iterating rather than calling len(): iterable
        # style loaders and plain generators have no length.
        num_batches = 0
        for batch in dataloader:
            x, y = batch[0].to(self.device), batch[1].to(self.device)
            self.optimizer.zero_grad()
            output = self.model(x)
            loss = self.criterion(output, y)
            loss.backward()
            if self.max_grad_norm is not None:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
            self.optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        return total_loss / num_batches

    def evaluate(self, dataloader):
        """Compute classification accuracy over ``dataloader``.

        The predicted class is the argmax over dimension 1 of the model output.

        Returns:
            Fraction of targets predicted correctly, as a float in [0, 1].

        Raises:
            ValueError: If ``dataloader`` yields no samples.
        """
        self.model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch in dataloader:
                x, y = batch[0].to(self.device), batch[1].to(self.device)
                predicted = self.model(x).argmax(dim=1)
                # Accumulate counts directly: this needs no metrics import
                # and avoids a NumPy round trip per batch.
                correct += (predicted == y).sum().item()
                total += y.numel()
        if total == 0:
            raise ValueError("dataloader yielded no samples")
        return correct / total
71
+ ```
72
+
73
+ ## Hyperparameter Tuning
74
+
75
+ ```python
76
+ import optuna
77
+
78
def objective(trial):
    """Optuna objective: mean 5-fold macro-F1 of an XGBoost classifier.

    Args:
        trial: Optuna trial used to sample the four hyperparameters.

    Returns:
        Mean cross-validated ``f1_macro`` score on ``X_train`` / ``y_train``.
    """
    # Sample in a fixed order so the trial's parameter set is stable.
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    max_depth = trial.suggest_int("max_depth", 3, 10)
    n_estimators = trial.suggest_int("n_estimators", 50, 500)
    min_child_weight = trial.suggest_int("min_child_weight", 1, 10)

    classifier = XGBClassifier(
        learning_rate=learning_rate,
        max_depth=max_depth,
        n_estimators=n_estimators,
        min_child_weight=min_child_weight,
        use_label_encoder=False,
        eval_metric="logloss",
    )
    fold_scores = cross_val_score(classifier, X_train, y_train, cv=5, scoring="f1_macro")
    return fold_scores.mean()
90
+
91
# Maximise the objective's return value over 100 trials.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

# Report the best parameter set found and the score it achieved.
print(f"Best params: {study.best_params}")
print(f"Best F1: {study.best_value:.3f}")
96
+ ```
97
+
98
+ ## Model Evaluation
99
+
100
+ ```python
101
+ from sklearn.metrics import classification_report, confusion_matrix
102
+
103
def comprehensive_evaluation(model, X_test, y_test):
    """Print and return a set of classification metrics for a fitted model.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_test: Held-out features.
        y_test: Held-out labels.

    Returns:
        Dict with ``classification_report`` (as a dict), ``confusion_matrix``,
        ``roc_auc`` and ``pr_auc``.
    """
    # Imported locally: the surrounding snippet imports only
    # classification_report and confusion_matrix, so these two names would
    # otherwise raise NameError.
    from sklearn.metrics import average_precision_score, roc_auc_score

    y_pred = model.predict(X_test)
    # Column 1 is used as the positive-class score, so this assumes a binary
    # classifier.
    y_prob = model.predict_proba(X_test)[:, 1]

    # Classification metrics
    print(classification_report(y_test, y_pred))

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    print(f"Confusion Matrix:\n{cm}")

    # ROC-AUC
    roc_auc = roc_auc_score(y_test, y_prob)
    print(f"ROC-AUC: {roc_auc:.3f}")

    # Precision-Recall AUC
    pr_auc = average_precision_score(y_test, y_prob)
    print(f"PR-AUC: {pr_auc:.3f}")

    return {
        "classification_report": classification_report(y_test, y_pred, output_dict=True),
        "confusion_matrix": cm,
        "roc_auc": roc_auc,
        "pr_auc": pr_auc,
    }
128
+ ```
129
+
130
+ ## Model Registry
131
+
132
+ ```python
133
+ import mlflow.sklearn
134
+
135
# Register model
# Logs `model` inside a new run and registers it under "churn_predictor".
with mlflow.start_run():
    mlflow.sklearn.log_model(
        model,
        "model",
        registered_model_name="churn_predictor"
    )

# Load registered model
# Rebinds `model` to the pyfunc wrapper resolved from the registry URI.
# NOTE(review): nothing in this snippet moves a version into "Production",
# so this URI presumably relies on a transition done elsewhere - confirm.
model = mlflow.pyfunc.load_model(
    model_uri="models:/churn_predictor/Production"
)
147
+ ```
148
+
149
+ ## Commands
150
+ - `/omgtrain:train` - Train model
151
+ - `/omgtrain:tune` - Hyperparameter tuning
152
+ - `/omgtrain:evaluate` - Evaluate model
153
+
154
+ ## Best Practices
155
+
156
+ 1. Use cross-validation
157
+ 2. Tune hyperparameters systematically
158
+ 3. Evaluate on multiple metrics
159
+ 4. Check for overfitting
160
+ 5. Register successful models