omgkit 2.20.0 → 2.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -10
- package/package.json +1 -1
- package/plugin/agents/ai-architect-agent.md +282 -0
- package/plugin/agents/data-scientist-agent.md +221 -0
- package/plugin/agents/experiment-analyst-agent.md +318 -0
- package/plugin/agents/ml-engineer-agent.md +165 -0
- package/plugin/agents/mlops-engineer-agent.md +324 -0
- package/plugin/agents/model-optimizer-agent.md +287 -0
- package/plugin/agents/production-engineer-agent.md +360 -0
- package/plugin/agents/research-scientist-agent.md +274 -0
- package/plugin/commands/omgdata/augment.md +86 -0
- package/plugin/commands/omgdata/collect.md +81 -0
- package/plugin/commands/omgdata/label.md +83 -0
- package/plugin/commands/omgdata/split.md +83 -0
- package/plugin/commands/omgdata/validate.md +76 -0
- package/plugin/commands/omgdata/version.md +85 -0
- package/plugin/commands/omgdeploy/ab.md +94 -0
- package/plugin/commands/omgdeploy/cloud.md +89 -0
- package/plugin/commands/omgdeploy/edge.md +93 -0
- package/plugin/commands/omgdeploy/package.md +91 -0
- package/plugin/commands/omgdeploy/serve.md +92 -0
- package/plugin/commands/omgfeature/embed.md +93 -0
- package/plugin/commands/omgfeature/extract.md +93 -0
- package/plugin/commands/omgfeature/select.md +85 -0
- package/plugin/commands/omgfeature/store.md +97 -0
- package/plugin/commands/omgml/init.md +60 -0
- package/plugin/commands/omgml/status.md +82 -0
- package/plugin/commands/omgops/drift.md +87 -0
- package/plugin/commands/omgops/monitor.md +99 -0
- package/plugin/commands/omgops/pipeline.md +102 -0
- package/plugin/commands/omgops/registry.md +109 -0
- package/plugin/commands/omgops/retrain.md +91 -0
- package/plugin/commands/omgoptim/distill.md +90 -0
- package/plugin/commands/omgoptim/profile.md +92 -0
- package/plugin/commands/omgoptim/prune.md +81 -0
- package/plugin/commands/omgoptim/quantize.md +83 -0
- package/plugin/commands/omgtrain/baseline.md +78 -0
- package/plugin/commands/omgtrain/compare.md +99 -0
- package/plugin/commands/omgtrain/evaluate.md +85 -0
- package/plugin/commands/omgtrain/train.md +81 -0
- package/plugin/commands/omgtrain/tune.md +89 -0
- package/plugin/registry.yaml +252 -2
- package/plugin/skills/ml-systems/SKILL.md +65 -0
- package/plugin/skills/ml-systems/ai-accelerators/SKILL.md +342 -0
- package/plugin/skills/ml-systems/data-eng/SKILL.md +126 -0
- package/plugin/skills/ml-systems/deep-learning-primer/SKILL.md +143 -0
- package/plugin/skills/ml-systems/deployment-paradigms/SKILL.md +148 -0
- package/plugin/skills/ml-systems/dnn-architectures/SKILL.md +128 -0
- package/plugin/skills/ml-systems/edge-deployment/SKILL.md +366 -0
- package/plugin/skills/ml-systems/efficient-ai/SKILL.md +316 -0
- package/plugin/skills/ml-systems/feature-engineering/SKILL.md +151 -0
- package/plugin/skills/ml-systems/ml-frameworks/SKILL.md +187 -0
- package/plugin/skills/ml-systems/ml-serving-optimization/SKILL.md +371 -0
- package/plugin/skills/ml-systems/ml-systems-fundamentals/SKILL.md +103 -0
- package/plugin/skills/ml-systems/ml-workflow/SKILL.md +162 -0
- package/plugin/skills/ml-systems/mlops/SKILL.md +386 -0
- package/plugin/skills/ml-systems/model-deployment/SKILL.md +350 -0
- package/plugin/skills/ml-systems/model-dev/SKILL.md +160 -0
- package/plugin/skills/ml-systems/model-optimization/SKILL.md +339 -0
- package/plugin/skills/ml-systems/robust-ai/SKILL.md +395 -0
- package/plugin/skills/ml-systems/training-data/SKILL.md +152 -0
- package/plugin/workflows/ml-systems/data-preparation-workflow.md +276 -0
- package/plugin/workflows/ml-systems/edge-deployment-workflow.md +413 -0
- package/plugin/workflows/ml-systems/full-ml-lifecycle-workflow.md +405 -0
- package/plugin/workflows/ml-systems/hyperparameter-tuning-workflow.md +352 -0
- package/plugin/workflows/ml-systems/mlops-pipeline-workflow.md +384 -0
- package/plugin/workflows/ml-systems/model-deployment-workflow.md +392 -0
- package/plugin/workflows/ml-systems/model-development-workflow.md +218 -0
- package/plugin/workflows/ml-systems/model-evaluation-workflow.md +416 -0
- package/plugin/workflows/ml-systems/model-optimization-workflow.md +390 -0
- package/plugin/workflows/ml-systems/monitoring-drift-workflow.md +446 -0
- package/plugin/workflows/ml-systems/retraining-workflow.md +401 -0
- package/plugin/workflows/ml-systems/training-pipeline-workflow.md +382 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Model Optimization Workflow
|
|
3
|
+
description: Model optimization workflow for improving efficiency through quantization, pruning, distillation, and hardware-specific optimizations.
|
|
4
|
+
category: ml-systems
|
|
5
|
+
complexity: medium
|
|
6
|
+
agents:
|
|
7
|
+
- model-optimizer-agent
|
|
8
|
+
- production-engineer-agent
- experiment-analyst-agent
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Model Optimization Workflow
|
|
12
|
+
|
|
13
|
+
Optimize models for production efficiency.
|
|
14
|
+
|
|
15
|
+
## Overview
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
19
|
+
│ MODEL OPTIMIZATION WORKFLOW │
|
|
20
|
+
├─────────────────────────────────────────────────────────────┤
|
|
21
|
+
│ │
|
|
22
|
+
│ 1. PROFILE 2. QUANTIZE 3. PRUNE │
|
|
23
|
+
│ ↓ ↓ ↓ │
|
|
24
|
+
│ Baseline metrics FP16/INT8 Remove weights │
|
|
25
|
+
│ Bottleneck ID Calibration Structured/unstructured │
|
|
26
|
+
│ Target setting Accuracy check Fine-tune │
|
|
27
|
+
│ │
|
|
28
|
+
│ 4. DISTILL 5. COMPILE 6. VALIDATE │
|
|
29
|
+
│ ↓ ↓ ↓ │
|
|
30
|
+
│ Teacher-student TensorRT/XLA A/B comparison │
|
|
31
|
+
│ Smaller model Graph optimize Accuracy delta │
|
|
32
|
+
│ Match accuracy Target hardware Production test │
|
|
33
|
+
│ │
|
|
34
|
+
└─────────────────────────────────────────────────────────────┘
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Steps
|
|
38
|
+
|
|
39
|
+
### Step 1: Profile Baseline
|
|
40
|
+
**Agent**: model-optimizer-agent
|
|
41
|
+
|
|
42
|
+
**Inputs**:
|
|
43
|
+
- Original model
|
|
44
|
+
- Target hardware
|
|
45
|
+
- Performance requirements
|
|
46
|
+
|
|
47
|
+
**Actions**:
|
|
48
|
+
```bash
|
|
49
|
+
# Profile model
|
|
50
|
+
/omgoptim:profile --model model.pt --device cuda --batch-sizes 1,8,32
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
def profile_model(model, input_shape, device='cuda'):
    """Measure size, parameter count, FLOPs and batch-1 latency of a model.

    Args:
        model: Module to profile. It is moved to ``device`` and switched to
            eval mode.
        input_shape: Shape of a single sample *without* the batch dimension;
            a random batch of size 1 is generated from it.
        device: Device string or ``torch.device`` to run on (default
            ``'cuda'``).

    Returns:
        dict with the keys ``size_mb`` (parameters + buffers in MiB),
        ``params``, ``flops`` (``None`` when ``thop`` is not installed),
        ``latency_ms`` (mean wall-clock time per forward pass) and
        ``throughput`` (forward passes per second at batch size 1).
    """
    import time

    import torch

    # thop is only needed for the FLOP count; the rest of the profile is still
    # useful without it, so degrade gracefully instead of failing on import.
    try:
        from thop import profile as thop_profile
    except ImportError:
        thop_profile = None

    device = torch.device(device)
    model = model.to(device).eval()
    x = torch.randn(1, *input_shape, device=device)

    def _sync():
        # CUDA kernels run asynchronously; wait for them so the timer measures
        # real work. Calling this on a CPU-only run would raise, so guard it.
        if device.type == 'cuda':
            torch.cuda.synchronize(device)

    # Model size
    param_size = sum(p.numel() * p.element_size() for p in model.parameters())
    buffer_size = sum(b.numel() * b.element_size() for b in model.buffers())

    warmup = 50
    iterations = 200

    # Inference only: skip autograd bookkeeping so it does not inflate latency
    # or memory use.
    with torch.no_grad():
        # FLOPs
        if thop_profile is not None:
            flops, params = thop_profile(model, inputs=(x,), verbose=False)
        else:
            flops = None
            params = sum(p.numel() for p in model.parameters())

        # Latency
        for _ in range(warmup):
            model(x)

        _sync()
        start = time.perf_counter()
        for _ in range(iterations):
            model(x)
        _sync()
        latency_ms = (time.perf_counter() - start) / iterations * 1000

    return {
        'size_mb': (param_size + buffer_size) / 1024 / 1024,
        'params': params,
        'flops': flops,
        'latency_ms': latency_ms,
        'throughput': 1000 / latency_ms
    }
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**Outputs**:
|
|
93
|
+
- Baseline metrics
|
|
94
|
+
- Bottleneck analysis
|
|
95
|
+
- Optimization targets
|
|
96
|
+
|
|
97
|
+
### Step 2: Quantization
|
|
98
|
+
**Agent**: model-optimizer-agent
|
|
99
|
+
|
|
100
|
+
**Inputs**:
|
|
101
|
+
- Model
|
|
102
|
+
- Calibration data
|
|
103
|
+
- Target precision
|
|
104
|
+
|
|
105
|
+
**Actions**:
|
|
106
|
+
```bash
|
|
107
|
+
# Apply quantization
|
|
108
|
+
/omgoptim:quantize --model model.pt --precision int8 --calibration calibration.pt
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
import torch
|
|
113
|
+
|
|
114
|
+
def apply_quantization(model, calibration_loader, method='dynamic',
                       train_loader=None, qat_epochs=3):
    """Quantize ``model`` with one of three PyTorch eager-mode strategies.

    Args:
        model: Float model to quantize.
        calibration_loader: Iterable of batches whose first element is the
            model input. Used to calibrate in ``'static'`` mode and as the
            fine-tuning data in ``'qat'`` mode when ``train_loader`` is not
            given.
        method: ``'dynamic'`` (Linear/LSTM layers quantized on the fly),
            ``'static'`` (calibrated with ``calibration_loader``) or
            ``'qat'`` (quantization-aware fine-tuning).
        train_loader: Optional training data for ``'qat'``; defaults to
            ``calibration_loader``.
        qat_epochs: Number of fine-tuning epochs in ``'qat'`` mode.

    Returns:
        The quantized model.

    Raises:
        ValueError: If ``method`` is not one of the supported strategies.
    """
    supported = ('dynamic', 'static', 'qat')
    if method not in supported:
        # Without this check an unknown method fell through to the return
        # statement and failed with an unrelated UnboundLocalError.
        raise ValueError(
            f"Unknown quantization method {method!r}; expected one of {supported}"
        )

    if method == 'dynamic':
        # Dynamic quantization (weights only)
        return torch.quantization.quantize_dynamic(
            model,
            {torch.nn.Linear, torch.nn.LSTM},
            dtype=torch.qint8
        )

    if method == 'static':
        # Static quantization (weights + activations)
        model.eval()
        model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
        model_prepared = torch.quantization.prepare(model)

        # Calibrate: run representative inputs through the prepared model.
        with torch.no_grad():
            for batch in calibration_loader:
                model_prepared(batch[0])

        return torch.quantization.convert(model_prepared)

    # Quantization-aware training
    model.train()
    model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
    model_prepared = torch.quantization.prepare_qat(model)

    # Fine-tune. The original body referenced an undefined `train_loader`;
    # fall back to the calibration data when none is supplied.
    loader = train_loader if train_loader is not None else calibration_loader
    for _ in range(qat_epochs):
        # NOTE(review): train_epoch is not defined in this snippet; it is
        # expected to run one training epoch over `loader`.
        train_epoch(model_prepared, loader)

    model_prepared.eval()
    return torch.quantization.convert(model_prepared)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Outputs**:
|
|
153
|
+
- Quantized model
|
|
154
|
+
- Size reduction
|
|
155
|
+
- Accuracy comparison
|
|
156
|
+
|
|
157
|
+
### Step 3: Pruning
|
|
158
|
+
**Agent**: model-optimizer-agent
|
|
159
|
+
|
|
160
|
+
**Inputs**:
|
|
161
|
+
- Model
|
|
162
|
+
- Target sparsity
|
|
163
|
+
- Pruning strategy
|
|
164
|
+
|
|
165
|
+
**Actions**:
|
|
166
|
+
```bash
|
|
167
|
+
# Apply pruning
|
|
168
|
+
/omgoptim:prune --model model.pt --sparsity 0.5 --method magnitude
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
import torch.nn.utils.prune as prune
|
|
173
|
+
|
|
174
|
+
def iterative_pruning(model, target_sparsity, train_loader, val_loader,
                      n_steps=5, finetune_epochs=2):
    """Prune Linear/Conv2d weights gradually, fine-tuning after every step.

    Args:
        model: Model to prune in place.
        target_sparsity: Fraction (0.0-1.0) of each Linear/Conv2d weight
            tensor that should be zero once all steps are done.
        train_loader: Data passed to ``train_epoch`` for fine-tuning.
        val_loader: Data passed to ``evaluate`` after each step.
        n_steps: Number of prune/fine-tune rounds.
        finetune_epochs: Fine-tuning epochs run after each pruning round.

    Returns:
        The same model with the pruning masks folded into its weights.

    Raises:
        ValueError: If ``n_steps`` is below 1 or ``target_sparsity`` is
            outside ``[0, 1]``.
    """
    import torch

    if n_steps < 1:
        raise ValueError(f"n_steps must be >= 1, got {n_steps}")
    if not 0.0 <= target_sparsity <= 1.0:
        raise ValueError(
            f"target_sparsity must be within [0, 1], got {target_sparsity}"
        )

    # Repeated pruning applies `amount` to the weights that are still
    # unpruned, so pruning target/n_steps per round ends at
    # 1 - (1 - target/n)^n, short of the target. Solve
    # (1 - a)^n_steps == 1 - target_sparsity for the per-round fraction.
    step_amount = 1.0 - (1.0 - target_sparsity) ** (1.0 / n_steps)

    prunable_types = (torch.nn.Linear, torch.nn.Conv2d)

    for step in range(n_steps):
        # Prune
        for module in model.modules():
            if isinstance(module, prunable_types):
                prune.l1_unstructured(module, 'weight', amount=step_amount)

        # Fine-tune
        # NOTE(review): train_epoch / evaluate are not defined in this
        # snippet and must be supplied by the surrounding project.
        for _ in range(finetune_epochs):
            train_epoch(model, train_loader)

        # Evaluate
        current_sparsity = calculate_sparsity(model)
        accuracy = evaluate(model, val_loader)
        print(f"Step {step+1}: Sparsity={current_sparsity:.2%}, Accuracy={accuracy:.4f}")

    # Make permanent: drop the weight_orig/weight_mask reparametrization and
    # keep the masked tensor as the plain `weight` parameter.
    for module in model.modules():
        if hasattr(module, 'weight_orig'):
            prune.remove(module, 'weight')

    return model
|
|
200
|
+
|
|
201
|
+
def calculate_sparsity(model):
    """Return the fraction of parameter entries in ``model`` that are zero.

    While ``torch.nn.utils.prune`` masks are active, a pruned tensor is stored
    as ``<name>_orig`` (the unmasked values, a parameter) plus ``<name>_mask``
    (a buffer). Counting zeros in ``model.parameters()`` alone therefore
    misses masked entries, so the mask is applied here before counting.

    Args:
        model: Module whose parameters are inspected.

    Returns:
        Zero entries divided by total entries over all parameters, or ``0.0``
        for a model without parameters.
    """
    total_zeros = 0
    total_params = 0
    seen = set()  # count shared parameters once, as model.parameters() does

    for module in model.modules():
        for name, param in module.named_parameters(recurse=False):
            if id(param) in seen:
                continue
            seen.add(id(param))

            values = param.detach()
            if name.endswith('_orig'):
                mask = getattr(module, name[:-len('_orig')] + '_mask', None)
                if mask is not None:
                    # Effective weight is the stored value times the mask.
                    values = values * mask

            total_zeros += (values == 0).sum().item()
            total_params += values.numel()

    if total_params == 0:
        return 0.0
    return total_zeros / total_params
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
**Outputs**:
|
|
211
|
+
- Pruned model
|
|
212
|
+
- Sparsity achieved
|
|
213
|
+
- Performance impact
|
|
214
|
+
|
|
215
|
+
### Step 4: Knowledge Distillation
|
|
216
|
+
**Agent**: model-optimizer-agent
|
|
217
|
+
|
|
218
|
+
**Inputs**:
|
|
219
|
+
- Teacher model
|
|
220
|
+
- Student architecture
|
|
221
|
+
- Training data
|
|
222
|
+
|
|
223
|
+
**Actions**:
|
|
224
|
+
```bash
|
|
225
|
+
# Distill knowledge
|
|
226
|
+
/omgoptim:distill --teacher teacher.pt --student-config student.yaml --epochs 10
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
import torch
import torch.nn.functional as F


class DistillationTrainer:
    """Train a student model to imitate a frozen teacher.

    The loss blends the temperature-softened KL divergence between teacher
    and student outputs with the ordinary cross-entropy on the labels.
    """

    def __init__(self, teacher, student, temperature=4.0, alpha=0.5):
        """Store the models and loss hyper-parameters.

        Args:
            teacher: Trained model; it is put in eval mode.
            student: Model to be trained.
            temperature: Softening temperature applied to both sets of
                logits; must be positive.
            alpha: Weight of the hard-label loss; the soft loss is weighted
                by ``1 - alpha``.

        Raises:
            ValueError: If ``temperature`` is not positive.
        """
        if temperature <= 0:
            # Logits are divided by the temperature; zero or negative values
            # give inf/NaN or inverted distributions.
            raise ValueError(f"temperature must be > 0, got {temperature}")
        self.teacher = teacher.eval()
        self.student = student
        self.temperature = temperature
        self.alpha = alpha

    def train(self, train_loader, epochs, optimizer):
        """Run ``epochs`` passes over ``train_loader``, updating the student.

        Args:
            train_loader: Iterable yielding ``(inputs, labels)`` batches.
            epochs: Number of passes over ``train_loader``.
            optimizer: Optimizer whose ``zero_grad`` and ``step`` are called
                once per batch; expected to hold the student's parameters.
        """
        # Make sure train-time layer behavior is active on the student; the
        # teacher stays in eval mode from __init__.
        self.student.train()

        for _ in range(epochs):
            for x, y in train_loader:
                optimizer.zero_grad()

                # The teacher only supplies targets, so no gradients needed.
                with torch.no_grad():
                    teacher_logits = self.teacher(x)

                student_logits = self.student(x)

                # Distillation loss
                loss = self.distillation_loss(student_logits, teacher_logits, y)

                loss.backward()
                optimizer.step()

    def distillation_loss(self, student_logits, teacher_logits, labels):
        """Return ``alpha * hard_loss + (1 - alpha) * soft_loss``.

        Args:
            student_logits: Student outputs; softmax is taken over dim 1.
            teacher_logits: Teacher outputs, same layout as the student's.
            labels: Targets passed to ``F.cross_entropy``.
        """
        # Soft targets
        soft_teacher = F.softmax(teacher_logits / self.temperature, dim=1)
        soft_student = F.log_softmax(student_logits / self.temperature, dim=1)
        soft_loss = F.kl_div(soft_student, soft_teacher, reduction='batchmean')
        # Rescale by T^2 so the soft-loss magnitude does not shrink as the
        # temperature grows.
        soft_loss = soft_loss * self.temperature ** 2

        # Hard targets
        hard_loss = F.cross_entropy(student_logits, labels)

        return self.alpha * hard_loss + (1 - self.alpha) * soft_loss
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
**Outputs**:
|
|
267
|
+
- Distilled student model
|
|
268
|
+
- Compression ratio
|
|
269
|
+
- Accuracy retention
|
|
270
|
+
|
|
271
|
+
### Step 5: Compile & Optimize
|
|
272
|
+
**Agent**: production-engineer-agent
|
|
273
|
+
|
|
274
|
+
**Inputs**:
|
|
275
|
+
- Optimized model
|
|
276
|
+
- Target hardware
|
|
277
|
+
- Deployment format
|
|
278
|
+
|
|
279
|
+
**Actions**:
|
|
280
|
+
```python
|
|
281
|
+
# TensorRT compilation
|
|
282
|
+
def compile_tensorrt(model, input_shape, precision='fp16'):
    """Compile ``model`` with Torch-TensorRT for a dynamic batch size of 1-32.

    Args:
        model: Module handed to ``torch_tensorrt.compile``.
        input_shape: Shape of one sample without the batch dimension.
        precision: ``'fp16'`` selects half precision; any other value selects
            float32.

    Returns:
        The module returned by ``torch_tensorrt.compile``.
    """
    # `torch` is needed for the dtype constants below; import it locally so
    # the snippet also works when copied on its own.
    import torch
    import torch_tensorrt

    dtype = torch.float16 if precision == 'fp16' else torch.float32

    # Batch dimension ranges from 1 to 32, with 8 given as the optimum shape.
    dynamic_input = torch_tensorrt.Input(
        min_shape=[1, *input_shape],
        opt_shape=[8, *input_shape],
        max_shape=[32, *input_shape],
        dtype=dtype
    )

    trt_model = torch_tensorrt.compile(
        model,
        inputs=[dynamic_input],
        enabled_precisions={dtype}
    )

    return trt_model
|
|
297
|
+
|
|
298
|
+
# ONNX export and optimize
|
|
299
|
+
def export_onnx(model, input_shape, output_path):
    """Export ``model`` to ONNX and open it with ONNX Runtime optimizations.

    Args:
        model: Module to export.
        input_shape: Shape of one sample without the batch dimension.
        output_path: Destination path of the ``.onnx`` file.

    Returns:
        An ``onnxruntime.InferenceSession`` for the exported file, created
        with all graph optimizations enabled.
    """
    import torch

    # Build the dummy input on the model's device so tracing does not fail
    # with a device mismatch; parameter-less models fall back to CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    x = torch.randn(1, *input_shape, device=device)

    torch.onnx.export(
        model, x, output_path,
        input_names=['input'],
        output_names=['output'],
        # Axis 0 is declared dynamic so the exported graph accepts any batch.
        dynamic_axes={'input': {0: 'batch'}, 'output': {0: 'batch'}},
        opset_version=14
    )

    # Optimize with ONNX Runtime
    import onnxruntime as ort
    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

    # The options only take effect once a session is created with them; the
    # original built them and then discarded them.
    session = ort.InferenceSession(
        output_path,
        sess_options=sess_options,
        providers=ort.get_available_providers()
    )
    return session
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
**Outputs**:
|
|
316
|
+
- Compiled model
|
|
317
|
+
- Optimized graph
|
|
318
|
+
- Deployment artifacts
|
|
319
|
+
|
|
320
|
+
### Step 6: Validation
|
|
321
|
+
**Agent**: experiment-analyst-agent
|
|
322
|
+
|
|
323
|
+
**Inputs**:
|
|
324
|
+
- Original model
|
|
325
|
+
- Optimized model
|
|
326
|
+
- Test dataset
|
|
327
|
+
|
|
328
|
+
**Actions**:
|
|
329
|
+
```python
|
|
330
|
+
def validate_optimization(original_model, optimized_model, test_loader, tolerance=0.02,
                          input_shape=None, device='cuda'):
    """Compare an optimized model with its original on accuracy and speed.

    Args:
        original_model: Baseline model.
        optimized_model: Model produced by the optimization steps.
        test_loader: Evaluation data. Must expose ``targets`` and yield
            batches whose first element is the model input.
        tolerance: Maximum allowed absolute accuracy difference.
        input_shape: Per-sample input shape passed to ``profile_model``. When
            omitted it is taken from the first batch of ``test_loader``.
        device: Device passed to ``profile_model``.

    Returns:
        dict with ``accuracy`` (original, optimized, delta, within_tolerance),
        ``speedup``, ``size_reduction`` (``None`` if the optimized model
        reports a size of 0), ``profiles`` and ``approved``.
    """
    # Accuracy comparison
    # NOTE(review): predict_all and accuracy_score are not defined in this
    # snippet; they must be supplied by the surrounding project.
    orig_preds = predict_all(original_model, test_loader)
    opt_preds = predict_all(optimized_model, test_loader)

    # NOTE(review): assumes test_loader exposes `.targets` — confirm for the
    # loader type in use.
    orig_acc = accuracy_score(test_loader.targets, orig_preds)
    opt_acc = accuracy_score(test_loader.targets, opt_preds)

    # Profile comparison
    # profile_model requires an input shape; the original call omitted it and
    # raised TypeError. Derive it from the data when not given explicitly.
    if input_shape is None:
        first_batch = next(iter(test_loader))
        input_shape = tuple(first_batch[0].shape[1:])

    orig_profile = profile_model(original_model, input_shape, device=device)
    opt_profile = profile_model(optimized_model, input_shape, device=device)

    # A reported size of 0 would divide by zero; report None instead.
    opt_size = opt_profile['size_mb']
    size_reduction = orig_profile['size_mb'] / opt_size if opt_size else None

    results = {
        'accuracy': {
            'original': orig_acc,
            'optimized': opt_acc,
            'delta': opt_acc - orig_acc,
            'within_tolerance': abs(orig_acc - opt_acc) < tolerance
        },
        'speedup': orig_profile['latency_ms'] / opt_profile['latency_ms'],
        'size_reduction': size_reduction,
        'profiles': {
            'original': orig_profile,
            'optimized': opt_profile
        }
    }

    results['approved'] = (
        results['accuracy']['within_tolerance'] and
        results['speedup'] > 1.5  # At least 1.5x faster
    )

    return results
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
**Outputs**:
|
|
366
|
+
- Comparison report
|
|
367
|
+
- Approval status
|
|
368
|
+
- Production recommendation
|
|
369
|
+
|
|
370
|
+
## Artifacts
|
|
371
|
+
|
|
372
|
+
- `baseline_profile.json` - Original metrics
|
|
373
|
+
- `optimized_model.pt` - Final model
|
|
374
|
+
- `optimization_report.json` - Full report
|
|
375
|
+
- `onnx/` - ONNX exports
|
|
376
|
+
- `tensorrt/` - TRT engines
|
|
377
|
+
|
|
378
|
+
## Next Workflows
|
|
379
|
+
|
|
380
|
+
After optimization:
|
|
381
|
+
- → **model-deployment-workflow** for production
|
|
382
|
+
- → **edge-deployment-workflow** for edge devices
|
|
383
|
+
|
|
384
|
+
## Quality Gates
|
|
385
|
+
|
|
386
|
+
- [ ] All steps completed successfully
|
|
387
|
+
- [ ] Metrics meet defined thresholds (Step 6: accuracy delta within tolerance, speedup above 1.5x)
|
|
388
|
+
- [ ] Documentation updated
|
|
389
|
+
- [ ] Artifacts versioned and stored
|
|
390
|
+
- [ ] Stakeholder approval obtained
|