omgkit 2.19.3 → 2.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/README.md +537 -338
  2. package/package.json +2 -2
  3. package/plugin/agents/ai-architect-agent.md +282 -0
  4. package/plugin/agents/data-scientist-agent.md +221 -0
  5. package/plugin/agents/experiment-analyst-agent.md +318 -0
  6. package/plugin/agents/ml-engineer-agent.md +165 -0
  7. package/plugin/agents/mlops-engineer-agent.md +324 -0
  8. package/plugin/agents/model-optimizer-agent.md +287 -0
  9. package/plugin/agents/production-engineer-agent.md +360 -0
  10. package/plugin/agents/research-scientist-agent.md +274 -0
  11. package/plugin/commands/omgdata/augment.md +86 -0
  12. package/plugin/commands/omgdata/collect.md +81 -0
  13. package/plugin/commands/omgdata/label.md +83 -0
  14. package/plugin/commands/omgdata/split.md +83 -0
  15. package/plugin/commands/omgdata/validate.md +76 -0
  16. package/plugin/commands/omgdata/version.md +85 -0
  17. package/plugin/commands/omgdeploy/ab.md +94 -0
  18. package/plugin/commands/omgdeploy/cloud.md +89 -0
  19. package/plugin/commands/omgdeploy/edge.md +93 -0
  20. package/plugin/commands/omgdeploy/package.md +91 -0
  21. package/plugin/commands/omgdeploy/serve.md +92 -0
  22. package/plugin/commands/omgfeature/embed.md +93 -0
  23. package/plugin/commands/omgfeature/extract.md +93 -0
  24. package/plugin/commands/omgfeature/select.md +85 -0
  25. package/plugin/commands/omgfeature/store.md +97 -0
  26. package/plugin/commands/omgml/init.md +60 -0
  27. package/plugin/commands/omgml/status.md +82 -0
  28. package/plugin/commands/omgops/drift.md +87 -0
  29. package/plugin/commands/omgops/monitor.md +99 -0
  30. package/plugin/commands/omgops/pipeline.md +102 -0
  31. package/plugin/commands/omgops/registry.md +109 -0
  32. package/plugin/commands/omgops/retrain.md +91 -0
  33. package/plugin/commands/omgoptim/distill.md +90 -0
  34. package/plugin/commands/omgoptim/profile.md +92 -0
  35. package/plugin/commands/omgoptim/prune.md +81 -0
  36. package/plugin/commands/omgoptim/quantize.md +83 -0
  37. package/plugin/commands/omgtrain/baseline.md +78 -0
  38. package/plugin/commands/omgtrain/compare.md +99 -0
  39. package/plugin/commands/omgtrain/evaluate.md +85 -0
  40. package/plugin/commands/omgtrain/train.md +81 -0
  41. package/plugin/commands/omgtrain/tune.md +89 -0
  42. package/plugin/registry.yaml +252 -2
  43. package/plugin/skills/ml-systems/SKILL.md +65 -0
  44. package/plugin/skills/ml-systems/ai-accelerators/SKILL.md +342 -0
  45. package/plugin/skills/ml-systems/data-eng/SKILL.md +126 -0
  46. package/plugin/skills/ml-systems/deep-learning-primer/SKILL.md +143 -0
  47. package/plugin/skills/ml-systems/deployment-paradigms/SKILL.md +148 -0
  48. package/plugin/skills/ml-systems/dnn-architectures/SKILL.md +128 -0
  49. package/plugin/skills/ml-systems/edge-deployment/SKILL.md +366 -0
  50. package/plugin/skills/ml-systems/efficient-ai/SKILL.md +316 -0
  51. package/plugin/skills/ml-systems/feature-engineering/SKILL.md +151 -0
  52. package/plugin/skills/ml-systems/ml-frameworks/SKILL.md +187 -0
  53. package/plugin/skills/ml-systems/ml-serving-optimization/SKILL.md +371 -0
  54. package/plugin/skills/ml-systems/ml-systems-fundamentals/SKILL.md +103 -0
  55. package/plugin/skills/ml-systems/ml-workflow/SKILL.md +162 -0
  56. package/plugin/skills/ml-systems/mlops/SKILL.md +386 -0
  57. package/plugin/skills/ml-systems/model-deployment/SKILL.md +350 -0
  58. package/plugin/skills/ml-systems/model-dev/SKILL.md +160 -0
  59. package/plugin/skills/ml-systems/model-optimization/SKILL.md +339 -0
  60. package/plugin/skills/ml-systems/robust-ai/SKILL.md +395 -0
  61. package/plugin/skills/ml-systems/training-data/SKILL.md +152 -0
  62. package/plugin/workflows/ml-systems/data-preparation-workflow.md +276 -0
  63. package/plugin/workflows/ml-systems/edge-deployment-workflow.md +413 -0
  64. package/plugin/workflows/ml-systems/full-ml-lifecycle-workflow.md +405 -0
  65. package/plugin/workflows/ml-systems/hyperparameter-tuning-workflow.md +352 -0
  66. package/plugin/workflows/ml-systems/mlops-pipeline-workflow.md +384 -0
  67. package/plugin/workflows/ml-systems/model-deployment-workflow.md +392 -0
  68. package/plugin/workflows/ml-systems/model-development-workflow.md +218 -0
  69. package/plugin/workflows/ml-systems/model-evaluation-workflow.md +416 -0
  70. package/plugin/workflows/ml-systems/model-optimization-workflow.md +390 -0
  71. package/plugin/workflows/ml-systems/monitoring-drift-workflow.md +446 -0
  72. package/plugin/workflows/ml-systems/retraining-workflow.md +401 -0
  73. package/plugin/workflows/ml-systems/training-pipeline-workflow.md +382 -0
package/plugin/skills/ml-systems/model-optimization/SKILL.md
@@ -0,0 +1,339 @@
---
name: model-optimization
description: Model optimization techniques including hyperparameter tuning, architecture search, training optimization, and performance profiling for ML systems.
---

# Model Optimization

Techniques for optimizing ML model performance: hyperparameter search, training efficiency, architecture search, memory footprint, profiling, and distributed training.

## Hyperparameter Optimization

### Optuna Integration
```python
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

def objective(trial):
    # Suggest hyperparameters (build_model, train_epoch, validate, and the
    # loaders are project-specific helpers)
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True),
        'batch_size': trial.suggest_categorical('batch_size', [16, 32, 64, 128]),
        'num_layers': trial.suggest_int('num_layers', 2, 8),
        'hidden_dim': trial.suggest_int('hidden_dim', 64, 512, step=64),
        'dropout': trial.suggest_float('dropout', 0.1, 0.5),
        'optimizer': trial.suggest_categorical('optimizer', ['adam', 'sgd', 'adamw']),
        'weight_decay': trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True),
    }

    model = build_model(params)

    for epoch in range(max_epochs):
        train_loss = train_epoch(model, train_loader)
        val_loss = validate(model, val_loader)

        # Report intermediate values so the pruner can stop unpromising trials
        trial.report(val_loss, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return val_loss

# Create study with pruning
study = optuna.create_study(
    direction='minimize',
    sampler=TPESampler(seed=42),
    pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=10)
)

study.optimize(objective, n_trials=100, n_jobs=4)

print(f"Best params: {study.best_params}")
print(f"Best value: {study.best_value:.4f}")
```

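For longer searches it can help to persist the study so it can be resumed or shared across workers. A minimal sketch using Optuna's SQLite storage backend (the study name and database path are placeholders), reusing the `objective` defined above:

```python
import optuna

# Persisting trials lets study.optimize be re-run (or run from several
# processes pointing at the same storage URL) without losing history.
study = optuna.create_study(
    study_name="model-optimization-search",   # placeholder name
    storage="sqlite:///optuna_study.db",      # placeholder path
    direction="minimize",
    load_if_exists=True,                       # resume if the study already exists
)
study.optimize(objective, n_trials=25)

print(f"Completed trials: {len(study.trials)}")
print(f"Best params so far: {study.best_params}")
```
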
### Hyperparameter Search Strategies
```python
# Grid Search
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7, 10],
    'learning_rate': [0.01, 0.1, 0.3]
}

grid = GridSearchCV(model, param_grid, cv=5, scoring='f1_macro', n_jobs=-1)
grid.fit(X_train, y_train)

# Random Search
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint

param_distributions = {
    'n_estimators': randint(50, 500),
    'max_depth': randint(2, 15),
    'learning_rate': uniform(0.001, 0.5)
}

random_search = RandomizedSearchCV(
    model, param_distributions, n_iter=100, cv=5, random_state=42
)
random_search.fit(X_train, y_train)

# Bayesian Optimization with scikit-optimize
from skopt import BayesSearchCV
from skopt.space import Real, Integer, Categorical

search_spaces = {
    'learning_rate': Real(1e-5, 1e-1, prior='log-uniform'),
    'num_layers': Integer(2, 10),
    'activation': Categorical(['relu', 'gelu', 'swish'])
}

bayes_search = BayesSearchCV(
    model, search_spaces, n_iter=50, cv=5, random_state=42
)
bayes_search.fit(X_train, y_train)
```

## Training Optimization

### Learning Rate Scheduling
```python
import math

import torch.optim.lr_scheduler as lr_scheduler

# Warmup + Cosine Annealing
class WarmupCosineScheduler:
    def __init__(self, optimizer, warmup_steps, total_steps):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        self.current_step = 0
        self.base_lr = optimizer.param_groups[0]['lr']

    def step(self):
        self.current_step += 1
        if self.current_step < self.warmup_steps:
            # Linear warmup
            lr = self.base_lr * self.current_step / self.warmup_steps
        else:
            # Cosine decay from base_lr toward zero over the remaining steps
            progress = (self.current_step - self.warmup_steps) / (self.total_steps - self.warmup_steps)
            lr = self.base_lr * 0.5 * (1 + math.cos(math.pi * progress))

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr

# One Cycle Policy
scheduler = lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.01,
    total_steps=total_steps,
    pct_start=0.3,
    anneal_strategy='cos'
)

# Reduce on Plateau
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=5, verbose=True
)
```

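Where each scheduler's `step()` is called matters: OneCycleLR is stepped after every optimizer update, while ReduceLROnPlateau is stepped once per epoch with the monitored validation metric. A rough sketch of the loop structure, assuming the two schedulers above are bound to `one_cycle` and `plateau` and that `train_one_batch`/`evaluate` are project helpers:

```python
for epoch in range(num_epochs):
    for batch in train_loader:
        loss = train_one_batch(model, batch, optimizer)
        one_cycle.step()        # per batch: follows the one-cycle LR curve

    val_loss = evaluate(model, val_loader)
    plateau.step(val_loss)      # per epoch: cuts LR when val_loss stops improving
```
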
### Mixed Precision Training
```python
import torch
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

for epoch in range(epochs):
    for inputs, targets in train_loader:
        optimizer.zero_grad()

        # Run the forward pass in fp16 where it is numerically safe
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # Scale the loss to avoid fp16 gradient underflow, then unscale
        # before clipping so the threshold applies to the true gradients
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        scaler.step(optimizer)
        scaler.update()
```

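On GPUs with bfloat16 support, the same pattern can be written with the newer `torch.autocast` entry point; bf16 keeps fp32's exponent range, so gradient scaling is usually unnecessary. A minimal sketch assuming the same model, optimizer, criterion, and loader as above:

```python
import torch

for inputs, targets in train_loader:
    optimizer.zero_grad()
    # bf16 autocast: no GradScaler needed because underflow is rarely an issue
    with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
        loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()
```
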
### Gradient Accumulation
```python
# Effective batch size = loader batch size * accumulation_steps
accumulation_steps = 4
optimizer.zero_grad()

for i, (inputs, targets) in enumerate(train_loader):
    outputs = model(inputs)
    # Divide so the accumulated gradient matches a single large-batch step
    loss = criterion(outputs, targets) / accumulation_steps
    loss.backward()

    if (i + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
```

## Architecture Optimization

### Neural Architecture Search
```python
import nni
import torch.nn as nn
from nni.nas.pytorch import mutables

class SearchableBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        # LayerChoice lets the NAS algorithm pick one candidate op per block
        # (DepthSeparableConv is a user-defined module)
        self.op_choice = mutables.LayerChoice([
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.Conv2d(in_channels, out_channels, 5, padding=2),
            DepthSeparableConv(in_channels, out_channels),
            nn.Identity() if in_channels == out_channels else nn.Conv2d(in_channels, out_channels, 1)
        ])

    def forward(self, x):
        return self.op_choice(x)

# AutoML with Ray Tune
from ray import tune
from ray.tune.schedulers import ASHAScheduler

def train_model(config):
    model = build_model(config)
    for epoch in range(config['epochs']):
        train_loss = train(model)
        val_acc = validate(model)
        # Report metrics so ASHA can stop low-performing trials early
        tune.report(loss=train_loss, accuracy=val_acc)

analysis = tune.run(
    train_model,
    config={
        "lr": tune.loguniform(1e-5, 1e-1),
        "layers": tune.choice([2, 4, 6, 8]),
        "hidden": tune.choice([128, 256, 512]),
    },
    scheduler=ASHAScheduler(metric="accuracy", mode="max"),
    num_samples=100
)
```

## Memory Optimization

### Gradient Checkpointing
```python
import torch.nn as nn
from torch.utils.checkpoint import checkpoint

class CheckpointedModel(nn.Module):
    def __init__(self):
        super().__init__()
        # TransformerBlock is any user-defined nn.Module
        self.layers = nn.ModuleList([
            TransformerBlock(d_model=512) for _ in range(24)
        ])

    def forward(self, x):
        # Trade compute for memory: activations are recomputed during backward
        for layer in self.layers:
            x = checkpoint(layer, x, use_reentrant=False)
        return x
```

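For a purely sequential stack, `torch.utils.checkpoint.checkpoint_sequential` handles the segmenting automatically. A minimal, self-contained sketch:

```python
import torch
import torch.nn as nn
from torch.utils.checkpoint import checkpoint_sequential

blocks = nn.Sequential(*[nn.Linear(512, 512) for _ in range(24)])
x = torch.randn(8, 512, requires_grad=True)

# Split the stack into 4 segments; only segment-boundary activations are kept,
# the rest are recomputed during backward.
out = checkpoint_sequential(blocks, 4, x)
out.sum().backward()
```
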
### Memory-Efficient Attention
```python
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

# Flash Attention (via xformers or native)
from xformers.ops import memory_efficient_attention

class EfficientAttention(nn.Module):
    def forward(self, q, k, v, mask=None):
        return memory_efficient_attention(q, k, v, attn_bias=mask)

# Sliding Window Attention
class SlidingWindowAttention(nn.Module):
    def __init__(self, window_size=256):
        super().__init__()
        self.window_size = window_size

    def forward(self, q, k, v):
        seq_len = q.size(1)
        outputs = []

        # Each block of queries attends only to a local window of keys/values,
        # so attention memory scales with window_size rather than seq_len
        for i in range(0, seq_len, self.window_size):
            start = max(0, i - self.window_size // 2)
            end = min(seq_len, i + self.window_size)

            q_chunk = q[:, i:min(i + self.window_size, seq_len)]
            k_chunk = k[:, start:end]
            v_chunk = v[:, start:end]

            attn = torch.matmul(q_chunk, k_chunk.transpose(-2, -1))
            attn = F.softmax(attn / math.sqrt(q.size(-1)), dim=-1)
            outputs.append(torch.matmul(attn, v_chunk))

        return torch.cat(outputs, dim=1)
```

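The "native" path mentioned above is `torch.nn.functional.scaled_dot_product_attention` (PyTorch 2.0+), which dispatches to FlashAttention or memory-efficient kernels when the inputs and hardware allow it. A minimal sketch with made-up shapes:

```python
import torch
import torch.nn.functional as F

# (batch, heads, seq_len, head_dim)
q = torch.randn(2, 8, 1024, 64, device="cuda", dtype=torch.float16)
k, v = torch.randn_like(q), torch.randn_like(q)

# The backend (flash / memory-efficient / math) is chosen automatically.
out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
```
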
## Performance Profiling

```python
import torch.profiler as profiler

with profiler.profile(
    activities=[
        profiler.ProfilerActivity.CPU,
        profiler.ProfilerActivity.CUDA,
    ],
    # wait 1 step, warm up for 1 step, then record 3 active steps
    schedule=profiler.schedule(wait=1, warmup=1, active=3, repeat=1),
    on_trace_ready=profiler.tensorboard_trace_handler('./logs'),
    record_shapes=True,
    profile_memory=True,
    with_stack=True
) as prof:
    for step, batch in enumerate(train_loader):
        if step >= 5:
            break
        train_step(model, batch)
        prof.step()  # advance the profiler schedule once per iteration

# Print summary
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))
```

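For quick spot checks outside the full profiler, CUDA events measure the latency of a single region and `torch.cuda.max_memory_allocated` reports peak memory. A small sketch (`model` and `batch` are assumed to already be on the GPU):

```python
import torch

start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

for _ in range(3):          # warmup so one-time costs don't skew the timing
    model(batch)

torch.cuda.synchronize()
start.record()
model(batch)
end.record()
torch.cuda.synchronize()    # wait for both events before reading the timer

print(f"Forward latency: {start.elapsed_time(end):.2f} ms")
print(f"Peak memory: {torch.cuda.max_memory_allocated() / 1e6:.1f} MB")
```
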
## Distributed Training

```python
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler

def setup(rank, world_size):
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)

def train_ddp(rank, world_size):
    setup(rank, world_size)

    model = Model().to(rank)
    model = DDP(model, device_ids=[rank])

    # DistributedSampler gives each rank a disjoint shard of the dataset
    sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank)
    loader = DataLoader(dataset, sampler=sampler, batch_size=32)

    for epoch in range(epochs):
        sampler.set_epoch(epoch)  # reshuffle shards each epoch
        for batch in loader:
            train_step(model, batch)

    dist.destroy_process_group()

# Launch with torchrun
# torchrun --nproc_per_node=4 train.py
```

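When launched with torchrun, each worker process reads its rank from environment variables, so a single-node entry point reusing `train_ddp` can be as small as this sketch:

```python
import os

if __name__ == "__main__":
    # torchrun sets LOCAL_RANK, RANK, and WORLD_SIZE for every process it spawns.
    # On a single node, LOCAL_RANK doubles as the global rank used above.
    local_rank = int(os.environ["LOCAL_RANK"])
    world_size = int(os.environ["WORLD_SIZE"])
    train_ddp(local_rank, world_size)
```
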
## Commands
- `/omgtrain:tune` - Hyperparameter tuning
- `/omgoptim:profile` - Profile model
- `/omgtrain:train` - Train with optimizations

## Best Practices

1. Profile before optimizing
2. Use mixed precision by default
3. Start with proven architectures
4. Tune learning rate first
5. Use distributed training for scale