omgkit 2.20.0 → 2.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -10
- package/package.json +1 -1
- package/plugin/agents/ai-architect-agent.md +282 -0
- package/plugin/agents/data-scientist-agent.md +221 -0
- package/plugin/agents/experiment-analyst-agent.md +318 -0
- package/plugin/agents/ml-engineer-agent.md +165 -0
- package/plugin/agents/mlops-engineer-agent.md +324 -0
- package/plugin/agents/model-optimizer-agent.md +287 -0
- package/plugin/agents/production-engineer-agent.md +360 -0
- package/plugin/agents/research-scientist-agent.md +274 -0
- package/plugin/commands/omgdata/augment.md +86 -0
- package/plugin/commands/omgdata/collect.md +81 -0
- package/plugin/commands/omgdata/label.md +83 -0
- package/plugin/commands/omgdata/split.md +83 -0
- package/plugin/commands/omgdata/validate.md +76 -0
- package/plugin/commands/omgdata/version.md +85 -0
- package/plugin/commands/omgdeploy/ab.md +94 -0
- package/plugin/commands/omgdeploy/cloud.md +89 -0
- package/plugin/commands/omgdeploy/edge.md +93 -0
- package/plugin/commands/omgdeploy/package.md +91 -0
- package/plugin/commands/omgdeploy/serve.md +92 -0
- package/plugin/commands/omgfeature/embed.md +93 -0
- package/plugin/commands/omgfeature/extract.md +93 -0
- package/plugin/commands/omgfeature/select.md +85 -0
- package/plugin/commands/omgfeature/store.md +97 -0
- package/plugin/commands/omgml/init.md +60 -0
- package/plugin/commands/omgml/status.md +82 -0
- package/plugin/commands/omgops/drift.md +87 -0
- package/plugin/commands/omgops/monitor.md +99 -0
- package/plugin/commands/omgops/pipeline.md +102 -0
- package/plugin/commands/omgops/registry.md +109 -0
- package/plugin/commands/omgops/retrain.md +91 -0
- package/plugin/commands/omgoptim/distill.md +90 -0
- package/plugin/commands/omgoptim/profile.md +92 -0
- package/plugin/commands/omgoptim/prune.md +81 -0
- package/plugin/commands/omgoptim/quantize.md +83 -0
- package/plugin/commands/omgtrain/baseline.md +78 -0
- package/plugin/commands/omgtrain/compare.md +99 -0
- package/plugin/commands/omgtrain/evaluate.md +85 -0
- package/plugin/commands/omgtrain/train.md +81 -0
- package/plugin/commands/omgtrain/tune.md +89 -0
- package/plugin/registry.yaml +252 -2
- package/plugin/skills/ml-systems/SKILL.md +65 -0
- package/plugin/skills/ml-systems/ai-accelerators/SKILL.md +342 -0
- package/plugin/skills/ml-systems/data-eng/SKILL.md +126 -0
- package/plugin/skills/ml-systems/deep-learning-primer/SKILL.md +143 -0
- package/plugin/skills/ml-systems/deployment-paradigms/SKILL.md +148 -0
- package/plugin/skills/ml-systems/dnn-architectures/SKILL.md +128 -0
- package/plugin/skills/ml-systems/edge-deployment/SKILL.md +366 -0
- package/plugin/skills/ml-systems/efficient-ai/SKILL.md +316 -0
- package/plugin/skills/ml-systems/feature-engineering/SKILL.md +151 -0
- package/plugin/skills/ml-systems/ml-frameworks/SKILL.md +187 -0
- package/plugin/skills/ml-systems/ml-serving-optimization/SKILL.md +371 -0
- package/plugin/skills/ml-systems/ml-systems-fundamentals/SKILL.md +103 -0
- package/plugin/skills/ml-systems/ml-workflow/SKILL.md +162 -0
- package/plugin/skills/ml-systems/mlops/SKILL.md +386 -0
- package/plugin/skills/ml-systems/model-deployment/SKILL.md +350 -0
- package/plugin/skills/ml-systems/model-dev/SKILL.md +160 -0
- package/plugin/skills/ml-systems/model-optimization/SKILL.md +339 -0
- package/plugin/skills/ml-systems/robust-ai/SKILL.md +395 -0
- package/plugin/skills/ml-systems/training-data/SKILL.md +152 -0
- package/plugin/workflows/ml-systems/data-preparation-workflow.md +276 -0
- package/plugin/workflows/ml-systems/edge-deployment-workflow.md +413 -0
- package/plugin/workflows/ml-systems/full-ml-lifecycle-workflow.md +405 -0
- package/plugin/workflows/ml-systems/hyperparameter-tuning-workflow.md +352 -0
- package/plugin/workflows/ml-systems/mlops-pipeline-workflow.md +384 -0
- package/plugin/workflows/ml-systems/model-deployment-workflow.md +392 -0
- package/plugin/workflows/ml-systems/model-development-workflow.md +218 -0
- package/plugin/workflows/ml-systems/model-evaluation-workflow.md +416 -0
- package/plugin/workflows/ml-systems/model-optimization-workflow.md +390 -0
- package/plugin/workflows/ml-systems/monitoring-drift-workflow.md +446 -0
- package/plugin/workflows/ml-systems/retraining-workflow.md +401 -0
- package/plugin/workflows/ml-systems/training-pipeline-workflow.md +382 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Data Preparation Workflow
|
|
3
|
+
description: Comprehensive data preparation workflow including collection, validation, cleaning, labeling, augmentation, and versioning.
|
|
4
|
+
category: ml-systems
|
|
5
|
+
complexity: medium
|
|
6
|
+
agents:
|
|
7
|
+
- data-scientist-agent
|
|
8
|
+
- ml-engineer-agent
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Data Preparation Workflow
|
|
12
|
+
|
|
13
|
+
Complete workflow for preparing high-quality training data.
|
|
14
|
+
|
|
15
|
+
## Overview
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
19
|
+
│ DATA PREPARATION WORKFLOW │
|
|
20
|
+
├─────────────────────────────────────────────────────────────┤
|
|
21
|
+
│ │
|
|
22
|
+
│ 1. COLLECT 2. VALIDATE 3. CLEAN │
|
|
23
|
+
│ ↓ ↓ ↓ │
|
|
24
|
+
│ Gather data Schema checks Handle missing │
|
|
25
|
+
│ Multiple sources Quality gates Remove outliers │
|
|
26
|
+
│ │
|
|
27
|
+
│ 4. LABEL 5. AUGMENT 6. SPLIT & VERSION │
|
|
28
|
+
│ ↓ ↓ ↓ │
|
|
29
|
+
│ Manual/weak Synthetic data Train/val/test │
|
|
30
|
+
│ Active learning Balance classes DVC versioning │
|
|
31
|
+
│ │
|
|
32
|
+
└─────────────────────────────────────────────────────────────┘
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Steps
|
|
36
|
+
|
|
37
|
+
### Step 1: Data Collection
|
|
38
|
+
**Agent**: ml-engineer-agent
|
|
39
|
+
|
|
40
|
+
**Inputs**:
|
|
41
|
+
- Data source configurations
|
|
42
|
+
- Collection requirements
|
|
43
|
+
- Schema definitions
|
|
44
|
+
|
|
45
|
+
**Actions**:
|
|
46
|
+
```bash
|
|
47
|
+
# Collect data from sources
|
|
48
|
+
/omgdata:collect --sources sources.yaml --output raw/
|
|
49
|
+
|
|
50
|
+
# Sources can include:
|
|
51
|
+
# - Databases (SQL, NoSQL)
|
|
52
|
+
# - APIs
|
|
53
|
+
# - File systems
|
|
54
|
+
# - Streaming sources
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Outputs**:
|
|
58
|
+
- Raw data files
|
|
59
|
+
- Collection metadata
|
|
60
|
+
- Source lineage
|
|
61
|
+
|
|
62
|
+
### Step 2: Data Validation
|
|
63
|
+
**Agent**: data-scientist-agent
|
|
64
|
+
|
|
65
|
+
**Inputs**:
|
|
66
|
+
- Raw collected data
|
|
67
|
+
- Schema definitions
|
|
68
|
+
- Quality expectations
|
|
69
|
+
|
|
70
|
+
**Actions**:
|
|
71
|
+
```bash
|
|
72
|
+
# Validate against schema
|
|
73
|
+
/omgdata:validate --schema schema.yaml --data raw/data.csv
|
|
74
|
+
|
|
75
|
+
# Validation checks:
|
|
76
|
+
# - Schema compliance
|
|
77
|
+
# - Data types
|
|
78
|
+
# - Value ranges
|
|
79
|
+
# - Null patterns
|
|
80
|
+
# - Referential integrity
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**Quality Gates**:
|
|
84
|
+
```python
|
|
85
|
+
quality_gates = {
|
|
86
|
+
"completeness": {"threshold": 0.95, "columns": ["id", "target"]},
|
|
87
|
+
"uniqueness": {"threshold": 0.99, "column": "id"},
|
|
88
|
+
"freshness": {"max_age_hours": 24},
|
|
89
|
+
"schema_match": {"strict": True}
|
|
90
|
+
}
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Outputs**:
|
|
94
|
+
- Validation report
|
|
95
|
+
- Quality metrics
|
|
96
|
+
- Data anomalies list
|
|
97
|
+
|
|
98
|
+
### Step 3: Data Cleaning
|
|
99
|
+
**Agent**: data-scientist-agent
|
|
100
|
+
|
|
101
|
+
**Inputs**:
|
|
102
|
+
- Validated data
|
|
103
|
+
- Cleaning rules
|
|
104
|
+
- Business logic
|
|
105
|
+
|
|
106
|
+
**Actions**:
|
|
107
|
+
```python
|
|
108
|
+
# Cleaning operations
|
|
109
|
+
cleaning_steps = [
|
|
110
|
+
# Handle missing values
|
|
111
|
+
{"column": "age", "method": "median"},
|
|
112
|
+
{"column": "category", "method": "mode"},
|
|
113
|
+
{"column": "optional_field", "method": "drop_row", "threshold": 0.5},
|
|
114
|
+
|
|
115
|
+
# Remove outliers
|
|
116
|
+
{"column": "amount", "method": "iqr", "factor": 3},
|
|
117
|
+
|
|
118
|
+
# Fix inconsistencies
|
|
119
|
+
{"column": "status", "mapping": {"active": "Active", "ACTIVE": "Active"}},
|
|
120
|
+
|
|
121
|
+
# Type corrections
|
|
122
|
+
{"column": "date", "dtype": "datetime"},
|
|
123
|
+
]
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Outputs**:
|
|
127
|
+
- Cleaned dataset
|
|
128
|
+
- Cleaning log
|
|
129
|
+
- Removed records report
|
|
130
|
+
|
|
131
|
+
### Step 4: Data Labeling
|
|
132
|
+
**Agent**: data-scientist-agent
|
|
133
|
+
|
|
134
|
+
**Inputs**:
|
|
135
|
+
- Cleaned data (unlabeled or partially labeled)
|
|
136
|
+
- Labeling guidelines
|
|
137
|
+
- Label schema
|
|
138
|
+
|
|
139
|
+
**Actions**:
|
|
140
|
+
```bash
|
|
141
|
+
# Setup labeling workflow
|
|
142
|
+
/omgdata:label --strategy weak_supervision --output labeled/
|
|
143
|
+
|
|
144
|
+
# Labeling strategies:
|
|
145
|
+
# - Manual labeling (export to Label Studio)
|
|
146
|
+
# - Weak supervision (Snorkel labeling functions)
|
|
147
|
+
# - Active learning (uncertainty sampling)
|
|
148
|
+
# - Semi-supervised (pseudo-labels)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
**Labeling Functions Example**:
|
|
152
|
+
```python
|
|
153
|
+
from snorkel.labeling import labeling_function
|
|
154
|
+
|
|
155
|
+
@labeling_function()
|
|
156
|
+
def lf_keyword_spam(x):
|
|
157
|
+
spam_words = ["free", "winner", "click here"]
|
|
158
|
+
return 1 if any(w in x.text.lower() for w in spam_words) else -1
|
|
159
|
+
|
|
160
|
+
@labeling_function()
|
|
161
|
+
def lf_short_text(x):
|
|
162
|
+
return 1 if len(x.text) < 20 else -1
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
**Outputs**:
|
|
166
|
+
- Labeled dataset
|
|
167
|
+
- Label quality metrics
|
|
168
|
+
- Inter-annotator agreement
|
|
169
|
+
|
|
170
|
+
### Step 5: Data Augmentation
|
|
171
|
+
**Agent**: data-scientist-agent
|
|
172
|
+
|
|
173
|
+
**Inputs**:
|
|
174
|
+
- Labeled dataset
|
|
175
|
+
- Class distribution
|
|
176
|
+
- Augmentation requirements
|
|
177
|
+
|
|
178
|
+
**Actions**:
|
|
179
|
+
```bash
|
|
180
|
+
# Augment data
|
|
181
|
+
/omgdata:augment --strategy smote --ratio 1.0
|
|
182
|
+
|
|
183
|
+
# Augmentation techniques:
|
|
184
|
+
# - SMOTE for tabular (imbalanced classes)
|
|
185
|
+
# - Text augmentation (synonyms, back-translation)
|
|
186
|
+
# - Image augmentation (rotation, flip, color)
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**Augmentation Config**:
|
|
190
|
+
```python
|
|
191
|
+
augmentation_config = {
|
|
192
|
+
"tabular": {
|
|
193
|
+
"method": "smote",
|
|
194
|
+
"sampling_strategy": "minority",
|
|
195
|
+
"k_neighbors": 5
|
|
196
|
+
},
|
|
197
|
+
"text": {
|
|
198
|
+
"methods": ["synonym_replace", "random_insert"],
|
|
199
|
+
"aug_percent": 0.3
|
|
200
|
+
},
|
|
201
|
+
"image": {
|
|
202
|
+
"transforms": ["horizontal_flip", "rotation", "brightness"],
|
|
203
|
+
"probability": 0.5
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
**Outputs**:
|
|
209
|
+
- Augmented dataset
|
|
210
|
+
- Class balance report
|
|
211
|
+
- Augmentation metadata
|
|
212
|
+
|
|
213
|
+
### Step 6: Split & Version
|
|
214
|
+
**Agent**: ml-engineer-agent
|
|
215
|
+
|
|
216
|
+
**Inputs**:
|
|
217
|
+
- Final processed dataset
|
|
218
|
+
- Split strategy
|
|
219
|
+
- Version metadata
|
|
220
|
+
|
|
221
|
+
**Actions**:
|
|
222
|
+
```bash
|
|
223
|
+
# Split data
|
|
224
|
+
/omgdata:split --strategy stratified --train 0.7 --val 0.15 --test 0.15
|
|
225
|
+
|
|
226
|
+
# Version with DVC
|
|
227
|
+
/omgdata:version --message "v1.2 - Added augmentation, fixed labels"
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
**Split Strategies**:
|
|
231
|
+
```python
|
|
232
|
+
split_strategies = {
|
|
233
|
+
"random": {"stratify": True, "seed": 42},
|
|
234
|
+
"temporal": {"time_column": "date", "train_end": "2024-01-01"},
|
|
235
|
+
"group": {"group_column": "user_id"}, # No data leakage
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
**Outputs**:
|
|
240
|
+
- Train/validation/test splits
|
|
241
|
+
- Version tagged in DVC
|
|
242
|
+
- Split statistics
|
|
243
|
+
|
|
244
|
+
## Checkpoints
|
|
245
|
+
|
|
246
|
+
| Phase | Checkpoint | Criteria |
|
|
247
|
+
|-------|------------|----------|
|
|
248
|
+
| 1 | Data collected | All sources ingested |
|
|
249
|
+
| 2 | Validation passed | Quality gates met |
|
|
250
|
+
| 3 | Data cleaned | No critical issues |
|
|
251
|
+
| 4 | Labels complete | Coverage >95% |
|
|
252
|
+
| 5 | Augmentation done | Classes balanced |
|
|
253
|
+
| 6 | Versioned | DVC commit created |
|
|
254
|
+
|
|
255
|
+
## Artifacts
|
|
256
|
+
|
|
257
|
+
- `raw/` - Raw collected data
|
|
258
|
+
- `processed/` - Cleaned and processed data
|
|
259
|
+
- `labeled/` - Labeled datasets
|
|
260
|
+
- `augmented/` - Augmented data
|
|
261
|
+
- `splits/` - Train/val/test splits
|
|
262
|
+
- `data.dvc` - DVC tracking file
|
|
263
|
+
|
|
264
|
+
## Next Workflows
|
|
265
|
+
|
|
266
|
+
After data preparation:
|
|
267
|
+
- → **model-development-workflow** for model training
|
|
268
|
+
- → **feature-engineering** skill (`/omgfeature:extract`, `/omgfeature:select`) for advanced features
|
|
269
|
+
|
|
270
|
+
## Quality Gates
|
|
271
|
+
|
|
272
|
+
- [ ] All steps completed successfully
|
|
273
|
+
- [ ] Metrics meet defined thresholds
|
|
274
|
+
- [ ] Documentation updated
|
|
275
|
+
- [ ] Artifacts versioned and stored
|
|
276
|
+
- [ ] Stakeholder approval obtained
|
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Edge Deployment Workflow
|
|
3
|
+
description: Edge deployment workflow for deploying ML models to mobile devices, embedded systems, and IoT with optimization for resource constraints.
|
|
4
|
+
category: ml-systems
|
|
5
|
+
complexity: medium
|
|
6
|
+
agents:
|
|
7
|
+
- model-optimizer-agent
|
|
8
|
+
- production-engineer-agent
- mlops-engineer-agent
- experiment-analyst-agent
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Edge Deployment Workflow
|
|
12
|
+
|
|
13
|
+
Deploy ML models to edge devices.
|
|
14
|
+
|
|
15
|
+
## Overview
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
19
|
+
│ EDGE DEPLOYMENT WORKFLOW │
|
|
20
|
+
├─────────────────────────────────────────────────────────────┤
|
|
21
|
+
│ │
|
|
22
|
+
│ 1. ASSESS 2. OPTIMIZE 3. CONVERT │
|
|
23
|
+
│ TARGET MODEL FORMAT │
|
|
24
|
+
│ ↓ ↓ ↓ │
|
|
25
|
+
│ Hardware specs Quantize TFLite/CoreML │
|
|
26
|
+
│ Constraints Prune ONNX │
|
|
27
|
+
│ Framework Distill TensorRT │
|
|
28
|
+
│ │
|
|
29
|
+
│ 4. PACKAGE 5. DEPLOY 6. VALIDATE │
|
|
30
|
+
│ ↓ ↓ ↓ │
|
|
31
|
+
│ Mobile SDK OTA update Device testing │
|
|
32
|
+
│ Firmware App store Performance │
|
|
33
|
+
│ Container Direct flash Accuracy │
|
|
34
|
+
│ │
|
|
35
|
+
└─────────────────────────────────────────────────────────────┘
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Steps
|
|
39
|
+
|
|
40
|
+
### Step 1: Assess Target
|
|
41
|
+
**Agent**: production-engineer-agent
|
|
42
|
+
|
|
43
|
+
**Inputs**:
|
|
44
|
+
- Target device specifications
|
|
45
|
+
- Use case requirements
|
|
46
|
+
- Connectivity constraints
|
|
47
|
+
|
|
48
|
+
**Actions**:
|
|
49
|
+
```python
|
|
50
|
+
# Device assessment
|
|
51
|
+
# Per-target resource envelope used by assess_deployment_target().
# Sizes are in megabytes; 'precision' lists the numeric formats the target
# is declared to accept.
target_devices = {
    'mobile_ios': {
        'framework': 'CoreML',
        'compute': ['CPU', 'GPU', 'Neural Engine'],
        'memory_mb': 4096,
        'storage_mb': 500,
        'precision': ['float16', 'int8'],
    },
    'mobile_android': {
        'framework': 'TFLite',
        'compute': ['CPU', 'GPU', 'NNAPI'],
        'memory_mb': 3072,
        'storage_mb': 300,
        'precision': ['float16', 'int8'],
    },
    'jetson_nano': {
        'framework': 'TensorRT',
        'compute': ['GPU (Maxwell)'],
        'memory_mb': 4096,
        'storage_mb': 2048,
        'precision': ['float16', 'int8'],
    },
    'raspberry_pi': {
        'framework': 'TFLite',
        'compute': ['CPU'],
        'memory_mb': 1024,
        'storage_mb': 100,
        'precision': ['int8'],
    },
    'coral_edge': {
        'framework': 'TFLite + EdgeTPU',
        'compute': ['Edge TPU'],
        'memory_mb': 1024,
        'storage_mb': 50,
        'precision': ['int8'],
    },
}


def assess_deployment_target(model_profile, target):
    """Compare a model profile against one entry of ``target_devices``.

    Args:
        model_profile: Mapping with ``size_mb``, ``runtime_memory_mb`` and
            ``precision`` keys describing the model to deploy.
        target: Key into ``target_devices`` (e.g. ``'raspberry_pi'``).

    Returns:
        dict with ``size_ok``, ``memory_ok``, ``precision_supported``,
        ``framework`` and ``optimization_needed`` (ordered list, no
        duplicates, of suggested optimization steps).

    Raises:
        KeyError: If ``target`` is not a known device.
    """
    try:
        constraints = target_devices[target]
    except KeyError:
        raise KeyError(
            f"Unknown deployment target {target!r}; "
            f"expected one of {sorted(target_devices)}"
        ) from None

    assessment = {
        'size_ok': model_profile['size_mb'] < constraints['storage_mb'],
        'memory_ok': model_profile['runtime_memory_mb'] < constraints['memory_mb'],
        'precision_supported': model_profile['precision'] in constraints['precision'],
        'framework': constraints['framework'],
        'optimization_needed': [],
    }

    needed = assessment['optimization_needed']

    def _require(step):
        # Keep the list ordered and free of duplicates.
        if step not in needed:
            needed.append(step)

    # A model that is too large on disk or in RAM needs to shrink.
    if not assessment['size_ok'] or not assessment['memory_ok']:
        _require('quantization')
        _require('pruning')

    # An unsupported numeric format can only be fixed by re-quantizing.
    if not assessment['precision_supported']:
        _require('quantization')

    return assessment
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**Outputs**:
|
|
108
|
+
- Target assessment
|
|
109
|
+
- Constraint analysis
|
|
110
|
+
- Optimization requirements
|
|
111
|
+
|
|
112
|
+
### Step 2: Optimize Model
|
|
113
|
+
**Agent**: model-optimizer-agent
|
|
114
|
+
|
|
115
|
+
**Inputs**:
|
|
116
|
+
- Original model
|
|
117
|
+
- Target constraints
|
|
118
|
+
- Optimization budget
|
|
119
|
+
|
|
120
|
+
**Actions**:
|
|
121
|
+
```bash
|
|
122
|
+
# Optimize for edge
|
|
123
|
+
/omgdeploy:edge --model model.pt --target mobile_android --optimize full
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
def optimize_for_edge(model, target_size_mb, target_latency_ms):
    """Pick the most accurate optimized variant of ``model`` that fits the size budget.

    Candidates are tried in order: quantization, then pruning + quantization
    (only when quantization alone is still too large), then knowledge
    distillation (only when the budget is below a tenth of the original
    size). Only candidates whose measured size is within ``target_size_mb``
    are considered.

    Args:
        model: The original model.
        target_size_mb: Maximum acceptable model size, compared against
            ``measure_size()`` results.
        target_latency_ms: Accepted for interface compatibility; not
            enforced by this function.

    Returns:
        ``(strategy_name, optimized_model)`` for the candidate with the
        highest ``evaluate(...)['accuracy']``.

    Raises:
        ValueError: If no candidate fits within ``target_size_mb``.
    """
    candidates = []

    # 1. Quantization (always first). Measure once and reuse the result.
    quantized = quantize_dynamic(model)
    if measure_size(quantized) <= target_size_mb:
        candidates.append(('quantization', quantized))
    else:
        # 2. Pruning (quantization alone is still too large)
        pruned = iterative_pruning(model, target_sparsity=0.5)
        pruned_quantized = quantize_dynamic(pruned)
        if measure_size(pruned_quantized) <= target_size_mb:
            candidates.append(('pruning + quantization', pruned_quantized))

    # 3. Knowledge distillation (for significant reduction)
    if target_size_mb < measure_size(model) / 10:
        student = create_mobile_net(model)
        distilled = knowledge_distill(model, student)
        # Only keep the student if it actually meets the size budget.
        if measure_size(distilled) <= target_size_mb:
            candidates.append(('distillation', distilled))

    if not candidates:
        # Without this guard max() fails with an opaque "empty sequence" error.
        raise ValueError(
            f"No optimization strategy fits the {target_size_mb} MB size budget"
        )

    # Select best based on accuracy among the variants that fit.
    return max(candidates, key=lambda candidate: evaluate(candidate[1])['accuracy'])
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**Outputs**:
|
|
154
|
+
- Optimized model
|
|
155
|
+
- Size/speed improvements
|
|
156
|
+
- Accuracy delta
|
|
157
|
+
|
|
158
|
+
### Step 3: Convert Format
|
|
159
|
+
**Agent**: production-engineer-agent
|
|
160
|
+
|
|
161
|
+
**Inputs**:
|
|
162
|
+
- Optimized model
|
|
163
|
+
- Target framework
|
|
164
|
+
- Conversion config
|
|
165
|
+
|
|
166
|
+
**TFLite Conversion**:
|
|
167
|
+
```python
|
|
168
|
+
import tensorflow as tf
|
|
169
|
+
|
|
170
|
+
def convert_to_tflite(model, calibration_data, quantize='int8'):
    """Convert a TensorFlow SavedModel to a TFLite flatbuffer.

    Args:
        model: Passed to ``tf.lite.TFLiteConverter.from_saved_model``
            (presumably a SavedModel directory path - confirm with caller).
        calibration_data: Iterable of array-like samples; only used when
            ``quantize == 'int8'`` to calibrate quantization ranges.
        quantize: ``'int8'`` for full-integer quantization, ``'float16'``
            for half-precision weights. Any other value leaves only the
            default optimizations enabled.

    Returns:
        The serialized TFLite model as returned by ``converter.convert()``.
    """
    # The calibration generator needs numpy; import locally because the
    # snippet only imports tensorflow.
    import numpy as np

    converter = tf.lite.TFLiteConverter.from_saved_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]

    if quantize == 'int8':
        def representative_dataset():
            # The converter expects each sample wrapped in a list of inputs.
            for data in calibration_data:
                yield [np.asarray(data).astype(np.float32)]

        converter.representative_dataset = representative_dataset
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.uint8
        converter.inference_output_type = tf.uint8

    elif quantize == 'float16':
        converter.target_spec.supported_types = [tf.float16]

    tflite_model = converter.convert()

    # Validate: loading and allocating tensors fails fast on a broken model.
    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    return tflite_model
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
**CoreML Conversion**:
|
|
197
|
+
```python
|
|
198
|
+
import coremltools as ct
|
|
199
|
+
|
|
200
|
+
def convert_to_coreml(model, example_input):
    """Trace a PyTorch model and convert it to a float16 Core ML model.

    Args:
        model: PyTorch model (or callable) accepted by ``torch.jit.trace``.
        example_input: Example tensor used for tracing; its shape becomes
            the Core ML input shape.

    Returns:
        The converted Core ML model with author/description metadata set.
    """
    # torch is used for tracing but the snippet only imports coremltools.
    import torch

    # Trace in inference mode so dropout/batch-norm behave deterministically.
    if isinstance(model, torch.nn.Module):
        model.eval()
    traced = torch.jit.trace(model, example_input)

    mlmodel = ct.convert(
        traced,
        inputs=[ct.TensorType(shape=example_input.shape)],
        compute_precision=ct.precision.FLOAT16,
        minimum_deployment_target=ct.target.iOS15,
    )

    # Add metadata
    mlmodel.author = "ML Team"
    mlmodel.short_description = "Edge inference model"

    return mlmodel
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
**TensorRT Conversion (Jetson)**:
|
|
218
|
+
```python
|
|
219
|
+
import tensorrt as trt
|
|
220
|
+
|
|
221
|
+
def convert_to_tensorrt(onnx_path, precision='fp16'):
    """Build a TensorRT engine from an ONNX file.

    Args:
        onnx_path: Path to the ONNX model file.
        precision: ``'fp16'`` enables the FP16 builder flag; any other value
            builds with the default precision.

    Returns:
        The built TensorRT engine.

    Raises:
        RuntimeError: If the ONNX file cannot be parsed or the engine
            build fails.
    """
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)

    with open(onnx_path, 'rb') as f:
        parsed = parser.parse(f.read())
    if not parsed:
        # parse() reports failure via its return value; surface the details
        # instead of silently building from an incomplete network.
        errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
        raise RuntimeError(
            f"Failed to parse ONNX model {onnx_path!r}: " + "; ".join(errors)
        )

    config = builder.create_builder_config()
    workspace_bytes = 1 << 28  # 256MB
    if hasattr(config, 'set_memory_pool_limit'):
        # Newer TensorRT replaces max_workspace_size with memory pools.
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_bytes)
    else:
        config.max_workspace_size = workspace_bytes

    if precision == 'fp16':
        config.set_flag(trt.BuilderFlag.FP16)

    if hasattr(builder, 'build_engine'):
        engine = builder.build_engine(network, config)
    else:
        # build_engine is absent in newer TensorRT; build a serialized plan
        # and deserialize it so the return type stays an engine.
        serialized = builder.build_serialized_network(network, config)
        engine = (
            trt.Runtime(logger).deserialize_cuda_engine(serialized)
            if serialized is not None
            else None
        )

    if engine is None:
        raise RuntimeError(f"TensorRT failed to build an engine from {onnx_path!r}")
    return engine
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
**Outputs**:
|
|
241
|
+
- Converted model files
|
|
242
|
+
- Validation results
|
|
243
|
+
- Integration code
|
|
244
|
+
|
|
245
|
+
### Step 4: Package
|
|
246
|
+
**Agent**: production-engineer-agent
|
|
247
|
+
|
|
248
|
+
**Inputs**:
|
|
249
|
+
- Converted model
|
|
250
|
+
- Target platform
|
|
251
|
+
- Distribution method
|
|
252
|
+
|
|
253
|
+
**Mobile SDK Package**:
|
|
254
|
+
```kotlin
|
|
255
|
+
// Android integration
|
|
256
|
+
class ModelInference(context: Context) {
|
|
257
|
+
private val interpreter: Interpreter
|
|
258
|
+
|
|
259
|
+
init {
|
|
260
|
+
val modelBuffer = loadModelFile(context, "model.tflite")
|
|
261
|
+
val options = Interpreter.Options()
|
|
262
|
+
.setNumThreads(4)
|
|
263
|
+
.addDelegate(GpuDelegate())
|
|
264
|
+
interpreter = Interpreter(modelBuffer, options)
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
fun predict(input: FloatArray): FloatArray {
|
|
268
|
+
val output = Array(1) { FloatArray(NUM_CLASSES) }
|
|
269
|
+
interpreter.run(input, output)
|
|
270
|
+
return output[0]
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
```swift
|
|
276
|
+
// iOS integration
|
|
277
|
+
class ModelInference {
|
|
278
|
+
private let model: VNCoreMLModel
|
|
279
|
+
|
|
280
|
+
init() throws {
|
|
281
|
+
let config = MLModelConfiguration()
|
|
282
|
+
config.computeUnits = .all
|
|
283
|
+
let mlModel = try MyModel(configuration: config)
|
|
284
|
+
self.model = try VNCoreMLModel(for: mlModel.model)
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
func predict(image: CGImage) async throws -> [String: Float] {
|
|
288
|
+
// Inference code
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
**Outputs**:
|
|
294
|
+
- SDK package
|
|
295
|
+
- Integration guide
|
|
296
|
+
- Test app
|
|
297
|
+
|
|
298
|
+
### Step 5: Deploy
|
|
299
|
+
**Agent**: mlops-engineer-agent
|
|
300
|
+
|
|
301
|
+
**Inputs**:
|
|
302
|
+
- Packaged model
|
|
303
|
+
- Deployment channel
|
|
304
|
+
- Update strategy
|
|
305
|
+
|
|
306
|
+
**Actions**:
|
|
307
|
+
```bash
|
|
308
|
+
# Deploy to devices
|
|
309
|
+
/omgdeploy:edge --package model-sdk/ --channel production --strategy staged
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
**OTA Update**:
|
|
313
|
+
```python
|
|
314
|
+
class OTAModelManager:
    """Check for, download and apply over-the-air model updates.

    Relies on ``get_local_version``, ``save_model``, ``validate_model``,
    ``swap_model`` and ``update_version`` being provided elsewhere (they
    are called here but not defined in this snippet).
    """

    # Seconds to wait for the update server before giving up, so a dead
    # connection cannot hang the device indefinitely.
    REQUEST_TIMEOUT_S = 10

    def __init__(self, base_url, device_id):
        self.base_url = base_url
        self.device_id = device_id
        self.current_version = self.get_local_version()

    @staticmethod
    def _version_key(version):
        """Return a tuple of ints for ordered comparison of version values.

        Extracts the numeric components of ``version`` ("1.10.2", "v2", 3,
        None, ...) and drops trailing zeros so "2.0" equals "2". Comparing
        the raw strings would order "1.10" before "1.9".
        """
        import re

        numbers = [int(part) for part in re.findall(r"\d+", str(version))]
        while numbers and numbers[-1] == 0:
            numbers.pop()
        return tuple(numbers)

    def check_update(self):
        """Return True when the server advertises a newer version than the local one."""
        response = requests.get(
            f"{self.base_url}/version", timeout=self.REQUEST_TIMEOUT_S
        )
        response.raise_for_status()
        server_version = response.json()['version']
        return self._version_key(server_version) > self._version_key(self.current_version)

    def download_update(self):
        """Download the model payload and return the path it was saved to."""
        response = requests.get(
            f"{self.base_url}/model", timeout=self.REQUEST_TIMEOUT_S
        )
        # Never persist an HTTP error page as if it were a model file.
        response.raise_for_status()
        model_path = self.save_model(response.content)
        return model_path

    def apply_update(self, model_path):
        """Swap in the downloaded model if it validates; return whether it was applied."""
        # Validate model
        if self.validate_model(model_path):
            self.swap_model(model_path)
            self.update_version()
            return True
        return False
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
**Outputs**:
|
|
340
|
+
- Deployed models
|
|
341
|
+
- Update mechanism
|
|
342
|
+
- Rollback capability
|
|
343
|
+
|
|
344
|
+
### Step 6: Validate
|
|
345
|
+
**Agent**: experiment-analyst-agent
|
|
346
|
+
|
|
347
|
+
**Inputs**:
|
|
348
|
+
- Deployed model
|
|
349
|
+
- Test devices
|
|
350
|
+
- Acceptance criteria
|
|
351
|
+
|
|
352
|
+
**Actions**:
|
|
353
|
+
```python
|
|
354
|
+
def validate_edge_deployment(model_path, test_data, device_type):
    """Measure accuracy, latency and size of a deployed edge model.

    Args:
        model_path: Path to the model file on disk.
        test_data: Iterable of ``(input, expected_label)`` pairs.
        device_type: Runtime to validate against; only ``'tflite'`` is
            supported.

    Returns:
        dict with ``accuracy``, ``latency_ms`` (mean), ``latency_p95_ms``
        and ``model_size_mb``.

    Raises:
        ValueError: If ``device_type`` is unsupported or ``test_data`` is
            empty.
    """
    import os
    import time

    import numpy as np

    # Fail early instead of hitting an unbound `interpreter` further down.
    if device_type != 'tflite':
        raise ValueError(
            f"Unsupported device_type {device_type!r}; only 'tflite' is supported"
        )

    # Materialize once so generators work and the first sample can be reused.
    samples = list(test_data)
    if not samples:
        raise ValueError("test_data must contain at least one (input, label) pair")

    # Load model based on device
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    # Accuracy test
    correct = 0
    for x, y in samples:
        pred = infer(interpreter, x)
        correct += (pred == y)

    accuracy = correct / len(samples)

    # Latency test: perf_counter is monotonic and higher resolution than time.time().
    latency_input = samples[0][0]
    latencies = []
    for _ in range(100):
        start = time.perf_counter()
        infer(interpreter, latency_input)
        latencies.append(time.perf_counter() - start)

    # Power test (if available)
    # Battery impact measurement

    return {
        'accuracy': accuracy,
        'latency_ms': np.mean(latencies) * 1000,
        'latency_p95_ms': np.percentile(latencies, 95) * 1000,
        'model_size_mb': os.path.getsize(model_path) / 1024 / 1024,
    }
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
**Outputs**:
|
|
389
|
+
- Device test results
|
|
390
|
+
- Performance metrics
|
|
391
|
+
- Deployment approval
|
|
392
|
+
|
|
393
|
+
## Artifacts
|
|
394
|
+
|
|
395
|
+
- `models/tflite/` - TFLite models
|
|
396
|
+
- `models/coreml/` - CoreML models
|
|
397
|
+
- `models/tensorrt/` - TensorRT engines
|
|
398
|
+
- `sdk/` - Platform SDKs
|
|
399
|
+
- `docs/integration.md` - Integration guide
|
|
400
|
+
|
|
401
|
+
## Next Workflows
|
|
402
|
+
|
|
403
|
+
After edge deployment:
|
|
404
|
+
- → **monitoring-drift-workflow** for edge monitoring
|
|
405
|
+
- → **retraining-workflow** for model updates
|
|
406
|
+
|
|
407
|
+
## Quality Gates
|
|
408
|
+
|
|
409
|
+
- [ ] All steps completed successfully
|
|
410
|
+
- [ ] Metrics meet defined thresholds
|
|
411
|
+
- [ ] Documentation updated
|
|
412
|
+
- [ ] Artifacts versioned and stored
|
|
413
|
+
- [ ] Stakeholder approval obtained
|