@defai.digital/cli 13.4.4 → 13.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/bundled/agents/architect.json +117 -0
  2. package/bundled/agents/auditor.json +114 -0
  3. package/bundled/agents/bug-hunter.json +128 -0
  4. package/bundled/agents/builder.json +128 -0
  5. package/bundled/agents/ceo.json +6 -1
  6. package/bundled/agents/executor.json +150 -0
  7. package/bundled/agents/fullstack.json +10 -2
  8. package/bundled/agents/operator.json +119 -0
  9. package/bundled/agents/researcher.json +42 -13
  10. package/bundled/agents/reviewer.json +90 -42
  11. package/bundled/templates/monorepo/contract-index.ts.hbs +7 -0
  12. package/bundled/templates/monorepo/contract-test.ts.hbs +130 -0
  13. package/bundled/templates/monorepo/contracts-package.json.hbs +29 -0
  14. package/bundled/templates/monorepo/domain-index.ts.hbs +115 -0
  15. package/bundled/templates/monorepo/domain-package.json.hbs +27 -0
  16. package/bundled/templates/monorepo/gitignore.hbs +32 -0
  17. package/bundled/templates/monorepo/invariants.md.hbs +43 -0
  18. package/bundled/templates/monorepo/package.json.hbs +28 -0
  19. package/bundled/templates/monorepo/pnpm-workspace.yaml.hbs +5 -0
  20. package/bundled/templates/monorepo/schema.ts.hbs +82 -0
  21. package/bundled/templates/monorepo/template.json +106 -0
  22. package/bundled/templates/monorepo/tsconfig.json.hbs +22 -0
  23. package/bundled/templates/standalone/contract-index.ts.hbs +5 -0
  24. package/bundled/templates/standalone/contract-test.ts.hbs +95 -0
  25. package/bundled/templates/standalone/contracts-root-index.ts.hbs +7 -0
  26. package/bundled/templates/standalone/domain-index.ts.hbs +6 -0
  27. package/bundled/templates/standalone/domain-repository.ts.hbs +44 -0
  28. package/bundled/templates/standalone/domain-service.ts.hbs +102 -0
  29. package/bundled/templates/standalone/gitignore.hbs +27 -0
  30. package/bundled/templates/standalone/invariants.md.hbs +35 -0
  31. package/bundled/templates/standalone/package.json.hbs +41 -0
  32. package/bundled/templates/standalone/schema.ts.hbs +61 -0
  33. package/bundled/templates/standalone/src-index.ts.hbs +11 -0
  34. package/bundled/templates/standalone/template.json +91 -0
  35. package/bundled/templates/standalone/tsconfig.json.hbs +20 -0
  36. package/bundled/templates/standalone/vitest.config.ts.hbs +8 -0
  37. package/bundled/workflows/adversarial-debate.yaml +222 -0
  38. package/bundled/workflows/analyst.yaml +115 -0
  39. package/bundled/workflows/assistant.yaml +74 -0
  40. package/bundled/workflows/code-review-discussion.yaml +166 -0
  41. package/bundled/workflows/code-reviewer.yaml +94 -0
  42. package/bundled/workflows/contract-first-project.yaml +356 -0
  43. package/bundled/workflows/debugger.yaml +107 -0
  44. package/bundled/workflows/designer.yaml +113 -0
  45. package/bundled/workflows/developer.yaml +105 -0
  46. package/bundled/workflows/discuss-step-examples.yaml +153 -0
  47. package/bundled/workflows/infrastructure-automation.yaml +283 -0
  48. package/bundled/workflows/ml-ab-testing.yaml +311 -0
  49. package/bundled/workflows/ml-experiment-tracker.yaml +150 -0
  50. package/bundled/workflows/ml-feature-engineering.yaml +242 -0
  51. package/bundled/workflows/ml-model-evaluation.yaml +234 -0
  52. package/bundled/workflows/ml-model-monitoring.yaml +227 -0
  53. package/bundled/workflows/ml-model-registry.yaml +232 -0
  54. package/bundled/workflows/mlops-deployment.yaml +267 -0
  55. package/bundled/workflows/mobile-development.yaml +312 -0
  56. package/bundled/workflows/multi-model-discussion.yaml +243 -0
  57. package/bundled/workflows/product-discovery.yaml +295 -0
  58. package/bundled/workflows/qa-specialist.yaml +116 -0
  59. package/bundled/workflows/refactoring.yaml +105 -0
  60. package/bundled/workflows/security-audit.yaml +135 -0
  61. package/bundled/workflows/std/analysis.yaml +190 -0
  62. package/bundled/workflows/std/code-review.yaml +117 -0
  63. package/bundled/workflows/std/debugging.yaml +155 -0
  64. package/bundled/workflows/std/documentation.yaml +180 -0
  65. package/bundled/workflows/std/implementation.yaml +197 -0
  66. package/bundled/workflows/std/refactoring.yaml +180 -0
  67. package/bundled/workflows/std/testing.yaml +200 -0
  68. package/bundled/workflows/strategic-planning.yaml +235 -0
  69. package/bundled/workflows/technology-research.yaml +239 -0
  70. package/dist/bootstrap.d.ts.map +1 -1
  71. package/dist/bootstrap.js +10 -6
  72. package/dist/bootstrap.js.map +1 -1
  73. package/dist/commands/discuss.d.ts.map +1 -1
  74. package/dist/commands/discuss.js +4 -1
  75. package/dist/commands/discuss.js.map +1 -1
  76. package/dist/commands/doctor.d.ts +1 -1
  77. package/dist/commands/doctor.js +3 -3
  78. package/dist/commands/doctor.js.map +1 -1
  79. package/dist/commands/init.d.ts.map +1 -1
  80. package/dist/commands/init.js +65 -5
  81. package/dist/commands/init.js.map +1 -1
  82. package/dist/commands/monitor.d.ts.map +1 -1
  83. package/dist/commands/monitor.js +29 -1
  84. package/dist/commands/monitor.js.map +1 -1
  85. package/dist/commands/scaffold.d.ts.map +1 -1
  86. package/dist/commands/scaffold.js +6 -3
  87. package/dist/commands/scaffold.js.map +1 -1
  88. package/dist/commands/setup.d.ts.map +1 -1
  89. package/dist/commands/setup.js +119 -3
  90. package/dist/commands/setup.js.map +1 -1
  91. package/dist/commands/status.d.ts +10 -0
  92. package/dist/commands/status.d.ts.map +1 -1
  93. package/dist/commands/status.js +151 -49
  94. package/dist/commands/status.js.map +1 -1
  95. package/dist/commands/update.d.ts.map +1 -1
  96. package/dist/commands/update.js +1 -43
  97. package/dist/commands/update.js.map +1 -1
  98. package/dist/web/api.d.ts +18 -0
  99. package/dist/web/api.d.ts.map +1 -1
  100. package/dist/web/api.js +480 -39
  101. package/dist/web/api.js.map +1 -1
  102. package/dist/web/dashboard.d.ts.map +1 -1
  103. package/dist/web/dashboard.js +1449 -132
  104. package/dist/web/dashboard.js.map +1 -1
  105. package/package.json +21 -21
@@ -0,0 +1,311 @@
1
+ workflowId: ml-ab-testing
2
+ name: Model A/B Testing
3
+ description: Statistical comparison of model variants in production
4
+ version: "1.0.0"
5
+ category: machine-learning
6
+ tags:
7
+ - ml
8
+ - ab-testing
9
+ - experimentation
10
+ - statistics
11
+
12
+ metadata:
13
+ requiredAbilities:
14
+ - machine-learning
15
+ - statistical-analysis
16
+ - data-analysis
17
+ estimatedDuration: 300
18
+ complexity: high
19
+
20
+ steps:
21
+ - stepId: experiment-design
22
+ name: Design A/B Experiment
23
+ type: prompt
24
+ timeout: 180000
25
+ config:
26
+ agent: data-scientist
27
+ task: |
28
+ Design a statistically rigorous A/B test for model comparison.
29
+
30
+ ## Hypothesis Definition
31
+
32
+ 1. **Null Hypothesis (H0)**:
33
+ - State: "There is no difference between model A and model B"
34
+
35
+ 2. **Alternative Hypothesis (H1)**:
36
+ - State: "Model B performs better than model A"
37
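+ - Test type: one-tailed or two-tailed (the H1 as worded above is directional, i.e. one-tailed; for a two-tailed test, state H1 as "Model B performs differently from model A")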
38
+
39
+ ## Sample Size Calculation
40
+
41
+ Given:
42
+ - Baseline conversion rate / metric: {{baseline_rate}}
43
+ - Minimum detectable effect (MDE): {{mde}}
44
+ - Significance level (alpha): 0.05
45
+ - Statistical power (1-beta): 0.80
46
+
47
+ Calculate:
48
+ - Required sample size per variant
49
+ - Estimated test duration based on traffic
50
+
51
+ ## Metric Definition
52
+
53
+ 1. **Primary Metric**:
54
+ - Metric name
55
+ - Definition
56
+ - Success direction (higher/lower is better)
57
+
58
+ 2. **Secondary Metrics**:
59
+ - List of supporting metrics
60
+
61
+ 3. **Guardrail Metrics**:
62
+ - Metrics that must not degrade
63
+ - Degradation threshold
64
+
65
+ ## Randomization Strategy
66
+ - Randomization unit (user, session, request)
67
+ - Stratification variables (if any)
68
+ - Holdout percentage
69
+
70
+ - stepId: traffic-allocation
71
+ name: Define Traffic Allocation
72
+ type: prompt
73
+ timeout: 60000
74
+ config:
75
+ agent: ml-engineer
76
+ task: |
77
+ Plan the traffic allocation strategy.
78
+
79
+ ## Traffic Split
80
+
81
+ | Variant | Description | Traffic % |
82
+ |---------|-------------|-----------|
83
+ | Control | Current production model | {{control_pct}}% |
84
+ | Treatment | New model candidate | {{treatment_pct}}% |
85
+ | Holdout | No model (baseline) | {{holdout_pct}}% |
86
+
87
+ ## Ramping Strategy
88
+
89
+ - **Day 1-2**: 1% treatment (burn-in period)
90
+ - **Day 3-7**: 10% treatment (early signal)
91
+ - **Day 8-14**: 50% treatment (full test)
92
+
93
+ ## Safeguards
94
+
95
+ 1. **Auto-Rollback Triggers**:
96
+ - Error rate > {{error_threshold}}%
97
+ - Latency P99 > {{latency_threshold}}ms
98
+ - Guardrail metric degradation > {{guardrail_threshold}}%
99
+
100
+ 2. **Manual Review Triggers**:
101
+ - Unusual traffic patterns
102
+ - Unexpected metric movements
103
+
104
+ - stepId: store-experiment-config
105
+ name: Store Experiment Configuration
106
+ type: tool
107
+ timeout: 10000
108
+ tool: memory_store
109
+ config:
110
+ namespace: ml-ab-experiments
111
+ key: "{{experiment_id}}/config"
112
+ value:
113
+ experiment_id: "{{experiment_id}}"
114
+ experiment_name: "{{experiment_name}}"
115
+ status: "configured"
116
+ hypothesis:
117
+ null_hypothesis: "{{h0}}"
118
+ alternative_hypothesis: "{{h1}}"
119
+ variants:
120
+ control:
121
+ model_name: "{{control_model}}"
122
+ model_version: "{{control_version}}"
123
+ traffic_percent: "{{control_pct}}"
124
+ treatment:
125
+ model_name: "{{treatment_model}}"
126
+ model_version: "{{treatment_version}}"
127
+ traffic_percent: "{{treatment_pct}}"
128
+ metrics:
129
+ primary: "{{primary_metric}}"
130
+ secondary: "{{secondary_metrics}}"
131
+ guardrails: "{{guardrail_metrics}}"
132
+ statistical_params:
133
+ significance_level: 0.05
134
+ power: 0.80
135
+ mde: "{{mde}}"
136
+ required_sample_size: "{{sample_size}}"
137
+ schedule:
138
+ start_date: "{{start_date}}"
139
+ estimated_end_date: "{{end_date}}"
140
+ ramping_schedule: "{{ramp_schedule}}"
141
+ created_at: "{{timestamp}}"
142
+ created_by: "{{user}}"
143
+
144
+ - stepId: monitor-experiment
145
+ name: Monitor Running Experiment
146
+ type: prompt
147
+ timeout: 120000
148
+ config:
149
+ agent: data-scientist
150
+ task: |
151
+ Monitor the running A/B experiment for issues.
152
+
153
+ ## Health Checks
154
+
155
+ 1. **Sample Ratio Mismatch (SRM)**:
156
+ - Expected ratio: {{expected_ratio}}
157
+ - Observed ratio: {{observed_ratio}}
158
+ - Chi-squared test for SRM
159
+ - ALERT if p-value < 0.001
160
+
161
+ 2. **Traffic Distribution**:
162
+ - Verify randomization is working
163
+ - Check for selection bias
164
+
165
+ 3. **Guardrail Metrics**:
166
+ - Current values vs baseline
167
+ - Flag any degradation
168
+
169
+ 4. **Data Quality**:
170
+ - Missing data rate
171
+ - Logging issues
172
+
173
+ ## Early Stopping Check
174
+
175
+ - **Sequential testing**: Can we stop early?
176
+ - Current confidence level
177
+ - Projected final sample size
178
+
179
+ ## Issues Found
180
+ - List any anomalies
181
+ - Recommended actions
182
+
183
+ - stepId: analyze-results
184
+ name: Analyze Experiment Results
185
+ type: prompt
186
+ timeout: 180000
187
+ config:
188
+ agent: data-scientist
189
+ task: |
190
+ Perform statistical analysis of A/B test results.
191
+
192
+ ## Primary Metric Analysis
193
+
194
+ 1. **Descriptive Statistics**:
195
+ | Variant | N | Mean | Std | Median |
196
+ |---------|---|------|-----|--------|
197
+ | Control | | | | |
198
+ | Treatment | | | | |
199
+
200
+ 2. **Effect Size**:
201
+ - Absolute difference: {{treatment_mean}} - {{control_mean}}
202
+ - Relative lift: (T - C) / C * 100%
203
+ - 95% Confidence Interval: [{{ci_lower}}, {{ci_upper}}]
204
+
205
+ 3. **Statistical Significance**:
206
+ - Test used: t-test / Mann-Whitney / Chi-squared
207
+ - Test statistic: {{test_stat}}
208
+ - P-value: {{p_value}}
209
+ - Significant at alpha=0.05: Yes/No
210
+
211
+ 4. **Practical Significance**:
212
+ - Is the effect size meaningful?
213
+ - Does it meet MDE threshold?
214
+
215
+ ## Secondary Metrics
216
+
217
+ | Metric | Control | Treatment | Lift | P-value | Significant |
218
+ |--------|---------|-----------|------|---------|-------------|
219
+
220
+ ## Guardrail Metrics
221
+
222
+ | Metric | Control | Treatment | Change | Status |
223
+ |--------|---------|-----------|--------|--------|
224
+
225
+ ## Segment Analysis
226
+
227
+ Analyze effect by key segments:
228
+ - New vs returning users
229
+ - Mobile vs desktop
230
+ - Geographic regions
231
+ - User cohorts
232
+
233
+ ## Novelty/Primacy Effects
234
+
235
+ - Plot metric over time
236
+ - Check for effect decay
237
+ - Estimate steady-state effect
238
+
239
+ - stepId: make-decision
240
+ name: Make Experiment Decision
241
+ type: prompt
242
+ timeout: 120000
243
+ config:
244
+ agent: ml-engineer
245
+ task: |
246
+ Based on analysis, make a decision about the experiment.
247
+
248
+ ## Decision Framework
249
+
250
+ | Significance | Practical Impact | Guardrails | Decision |
251
+ |--------------|------------------|------------|----------|
252
+ | Yes | Positive | Pass | SHIP |
253
+ | Yes | Negative | - | ROLLBACK |
254
+ | No | - | Pass | EXTEND or NO_SHIP |
255
+ | - | - | Fail | ROLLBACK |
256
+
257
+ ## Decision
258
+
259
+ **Recommendation**: {{decision}}
260
+
261
+ **Confidence**: HIGH / MEDIUM / LOW
262
+
263
+ **Reasoning**:
264
+ - Statistical evidence: {{stat_reasoning}}
265
+ - Business impact: {{business_reasoning}}
266
+ - Risk assessment: {{risk_reasoning}}
267
+
268
+ ## If SHIP
269
+ - Rollout plan (gradual vs immediate)
270
+ - Monitoring requirements
271
+ - Success criteria for full rollout
272
+
273
+ ## If ROLLBACK
274
+ - Root cause hypothesis
275
+ - Recommended investigation
276
+ - Next experiment suggestions
277
+
278
+ ## If EXTEND
279
+ - Additional sample size needed
280
+ - New end date
281
+ - Any design changes
282
+
283
+ - stepId: store-results
284
+ name: Store Experiment Results
285
+ type: tool
286
+ timeout: 10000
287
+ tool: memory_store
288
+ config:
289
+ namespace: ml-ab-experiments
290
+ key: "{{experiment_id}}/results"
291
+ ttl: 31536000
292
+ value:
293
+ experiment_id: "{{experiment_id}}"
294
+ status: "completed"
295
+ results:
296
+ primary_metric:
297
+ control_mean: "{{control_mean}}"
298
+ treatment_mean: "{{treatment_mean}}"
299
+ relative_lift: "{{lift}}"
300
+ confidence_interval: "{{ci}}"
301
+ p_value: "{{p_value}}"
302
+ significant: "{{significant}}"
303
+ secondary_metrics: "{{secondary_results}}"
304
+ guardrail_metrics: "{{guardrail_results}}"
305
+ segment_analysis: "{{segment_results}}"
306
+ decision:
307
+ recommendation: "{{decision}}"
308
+ confidence: "{{confidence}}"
309
+ reasoning: "{{reasoning}}"
310
+ completed_at: "{{timestamp}}"
311
+ decided_by: "{{user}}"
@@ -0,0 +1,150 @@
1
+ workflowId: ml-experiment-tracker
2
+ name: ML Experiment Tracker
3
+ description: Track, compare, and manage ML experiments for reproducibility
4
+ version: "1.0.0"
5
+ category: machine-learning
6
+ tags:
7
+ - ml
8
+ - experiments
9
+ - tracking
10
+ - reproducibility
11
+
12
+ metadata:
13
+ requiredAbilities:
14
+ - machine-learning
15
+ - statistical-analysis
16
+ - data-analysis
17
+ estimatedDuration: 300
18
+ complexity: medium
19
+
20
+ steps:
21
+ - stepId: define-experiment
22
+ name: Define Experiment
23
+ type: prompt
24
+ timeout: 60000
25
+ config:
26
+ agent: ml-engineer
27
+ task: |
28
+ Define the ML experiment with the following structure:
29
+
30
+ ## Experiment Definition
31
+
32
+ 1. **Hypothesis**: What are we testing?
33
+ 2. **Baseline Model**: What is the current best model?
34
+ 3. **Metrics to Track**:
35
+ - Primary metric (optimization target)
36
+ - Secondary metrics (guardrails)
37
+ 4. **Success Criteria**: What improvement is significant?
38
+ 5. **Dataset Version**: Which dataset version and data split are we using?
39
+
40
+ Provide structured output for logging.
41
+
42
+ - stepId: log-parameters
43
+ name: Log Experiment Parameters
44
+ type: tool
45
+ timeout: 10000
46
+ tool: memory_store
47
+ config:
48
+ namespace: ml-experiments
49
+ key: "{{experiment_id}}/parameters"
50
+ value:
51
+ experiment_id: "{{experiment_id}}"
52
+ experiment_name: "{{experiment_name}}"
53
+ hypothesis: "{{hypothesis}}"
54
+ model_type: "{{model_type}}"
55
+ hyperparameters: "{{hyperparameters}}"
56
+ dataset_version: "{{dataset_version}}"
57
+ baseline_model: "{{baseline_model}}"
58
+ success_criteria: "{{success_criteria}}"
59
+ created_at: "{{timestamp}}"
60
+ created_by: "{{user}}"
61
+ status: "running"
62
+
63
+ - stepId: log-metrics
64
+ name: Log Training Metrics
65
+ type: tool
66
+ timeout: 10000
67
+ tool: memory_store
68
+ config:
69
+ namespace: ml-experiments
70
+ key: "{{experiment_id}}/metrics"
71
+ value:
72
+ experiment_id: "{{experiment_id}}"
73
+ metrics:
74
+ accuracy: "{{accuracy}}"
75
+ precision: "{{precision}}"
76
+ recall: "{{recall}}"
77
+ f1_score: "{{f1_score}}"
78
+ auc_roc: "{{auc_roc}}"
79
+ loss: "{{loss}}"
80
+ training_metrics:
81
+ training_time_seconds: "{{training_time}}"
82
+ epochs_completed: "{{epochs}}"
83
+ early_stopping_epoch: "{{early_stop_epoch}}"
84
+ resource_usage:
85
+ peak_memory_mb: "{{peak_memory}}"
86
+ gpu_utilization: "{{gpu_util}}"
87
+ logged_at: "{{timestamp}}"
88
+
89
+ - stepId: compare-experiments
90
+ name: Compare with Baseline
91
+ type: prompt
92
+ timeout: 120000
93
+ config:
94
+ agent: data-scientist
95
+ task: |
96
+ Compare experiment {{experiment_id}} against the baseline model.
97
+
98
+ ## Analysis Required
99
+
100
+ 1. **Metric Comparison**:
101
+ - Calculate absolute and relative improvement
102
+ - For each metric: baseline vs experiment
103
+
104
+ 2. **Statistical Significance**:
105
+ - Is the improvement statistically significant?
106
+ - Calculate p-value if applicable
107
+ - Report confidence intervals
108
+
109
+ 3. **Trade-off Analysis**:
110
+ - Accuracy vs inference latency
111
+ - Model complexity vs performance
112
+ - Training cost vs improvement
113
+
114
+ 4. **Recommendation**:
115
+ - PROMOTE: Significant improvement, ready for production
116
+ - ITERATE: Promising but needs refinement
117
+ - REJECT: No improvement or regression
118
+
119
+ Provide structured recommendation with justification.
120
+
121
+ - stepId: update-status
122
+ name: Update Experiment Status
123
+ type: tool
124
+ timeout: 10000
125
+ tool: memory_store
126
+ config:
127
+ namespace: ml-experiments
128
+ key: "{{experiment_id}}/status"
129
+ value:
130
+ experiment_id: "{{experiment_id}}"
131
+ status: "{{recommendation}}"
132
+ comparison_summary: "{{comparison_summary}}"
133
+ statistical_significance: "{{p_value}}"
134
+ recommendation_rationale: "{{rationale}}"
135
+ completed_at: "{{timestamp}}"
136
+
137
+ - stepId: store-comparison-report
138
+ name: Store Comparison Report
139
+ type: tool
140
+ timeout: 10000
141
+ tool: memory_store
142
+ config:
143
+ namespace: ml-experiment-reports
144
+ key: "{{experiment_id}}/comparison"
145
+ ttl: 7776000
146
+ value:
147
+ experiment_id: "{{experiment_id}}"
148
+ baseline_id: "{{baseline_model}}"
149
+ full_report: "{{comparison_report}}"
150
+ created_at: "{{timestamp}}"
@@ -0,0 +1,242 @@
1
+ workflowId: ml-feature-engineering
2
+ name: Feature Engineering Pipeline
3
+ description: Systematic feature creation, validation, and selection
4
+ version: "1.0.0"
5
+ category: machine-learning
6
+ tags:
7
+ - ml
8
+ - features
9
+ - feature-engineering
10
+ - feature-selection
11
+
12
+ metadata:
13
+ requiredAbilities:
14
+ - machine-learning
15
+ - data-analysis
16
+ - feature-engineering
17
+ estimatedDuration: 600
18
+ complexity: high
19
+
20
+ steps:
21
+ - stepId: feature-ideation
22
+ name: Feature Ideation
23
+ type: prompt
24
+ timeout: 180000
25
+ config:
26
+ agent: data-scientist
27
+ task: |
28
+ Given the prediction target and available data, brainstorm candidate features.
29
+
30
+ ## Feature Categories to Consider
31
+
32
+ 1. **Raw Features**:
33
+ - Direct columns from source data
34
+ - Basic transformations (log, sqrt, power)
35
+
36
+ 2. **Domain Knowledge Features**:
37
+ - Business logic features
38
+ - Industry-specific indicators
39
+ - Expert-derived calculations
40
+
41
+ 3. **Interaction Features**:
42
+ - Feature products (A * B)
43
+ - Feature ratios (A / B)
44
+ - Feature differences (A - B)
45
+
46
+ 4. **Time-Based Features** (if temporal data):
47
+ - Lag features
48
+ - Rolling statistics (mean, std, min, max)
49
+ - Time since event
50
+ - Seasonal indicators
51
+ - Trend features
52
+
53
+ 5. **Aggregation Features**:
54
+ - Group-by statistics
55
+ - Entity-level aggregations
56
+ - Window aggregations
57
+
58
+ 6. **Text Features** (if text data):
59
+ - TF-IDF
60
+ - Word embeddings
61
+ - Sentiment scores
62
+ - Named entities
63
+
64
+ 7. **Categorical Encodings**:
65
+ - One-hot encoding
66
+ - Target encoding
67
+ - Frequency encoding
68
+ - Embedding encoding
69
+
70
+ ## Output Format
71
+ For each candidate feature:
72
+ - Feature name
73
+ - Description
74
+ - Calculation logic
75
+ - Expected predictive value (hypothesis)
76
+ - Implementation complexity (low/medium/high)
77
+
78
+ - stepId: feature-implementation
79
+ name: Implement Features
80
+ type: prompt
81
+ timeout: 300000
82
+ config:
83
+ agent: data-scientist
84
+ task: |
85
+ Implement the candidate features with production-quality code.
86
+
87
+ ## For Each Feature
88
+
89
+ 1. **Transformation Code**:
90
+ ```python
91
+ def compute_feature_name(df: pd.DataFrame) -> pd.Series:
92
+ '''
93
+ Description of what this feature represents.
94
+
95
+ Args:
96
+ df: Input dataframe with required columns
97
+
98
+ Returns:
99
+ pd.Series: Computed feature values
100
+ '''
101
+ # Implementation
102
+ pass
103
+ ```
104
+
105
+ 2. **Missing Value Handling**:
106
+ - Strategy: drop / fill_mean / fill_median / fill_mode / fill_constant
107
+ - Justification for chosen strategy
108
+
109
+ 3. **Edge Case Handling**:
110
+ - Division by zero
111
+ - Negative values for log transforms
112
+ - Null propagation
113
+
114
+ 4. **Data Type**:
115
+ - Output dtype
116
+ - Value range
117
+
118
+ ## Feature Pipeline
119
+ Create a combined feature pipeline function that:
120
+ - Takes raw data
121
+ - Applies all transformations
122
+ - Returns feature matrix
123
+
124
+ - stepId: feature-validation
125
+ name: Validate Features
126
+ type: prompt
127
+ timeout: 180000
128
+ config:
129
+ agent: data-scientist
130
+ task: |
131
+ Validate feature quality and check for common issues.
132
+
133
+ ## Data Leakage Check
134
+
135
+ 1. **Target Leakage**:
136
+ - Does any feature contain information from the target?
137
+ - Is any feature computed using future data?
138
+ - Check correlation with target on train vs test
139
+
140
+ 2. **Train-Test Leakage**:
141
+ - Are features computed using test data statistics?
142
+ - Is any global normalization applied before split?
143
+
144
+ ## Feature Quality Checks
145
+
146
+ 1. **Missing Values**:
147
+ - Missing rate per feature
148
+ - Flag features with >50% missing
149
+
150
+ 2. **Cardinality**:
151
+ - Unique value count
152
+ - Flag high-cardinality categoricals
153
+
154
+ 3. **Distribution**:
155
+ - Check for extreme skewness
156
+ - Identify outliers
157
+ - Check for constant features
158
+
159
+ 4. **Correlation Analysis**:
160
+ - Correlation with target
161
+ - Inter-feature correlation matrix
162
+ - Flag highly correlated pairs (>0.95)
163
+
164
+ ## Validation Report
165
+ For each feature:
166
+ - Leakage risk: SAFE / WARNING / DANGEROUS
167
+ - Quality score: 1-10
168
+ - Issues found
169
+ - Recommendations
170
+
171
+ - stepId: feature-selection
172
+ name: Select Final Features
173
+ type: prompt
174
+ timeout: 180000
175
+ config:
176
+ agent: ml-engineer
177
+ task: |
178
+ Select the final feature set for modeling.
179
+
180
+ ## Selection Methods
181
+
182
+ 1. **Importance-Based Selection**:
183
+ - SHAP values from baseline model
184
+ - Permutation importance
185
+ - Random forest feature importance
186
+
187
+ 2. **Statistical Selection**:
188
+ - Mutual information with target
189
+ - Chi-squared test (categorical)
190
+ - ANOVA F-value (continuous)
191
+
192
+ 3. **Iterative Selection**:
193
+ - Forward selection results
194
+ - Backward elimination results
195
+ - Recursive feature elimination
196
+
197
+ ## Selection Criteria
198
+
199
+ - Remove features with:
200
+ - Near-zero variance
201
+ - High missing rate (>50%)
202
+ - High correlation with other features (>0.95)
203
+ - Low importance (bottom 10%)
204
+ - Data leakage risk
205
+
206
+ - Keep features with:
207
+ - High target correlation
208
+ - Domain importance
209
+ - Unique information
210
+
211
+ ## Final Feature Set
212
+
213
+ | Feature | Importance Rank | Rationale |
214
+ |---------|-----------------|-----------|
215
+ | feature_1 | 1 | ... |
216
+ | feature_2 | 2 | ... |
217
+
218
+ Total features selected: X out of Y candidates
219
+
220
+ - stepId: store-feature-definitions
221
+ name: Store Feature Definitions
222
+ type: tool
223
+ timeout: 10000
224
+ tool: memory_store
225
+ config:
226
+ namespace: ml-feature-store
227
+ key: "{{feature_set_name}}/{{version}}"
228
+ value:
229
+ feature_set_name: "{{feature_set_name}}"
230
+ version: "{{version}}"
231
+ description: "{{description}}"
232
+ target_variable: "{{target}}"
233
+ features: "{{feature_definitions}}"
234
+ validation_results:
235
+ leakage_check: "{{leakage_status}}"
236
+ quality_scores: "{{quality_scores}}"
237
+ selection_summary:
238
+ total_candidates: "{{total_candidates}}"
239
+ selected_count: "{{selected_count}}"
240
+ selection_method: "{{selection_method}}"
241
+ created_at: "{{timestamp}}"
242
+ created_by: "{{user}}"