musubi-sdd 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/bin/musubi-browser.js +0 -0
  2. package/bin/musubi-convert.js +0 -0
  3. package/bin/musubi-gui.js +0 -0
  4. package/bin/musubi-validate.js +0 -10
  5. package/package.json +1 -1
  6. package/src/templates/agents/claude-code/skills/ai-ml-engineer/mlops-guide.md +350 -0
  7. package/src/templates/agents/claude-code/skills/ai-ml-engineer/model-card-template.md +246 -0
  8. package/src/templates/agents/claude-code/skills/api-designer/api-patterns.md +336 -0
  9. package/src/templates/agents/claude-code/skills/api-designer/openapi-template.md +376 -0
  10. package/src/templates/agents/claude-code/skills/bug-hunter/root-cause-analysis.md +177 -0
  11. package/src/templates/agents/claude-code/skills/change-impact-analyzer/dependency-graph-patterns.md +348 -0
  12. package/src/templates/agents/claude-code/skills/change-impact-analyzer/impact-analysis-template.md +246 -0
  13. package/src/templates/agents/claude-code/skills/cloud-architect/aws-patterns.md +239 -0
  14. package/src/templates/agents/claude-code/skills/cloud-architect/azure-patterns.md +300 -0
  15. package/src/templates/agents/claude-code/skills/cloud-architect/terraform-templates/azure-webapp.tf +337 -0
  16. package/src/templates/agents/claude-code/skills/code-reviewer/best-practices.md +155 -0
  17. package/src/templates/agents/claude-code/skills/code-reviewer/review-checklist.md +184 -0
  18. package/src/templates/agents/claude-code/skills/code-reviewer/review-standards.md +272 -0
  19. package/src/templates/agents/claude-code/skills/constitution-enforcer/constitutional-articles.md +449 -0
  20. package/src/templates/agents/claude-code/skills/constitution-enforcer/phase-minus-one-gates.md +375 -0
  21. package/src/templates/agents/claude-code/skills/database-administrator/backup-recovery.md +331 -0
  22. package/src/templates/agents/claude-code/skills/database-administrator/tuning-guide.md +314 -0
  23. package/src/templates/agents/claude-code/skills/database-schema-designer/schema-patterns.md +335 -0
  24. package/src/templates/agents/claude-code/skills/devops-engineer/ci-cd-templates.md +443 -0
  25. package/src/templates/agents/claude-code/skills/devops-engineer/pipeline-templates/github-actions.yml +311 -0
  26. package/src/templates/agents/claude-code/skills/devops-engineer/pipeline-templates/gitlab-ci.yml +255 -0
  27. package/src/templates/agents/claude-code/skills/orchestrator/patterns.md +266 -0
  28. package/src/templates/agents/claude-code/skills/orchestrator/selection-matrix.md +185 -0
  29. package/src/templates/agents/claude-code/skills/performance-engineer/optimization-playbook.md +306 -0
  30. package/src/templates/agents/claude-code/skills/performance-optimizer/benchmark-template.md +272 -0
  31. package/src/templates/agents/claude-code/skills/performance-optimizer/optimization-patterns.md +273 -0
  32. package/src/templates/agents/claude-code/skills/project-manager/agile-ceremonies.md +283 -0
  33. package/src/templates/agents/claude-code/skills/project-manager/project-templates.md +345 -0
  34. package/src/templates/agents/claude-code/skills/quality-assurance/qa-plan-template.md +219 -0
  35. package/src/templates/agents/claude-code/skills/release-coordinator/feature-flag-guide.md +312 -0
  36. package/src/templates/agents/claude-code/skills/release-coordinator/release-plan-template.md +230 -0
  37. package/src/templates/agents/claude-code/skills/requirements-analyst/ears-format.md +259 -0
  38. package/src/templates/agents/claude-code/skills/requirements-analyst/validation-rules.md +359 -0
  39. package/src/templates/agents/claude-code/skills/security-auditor/audit-checklists.md +243 -0
  40. package/src/templates/agents/claude-code/skills/security-auditor/owasp-top-10.md +349 -0
  41. package/src/templates/agents/claude-code/skills/security-auditor/vulnerability-patterns.md +295 -0
  42. package/src/templates/agents/claude-code/skills/site-reliability-engineer/incident-response-template.md +286 -0
  43. package/src/templates/agents/claude-code/skills/site-reliability-engineer/observability-patterns.md +359 -0
  44. package/src/templates/agents/claude-code/skills/site-reliability-engineer/slo-sli-guide.md +302 -0
  45. package/src/templates/agents/claude-code/skills/software-developer/solid-principles.md +348 -0
  46. package/src/templates/agents/claude-code/skills/software-developer/test-first-workflow.md +370 -0
  47. package/src/templates/agents/claude-code/skills/steering/auto-update-rules.md +328 -0
  48. package/src/templates/agents/claude-code/skills/system-architect/adr-template.md +295 -0
  49. package/src/templates/agents/claude-code/skills/system-architect/c4-model-guide.md +328 -0
  50. package/src/templates/agents/claude-code/skills/technical-writer/doc-templates/documentation-templates.md +436 -0
  51. package/src/templates/agents/claude-code/skills/test-engineer/ears-test-mapping.md +444 -0
  52. package/src/templates/agents/claude-code/skills/test-engineer/test-types.md +425 -0
  53. package/src/templates/agents/claude-code/skills/traceability-auditor/coverage-matrix-template.md +131 -0
  54. package/src/templates/agents/claude-code/skills/traceability-auditor/gap-detection-rules.md +227 -0
  55. package/src/templates/agents/claude-code/skills/ui-ux-designer/accessibility-guidelines.md +318 -0
  56. package/src/templates/agents/claude-code/skills/ui-ux-designer/design-system-components.md +345 -0
  57. package/src/validators/constitutional-validator.js +494 -0
File without changes
File without changes
package/bin/musubi-gui.js CHANGED
File without changes
@@ -184,16 +184,6 @@ program
184
184
  }
185
185
  });
186
186
 
187
- // All validations (duplicate removed, keeping original)
188
-
189
- displayResults('Complexity Validation', results, options);
190
- process.exit(results.passed ? 0 : 1);
191
- } catch (error) {
192
- console.error(chalk.red('✗ Validation error:'), error.message);
193
- process.exit(1);
194
- }
195
- });
196
-
197
187
  // All validations
198
188
  program
199
189
  .command('all')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "musubi-sdd",
3
- "version": "3.0.0",
3
+ "version": "3.0.1",
4
4
  "description": "Ultimate Specification Driven Development Tool with 27 Agents for 7 AI Coding Platforms + MCP Integration (Claude Code, GitHub Copilot, Cursor, Gemini CLI, Windsurf, Codex, Qwen Code)",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -0,0 +1,350 @@
1
+ # MLOps Guide
2
+
3
+ ## Overview
4
+
5
+ Best practices for Machine Learning Operations (MLOps) in production systems.
6
+
7
+ ---
8
+
9
+ ## MLOps Lifecycle
10
+
11
+ ```
12
+ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐
13
+ │ Data │───▶│ Train │───▶│ Deploy │───▶│ Monitor │
14
+ └──────────┘ └──────────┘ └──────────┘ └──────────┘
15
+ │ │ │ │
16
+ └───────────────┴───────────────┴───────────────┘
17
+ Continuous Loop
18
+ ```
19
+
20
+ ---
21
+
22
+ ## 1. Data Management
23
+
24
+ ### Data Versioning
25
+
26
+ ```bash
27
+ # Using DVC (Data Version Control)
28
+ dvc init
29
+ dvc add data/training_data.csv
30
+ git add data/training_data.csv.dvc .gitignore
31
+ git commit -m "Add training data v1"
32
+ ```
33
+
34
+ ### Data Pipeline
35
+
36
+ ```python
37
+ # data_pipeline.py
38
+ from prefect import flow, task
39
+
40
+ @task
41
+ def extract_data(source: str) -> pd.DataFrame:
42
+ return pd.read_csv(source)
43
+
44
+ @task
45
+ def transform_data(df: pd.DataFrame) -> pd.DataFrame:
46
+ # Feature engineering
47
+ df['feature_1'] = df['col_a'] * df['col_b']
48
+ return df
49
+
50
+ @task
51
+ def validate_data(df: pd.DataFrame) -> bool:
52
+ # Data quality checks
53
+ assert df['feature_1'].isnull().sum() == 0
54
+ return True
55
+
56
+ @flow
57
+ def data_pipeline(source: str):
58
+ df = extract_data(source)
59
+ df = transform_data(df)
60
+ validate_data(df)
61
+ return df
62
+ ```
63
+
64
+ ---
65
+
66
+ ## 2. Experiment Tracking
67
+
68
+ ### MLflow Setup
69
+
70
+ ```python
71
+ import mlflow
72
+ from mlflow.tracking import MlflowClient
73
+
74
+ # Set tracking URI
75
+ mlflow.set_tracking_uri("http://mlflow-server:5000")
76
+ mlflow.set_experiment("my-experiment")
77
+
78
+ # Log experiment
79
+ with mlflow.start_run():
80
+ # Log parameters
81
+ mlflow.log_param("learning_rate", 0.01)
82
+ mlflow.log_param("epochs", 100)
83
+
84
+ # Train model
85
+ model = train_model(X_train, y_train)
86
+
87
+ # Log metrics
88
+ mlflow.log_metric("accuracy", accuracy)
89
+ mlflow.log_metric("f1_score", f1)
90
+
91
+ # Log model
92
+ mlflow.sklearn.log_model(model, "model")
93
+
94
+ # Log artifacts
95
+ mlflow.log_artifact("feature_importance.png")
96
+ ```
97
+
98
+ ---
99
+
100
+ ## 3. Model Registry
101
+
102
+ ### Model Versioning
103
+
104
+ ```python
105
+ from mlflow.tracking import MlflowClient
106
+
107
+ client = MlflowClient()
108
+
109
+ # Register model
110
+ model_uri = f"runs:/{run_id}/model"
111
+ mv = client.create_model_version(
112
+ name="my-model",
113
+ source=model_uri,
114
+ run_id=run_id
115
+ )
116
+
117
+ # Transition to staging
118
+ client.transition_model_version_stage(
119
+ name="my-model",
120
+ version=mv.version,
121
+ stage="Staging"
122
+ )
123
+
124
+ # Promote to production
125
+ client.transition_model_version_stage(
126
+ name="my-model",
127
+ version=mv.version,
128
+ stage="Production"
129
+ )
130
+ ```
131
+
132
+ ### Model Metadata
133
+
134
+ ```yaml
135
+ # model_metadata.yaml
136
+ model:
137
+ name: fraud-detector
138
+ version: 2.1.0
139
+ framework: scikit-learn
140
+
141
+ training:
142
+ date: 2024-01-15
143
+ dataset_version: v1.2
144
+ metrics:
145
+ accuracy: 0.95
146
+ f1_score: 0.92
147
+
148
+ requirements:
149
+ - scikit-learn==1.3.0
150
+ - pandas==2.0.0
151
+
152
+ schema:
153
+ input:
154
+ - name: amount
155
+ type: float
156
+ - name: category
157
+ type: string
158
+ output:
159
+ - name: is_fraud
160
+ type: boolean
161
+ - name: confidence
162
+ type: float
163
+ ```
164
+
165
+ ---
166
+
167
+ ## 4. Model Deployment
168
+
169
+ ### Serving with FastAPI
170
+
171
+ ```python
172
+ from fastapi import FastAPI
173
+ import mlflow
174
+
175
+ app = FastAPI()
176
+
177
+ # Load model on startup
178
+ model = mlflow.sklearn.load_model("models:/my-model/Production")
179
+
180
+ @app.post("/predict")
181
+ async def predict(features: dict):
182
+ df = pd.DataFrame([features])
183
+ prediction = model.predict(df)
184
+ probability = model.predict_proba(df)
185
+
186
+ return {
187
+ "prediction": int(prediction[0]),
188
+ "confidence": float(probability[0].max())
189
+ }
190
+
191
+ @app.get("/health")
192
+ async def health():
193
+ return {"status": "healthy", "model_version": "2.1.0"}
194
+ ```
195
+
196
+ ### Deployment Strategies
197
+
198
+ | Strategy | Description | Use Case |
199
+ |----------|-------------|----------|
200
+ | Shadow | Run the new model in parallel with the existing one, without serving its predictions | Validate new model |
201
+ | Canary | Gradual traffic shift | Safe rollout |
202
+ | Blue-Green | Full switch | Quick rollback |
203
+ | A/B Test | Split traffic | Compare models |
204
+
205
+ ---
206
+
207
+ ## 5. Monitoring
208
+
209
+ ### Prediction Logging
210
+
211
+ ```python
212
+ import logging
213
+ from datetime import datetime
214
+
215
+ def log_prediction(request, response, latency_ms):
216
+ logging.info({
217
+ "timestamp": datetime.utcnow().isoformat(),
218
+ "request_id": request.id,
219
+ "features": request.features,
220
+ "prediction": response.prediction,
221
+ "confidence": response.confidence,
222
+ "latency_ms": latency_ms,
223
+ "model_version": "2.1.0"
224
+ })
225
+ ```
226
+
227
+ ### Data Drift Detection
228
+
229
+ ```python
230
+ from scipy import stats
231
+
232
+ def detect_drift(reference_data, current_data, threshold=0.05):
233
+ """Detect distribution drift using KS test."""
234
+ drifted_features = []
235
+
236
+ for column in reference_data.columns:
237
+ statistic, p_value = stats.ks_2samp(
238
+ reference_data[column],
239
+ current_data[column]
240
+ )
241
+
242
+ if p_value < threshold:
243
+ drifted_features.append({
244
+ "feature": column,
245
+ "p_value": p_value,
246
+ "statistic": statistic
247
+ })
248
+
249
+ return drifted_features
250
+ ```
251
+
252
+ ### Performance Metrics
253
+
254
+ ```python
255
+ # Prometheus metrics for ML
256
+ from prometheus_client import Counter, Histogram, Gauge
257
+
258
+ prediction_counter = Counter(
259
+ 'model_predictions_total',
260
+ 'Total predictions',
261
+ ['model_version', 'prediction_class']
262
+ )
263
+
264
+ prediction_latency = Histogram(
265
+ 'model_prediction_latency_seconds',
266
+ 'Prediction latency',
267
+ ['model_version']
268
+ )
269
+
270
+ model_accuracy = Gauge(
271
+ 'model_accuracy',
272
+ 'Current model accuracy',
273
+ ['model_version']
274
+ )
275
+ ```
276
+
277
+ ---
278
+
279
+ ## 6. CI/CD for ML
280
+
281
+ ### GitHub Actions Pipeline
282
+
283
+ ```yaml
284
+ # .github/workflows/ml-pipeline.yml
285
+ name: ML Pipeline
286
+
287
+ on:
288
+ push:
289
+ paths:
290
+ - 'models/**'
291
+ - 'data/**'
292
+
293
+ jobs:
294
+ train:
295
+ runs-on: ubuntu-latest
296
+ steps:
297
+ - uses: actions/checkout@v4
298
+
299
+ - name: Setup Python
300
+ uses: actions/setup-python@v4
301
+ with:
302
+ python-version: '3.10'
303
+
304
+ - name: Install dependencies
305
+ run: pip install -r requirements.txt
306
+
307
+ - name: Pull data
308
+ run: dvc pull
309
+
310
+ - name: Train model
311
+ run: python train.py
312
+
313
+ - name: Evaluate model
314
+ run: python evaluate.py
315
+
316
+ - name: Register model
317
+ if: github.ref == 'refs/heads/main'
318
+ run: python register_model.py
319
+ ```
320
+
321
+ ---
322
+
323
+ ## 7. Best Practices
324
+
325
+ ### Reproducibility Checklist
326
+
327
+ - [ ] Code versioned in Git
328
+ - [ ] Data versioned with DVC
329
+ - [ ] Dependencies pinned (requirements.txt)
330
+ - [ ] Random seeds set
331
+ - [ ] Experiments logged in MLflow
332
+ - [ ] Model artifacts stored
333
+
334
+ ### Model Validation Checklist
335
+
336
+ - [ ] Performance metrics acceptable
337
+ - [ ] No data leakage
338
+ - [ ] Fairness metrics checked
339
+ - [ ] Edge cases tested
340
+ - [ ] Latency requirements met
341
+ - [ ] Memory usage acceptable
342
+
343
+ ### Production Checklist
344
+
345
+ - [ ] Model card documented
346
+ - [ ] API versioned
347
+ - [ ] Health checks implemented
348
+ - [ ] Monitoring in place
349
+ - [ ] Rollback procedure defined
350
+ - [ ] A/B test framework ready
@@ -0,0 +1,246 @@
1
+ # Model Card Template
2
+
3
+ ## Overview
4
+
5
+ Template for documenting machine learning models following best practices.
6
+
7
+ ---
8
+
9
+ ## Model Card Document
10
+
11
+ ```markdown
12
+ # Model Card: [Model Name]
13
+
14
+ ## Model Details
15
+
16
+ ### Basic Information
17
+ | Field | Value |
18
+ |-------|-------|
19
+ | Model Name | [Name] |
20
+ | Version | [X.Y.Z] |
21
+ | Type | [Classification/Regression/etc.] |
22
+ | Framework | [TensorFlow/PyTorch/scikit-learn] |
23
+ | Date | YYYY-MM-DD |
24
+ | Authors | [Team/Names] |
25
+
26
+ ### Description
27
+ [Brief description of what the model does]
28
+
29
+ ### Intended Use
30
+ - **Primary Use Cases**: [What it's designed for]
31
+ - **Intended Users**: [Who should use it]
32
+ - **Out-of-Scope Uses**: [What it shouldn't be used for]
33
+
34
+ ---
35
+
36
+ ## Model Architecture
37
+
38
+ ### Overview
39
+ [Description of model architecture]
40
+
41
+ ### Inputs
42
+ | Name | Type | Shape | Description |
43
+ |------|------|-------|-------------|
44
+ | feature_1 | float | (1,) | Transaction amount |
45
+ | feature_2 | int | (1,) | Category code |
46
+
47
+ ### Outputs
48
+ | Name | Type | Shape | Description |
49
+ |------|------|-------|-------------|
50
+ | prediction | int | (1,) | Class label |
51
+ | probability | float | (n_classes,) | Class probabilities |
52
+
53
+ ### Hyperparameters
54
+ | Parameter | Value |
55
+ |-----------|-------|
56
+ | learning_rate | 0.001 |
57
+ | batch_size | 32 |
58
+ | epochs | 100 |
59
+
60
+ ---
61
+
62
+ ## Training Data
63
+
64
+ ### Dataset Description
65
+ | Field | Value |
66
+ |-------|-------|
67
+ | Name | [Dataset name] |
68
+ | Version | [Version] |
69
+ | Size | [N samples] |
70
+ | Date Range | [Start] to [End] |
71
+
72
+ ### Data Distribution
73
+ | Class | Distribution |
74
+ |---------|--------------|
75
+ | Class 0 | 85% |
76
+ | Class 1 | 15% |
77
+
78
+ ### Preprocessing
79
+ - [Step 1]: [Description]
80
+ - [Step 2]: [Description]
81
+
82
+ ### Data Splits
83
+ | Split | Size | Purpose |
84
+ |-------|------|---------|
85
+ | Train | 70% | Model training |
86
+ | Validation | 15% | Hyperparameter tuning |
87
+ | Test | 15% | Final evaluation |
88
+
89
+ ---
90
+
91
+ ## Evaluation
92
+
93
+ ### Metrics
94
+ | Metric | Value | Threshold |
95
+ |--------|-------|-----------|
96
+ | Accuracy | 0.95 | > 0.90 |
97
+ | Precision | 0.92 | > 0.85 |
98
+ | Recall | 0.88 | > 0.80 |
99
+ | F1 Score | 0.90 | > 0.85 |
100
+ | AUC-ROC | 0.97 | > 0.90 |
101
+
102
+ ### Confusion Matrix
103
+ ~~~
104
+ Predicted
105
+ 0 1
106
+ Actual 0 850 50
107
+ 1 30 120
108
+ ~~~
109
+
110
+ ### Performance by Subgroup
111
+ | Subgroup | Accuracy | Size |
112
+ |----------|----------|------|
113
+ | Group A | 0.96 | 400 |
114
+ | Group B | 0.94 | 350 |
115
+ | Group C | 0.93 | 250 |
116
+
117
+ ---
118
+
119
+ ## Fairness & Bias
120
+
121
+ ### Evaluation
122
+ | Metric | Group A | Group B | Threshold |
123
+ |--------|---------|---------|-----------|
124
+ | TPR | 0.89 | 0.87 | Δ < 0.05 ✅ |
125
+ | FPR | 0.08 | 0.09 | Δ < 0.05 ✅ |
126
+ | PPV | 0.91 | 0.89 | Δ < 0.05 ✅ |
127
+
128
+ ### Mitigation Steps
129
+ - [Step taken to address bias]
130
+
131
+ ### Known Limitations
132
+ - [Limitation 1]
133
+ - [Limitation 2]
134
+
135
+ ---
136
+
137
+ ## Ethical Considerations
138
+
139
+ ### Potential Risks
140
+ - [Risk 1]: [Mitigation]
141
+ - [Risk 2]: [Mitigation]
142
+
143
+ ### Use Cases to Avoid
144
+ - [Should not be used for X]
145
+
146
+ ---
147
+
148
+ ## Deployment
149
+
150
+ ### Requirements
151
+ ~~~
152
+ python>=3.9
153
+ tensorflow==2.12.0
154
+ numpy==1.24.0
155
+ ~~~
156
+
157
+ ### Resource Requirements
158
+ | Resource | Minimum | Recommended |
159
+ |----------|---------|-------------|
160
+ | CPU | 2 cores | 4 cores |
161
+ | Memory | 2 GB | 4 GB |
162
+ | GPU | - | NVIDIA T4 |
163
+
164
+ ### Latency
165
+ | Percentile | Latency |
166
+ |------------|---------|
167
+ | p50 | 15ms |
168
+ | p95 | 45ms |
169
+ | p99 | 80ms |
170
+
171
+ ### Endpoints
172
+ | Endpoint | Method | Description |
173
+ |----------|--------|-------------|
174
+ | /predict | POST | Get prediction |
175
+ | /health | GET | Health check |
176
+
177
+ ---
178
+
179
+ ## Monitoring
180
+
181
+ ### Metrics to Track
182
+ - Prediction distribution
183
+ - Latency percentiles
184
+ - Error rate
185
+ - Data drift indicators
186
+
187
+ ### Alerting Thresholds
188
+ | Metric | Warning | Critical |
189
+ |--------|---------|----------|
190
+ | Latency p99 | > 100ms | > 200ms |
191
+ | Error Rate | > 1% | > 5% |
192
+ | Drift Score | > 0.1 | > 0.2 |
193
+
194
+ ### Retraining Triggers
195
+ - [Trigger 1]: [Condition]
196
+ - [Trigger 2]: [Condition]
197
+
198
+ ---
199
+
200
+ ## Version History
201
+
202
+ | Version | Date | Changes |
203
+ |---------|------|---------|
204
+ | 1.0.0 | YYYY-MM-DD | Initial release |
205
+ | 1.1.0 | YYYY-MM-DD | Added feature X |
206
+ | 2.0.0 | YYYY-MM-DD | Major architecture change |
207
+
208
+ ---
209
+
210
+ ## References
211
+
212
+ - [Link to training code]
213
+ - [Link to data documentation]
214
+ - [Link to related papers]
215
+ - [Link to API documentation]
216
+
217
+ ---
218
+
219
+ ## Contact
220
+
221
+ For questions or issues:
222
+ - Team: [Team name]
223
+ - Email: [Contact email]
224
+ - Slack: [Channel]
225
+ ```
226
+
227
+ ---
228
+
229
+ ## Quick Checklist
230
+
231
+ ### Before Release
232
+ - [ ] Model architecture documented
233
+ - [ ] Training data described
234
+ - [ ] Evaluation metrics included
235
+ - [ ] Fairness analysis completed
236
+ - [ ] Ethical risks assessed
237
+ - [ ] Deployment requirements listed
238
+ - [ ] Monitoring plan defined
239
+ - [ ] Version history updated
240
+
241
+ ### Review Questions
242
+ 1. Is the intended use clearly defined?
243
+ 2. Are limitations and risks documented?
244
+ 3. Can another team reproduce training?
245
+ 4. Are bias metrics acceptable?
246
+ 5. Is there a monitoring plan?