ma-agents 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/.opencode/skills/.ma-agents.json +99 -99
  2. package/.roo/skills/.ma-agents.json +99 -99
  3. package/README.md +19 -1
  4. package/bin/cli.js +55 -0
  5. package/lib/agents.js +23 -0
  6. package/lib/bmad-cache/cache-manifest.json +1 -1
  7. package/lib/bmad-customizations/bmm-demerzel.customize.yaml +36 -0
  8. package/lib/bmad-customizations/demerzel.md +32 -0
  9. package/lib/bmad-extension/module-help.csv +13 -0
  10. package/lib/bmad-extension/skills/bmad-ma-agent-ml/.gitkeep +0 -0
  11. package/lib/bmad-extension/skills/bmad-ma-agent-ml/SKILL.md +59 -0
  12. package/lib/bmad-extension/skills/bmad-ma-agent-ml/bmad-skill-manifest.yaml +11 -0
  13. package/lib/bmad-extension/skills/generate-backlog/.gitkeep +0 -0
  14. package/lib/bmad-extension/skills/ml-advise/.gitkeep +0 -0
  15. package/lib/bmad-extension/skills/ml-advise/SKILL.md +76 -0
  16. package/lib/bmad-extension/skills/ml-advise/bmad-skill-manifest.yaml +3 -0
  17. package/lib/bmad-extension/skills/ml-advise/skill.json +7 -0
  18. package/lib/bmad-extension/skills/ml-analysis/.gitkeep +0 -0
  19. package/lib/bmad-extension/skills/ml-analysis/SKILL.md +60 -0
  20. package/lib/bmad-extension/skills/ml-analysis/bmad-skill-manifest.yaml +3 -0
  21. package/lib/bmad-extension/skills/ml-analysis/skill.json +7 -0
  22. package/lib/bmad-extension/skills/ml-architecture/.gitkeep +0 -0
  23. package/lib/bmad-extension/skills/ml-architecture/SKILL.md +55 -0
  24. package/lib/bmad-extension/skills/ml-architecture/bmad-skill-manifest.yaml +3 -0
  25. package/lib/bmad-extension/skills/ml-architecture/skill.json +7 -0
  26. package/lib/bmad-extension/skills/ml-detailed-design/.gitkeep +0 -0
  27. package/lib/bmad-extension/skills/ml-detailed-design/SKILL.md +67 -0
  28. package/lib/bmad-extension/skills/ml-detailed-design/bmad-skill-manifest.yaml +3 -0
  29. package/lib/bmad-extension/skills/ml-detailed-design/skill.json +7 -0
  30. package/lib/bmad-extension/skills/ml-eda/.gitkeep +0 -0
  31. package/lib/bmad-extension/skills/ml-eda/SKILL.md +56 -0
  32. package/lib/bmad-extension/skills/ml-eda/bmad-skill-manifest.yaml +3 -0
  33. package/lib/bmad-extension/skills/ml-eda/scripts/baseline_classifier.py +522 -0
  34. package/lib/bmad-extension/skills/ml-eda/scripts/class_weights_calculator.py +295 -0
  35. package/lib/bmad-extension/skills/ml-eda/scripts/clustering_explorer.py +383 -0
  36. package/lib/bmad-extension/skills/ml-eda/scripts/eda_analyzer.py +654 -0
  37. package/lib/bmad-extension/skills/ml-eda/skill.json +7 -0
  38. package/lib/bmad-extension/skills/ml-experiment/.gitkeep +0 -0
  39. package/lib/bmad-extension/skills/ml-experiment/SKILL.md +74 -0
  40. package/lib/bmad-extension/skills/ml-experiment/assets/advanced_trainer_configs.py +430 -0
  41. package/lib/bmad-extension/skills/ml-experiment/assets/quick_trainer_setup.py +233 -0
  42. package/lib/bmad-extension/skills/ml-experiment/assets/template_datamodule.py +219 -0
  43. package/lib/bmad-extension/skills/ml-experiment/assets/template_gnn_module.py +341 -0
  44. package/lib/bmad-extension/skills/ml-experiment/assets/template_lightning_module.py +158 -0
  45. package/lib/bmad-extension/skills/ml-experiment/bmad-skill-manifest.yaml +3 -0
  46. package/lib/bmad-extension/skills/ml-experiment/skill.json +7 -0
  47. package/lib/bmad-extension/skills/ml-hparam/.gitkeep +0 -0
  48. package/lib/bmad-extension/skills/ml-hparam/SKILL.md +81 -0
  49. package/lib/bmad-extension/skills/ml-hparam/bmad-skill-manifest.yaml +3 -0
  50. package/lib/bmad-extension/skills/ml-hparam/skill.json +7 -0
  51. package/lib/bmad-extension/skills/ml-ideation/.gitkeep +0 -0
  52. package/lib/bmad-extension/skills/ml-ideation/SKILL.md +50 -0
  53. package/lib/bmad-extension/skills/ml-ideation/bmad-skill-manifest.yaml +3 -0
  54. package/lib/bmad-extension/skills/ml-ideation/scripts/validate_ml_prd.py +287 -0
  55. package/lib/bmad-extension/skills/ml-ideation/skill.json +7 -0
  56. package/lib/bmad-extension/skills/ml-infra/.gitkeep +0 -0
  57. package/lib/bmad-extension/skills/ml-infra/SKILL.md +58 -0
  58. package/lib/bmad-extension/skills/ml-infra/bmad-skill-manifest.yaml +3 -0
  59. package/lib/bmad-extension/skills/ml-infra/skill.json +7 -0
  60. package/lib/bmad-extension/skills/ml-retrospective/.gitkeep +0 -0
  61. package/lib/bmad-extension/skills/ml-retrospective/SKILL.md +63 -0
  62. package/lib/bmad-extension/skills/ml-retrospective/bmad-skill-manifest.yaml +3 -0
  63. package/lib/bmad-extension/skills/ml-retrospective/skill.json +7 -0
  64. package/lib/bmad-extension/skills/ml-revision/.gitkeep +0 -0
  65. package/lib/bmad-extension/skills/ml-revision/SKILL.md +82 -0
  66. package/lib/bmad-extension/skills/ml-revision/bmad-skill-manifest.yaml +3 -0
  67. package/lib/bmad-extension/skills/ml-revision/skill.json +7 -0
  68. package/lib/bmad-extension/skills/ml-techspec/.gitkeep +0 -0
  69. package/lib/bmad-extension/skills/ml-techspec/SKILL.md +80 -0
  70. package/lib/bmad-extension/skills/ml-techspec/bmad-skill-manifest.yaml +3 -0
  71. package/lib/bmad-extension/skills/ml-techspec/skill.json +7 -0
  72. package/lib/bmad.js +85 -8
  73. package/lib/skill-authoring.js +1 -1
  74. package/package.json +2 -2
  75. package/test/agent-injection-strategy.test.js +4 -4
  76. package/test/bmad-version-bump.test.js +34 -34
  77. package/test/build-bmad-args.test.js +13 -6
  78. package/test/convert-agents-to-skills.test.js +11 -1
  79. package/test/extension-module-restructure.test.js +31 -7
  80. package/test/migration-validation.test.js +14 -11
@@ -0,0 +1,522 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ baseline_classifier.py — BMAD DL Lifecycle
4
+ (Inspired by K-Dense claude-scientific-skills/scikit-learn/classification_pipeline.py)
5
+
6
+ Establishes sklearn baseline models before deep learning training.
7
+ Runs during TSK-001 (EDA) to set performance floors for REQ-PERF-* requirements.
8
+
9
+ Supports CSV tabular datasets. Outputs a markdown report with:
10
+ - Multi-model cross-validation comparison (LR, RF, GradientBoosting)
11
+ - Best model hyperparameter tuning
12
+ - Feature importance ranking
13
+ - Confusion matrix and classification report
14
+ - Pass/fail verdict against PRD REQ-PERF targets
15
+
16
+ Reads PRD performance requirements if provided, same as parse_training_logs.py.
17
+
18
+ Usage:
19
+ python3 scripts/baseline_classifier.py <data_csv> [prd_path] [--label-col LABEL] [--output report.md]
20
+ python3 scripts/baseline_classifier.py data/features.csv docs/prd/01_PRD.md --label-col defective
21
+
22
+ Exit codes:
23
+ 0 — success
24
+ 1 — no suitable models found or all requirements failed
25
+ 2 — file/format error
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import argparse
31
+ import csv
32
+ import json
33
+ import re
34
+ import sys
35
+ import warnings
36
+ from dataclasses import dataclass, field
37
+ from pathlib import Path
38
+ from typing import Any
39
+
40
+ warnings.filterwarnings("ignore")
41
+
42
+ # ── Optional deps ──────────────────────────────────────────────────────────────
43
+
44
+ try:
45
+ import numpy as np
46
+ HAS_NUMPY = True
47
+ except ImportError:
48
+ HAS_NUMPY = False
49
+
50
+ try:
51
+ from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
52
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
53
+ from sklearn.impute import SimpleImputer
54
+ from sklearn.pipeline import Pipeline
55
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
56
+ from sklearn.linear_model import LogisticRegression
57
+ from sklearn.metrics import (
58
+ accuracy_score, precision_score, recall_score, f1_score,
59
+ confusion_matrix, classification_report, roc_auc_score,
60
+ )
61
+ HAS_SKLEARN = True
62
+ except ImportError:
63
+ HAS_SKLEARN = False
64
+
65
+
66
+ # ── Data structures ────────────────────────────────────────────────────────────
67
+
68
@dataclass
class ModelResult:
    """Evaluation summary for one candidate baseline model.

    Captures cross-validation statistics, held-out test metrics, tuned
    hyperparameters, and diagnostic artifacts consumed by generate_report().
    """

    name: str         # human-readable model name, e.g. "Random Forest"
    cv_mean: float    # mean 5-fold CV score (f1_weighted)
    cv_std: float     # standard deviation of the CV scores
    test_accuracy: float
    test_f1: float
    test_precision: float
    test_recall: float
    # Only populated for binary problems whose estimator has predict_proba.
    roc_auc: float | None = None
    # Grid-search winners; empty for models that were fitted without tuning.
    best_params: dict = field(default_factory=dict)
    # Top (feature, weight) pairs — tree importances or |coef| magnitudes.
    feature_importances: list[tuple[str, float]] = field(default_factory=list)
    # Confusion-matrix rows as plain ints (actual x predicted).
    confusion: list[list[int]] = field(default_factory=list)
    # sklearn's text classification report for the test split.
    classification_report_str: str = ""
82
+
83
+
84
@dataclass
class PerfRequirement:
    """One REQ-PERF-* row parsed from the PRD requirements table."""

    req_id: str                  # e.g. "REQ-PERF-001"
    description: str             # free-text requirement description
    acceptance_criteria: str     # e.g. ">= 0.90 weighted F1"
    metric_keyword: str | None   # canonical metric name, or None if unrecognized
90
+
91
+
92
+ # ── PRD parsing (same logic as parse_training_logs.py) ────────────────────────
93
+
94
+ OPERATOR_PATTERN = re.compile(r"(>=|<=|>|<|=)\s*([\d.]+)")
95
+ PERF_REQ_PATTERN = re.compile(r"REQ-PERF-\d+")
96
+
97
+ METRIC_MAP = [
98
+ (["f1", "f1-score", "f1 score"], "f1"),
99
+ (["accuracy", "acc"], "accuracy"),
100
+ (["precision"], "precision"),
101
+ (["recall", "sensitivity"], "recall"),
102
+ (["auc", "roc"], "roc_auc"),
103
+ ]
104
+
105
+
106
+ def _guess_metric(text: str) -> str | None:
107
+ text_lower = text.lower()
108
+ for keywords, mapped in METRIC_MAP:
109
+ if any(kw in text_lower for kw in keywords):
110
+ return mapped
111
+ return None
112
+
113
+
114
+ def _evaluate(criteria: str, achieved: float) -> str:
115
+ match = OPERATOR_PATTERN.search(criteria)
116
+ if not match:
117
+ return "UNKNOWN"
118
+ op, threshold = match.group(1), float(match.group(2))
119
+ checks = {
120
+ ">=": achieved >= threshold, "<=": achieved <= threshold,
121
+ ">": achieved > threshold, "<": achieved < threshold,
122
+ "=": abs(achieved - threshold) < 1e-6,
123
+ }
124
+ return "PASS" if checks.get(op, False) else "FAIL"
125
+
126
+
127
def parse_perf_requirements(prd_path: Path) -> list[PerfRequirement]:
    """Extract REQ-PERF-* rows from the PRD's requirements markdown table.

    Expects a pipe table whose header mentions "Requirement ID", with the
    requirement ID in column 2, description in column 4 and acceptance
    criteria in column 5 (1-indexed, counting the empty cell before the
    first pipe). Returns [] when the file is missing or has no such table.
    """
    if not prd_path or not prd_path.exists():
        return []

    requirements: list[PerfRequirement] = []
    inside_table = False
    for raw_line in prd_path.read_text(encoding="utf-8").splitlines():
        # The table starts at the header row containing "Requirement ID".
        if re.search(r"\|\s*Requirement\s*ID", raw_line, re.IGNORECASE):
            inside_table = True
            continue
        if not inside_table:
            continue
        # Skip the |---|:---| separator row.
        if re.match(r"^\s*\|[\s\-:|]+\|\s*$", raw_line):
            continue
        # Any non-pipe line terminates the table.
        if not raw_line.strip().startswith("|"):
            inside_table = False
            continue
        cells = [c.strip().strip("`*[]") for c in raw_line.split("|")]
        if len(cells) < 5:
            continue
        candidate_id = cells[1]
        if not PERF_REQ_PATTERN.match(candidate_id):
            continue
        requirements.append(PerfRequirement(
            req_id=candidate_id,
            description=cells[3],
            acceptance_criteria=cells[4],
            metric_keyword=_guess_metric(cells[3] + " " + cells[4]),
        ))
    return requirements
157
+
158
+
159
+ # ── CSV loading ────────────────────────────────────────────────────────────────
160
+
161
def load_csv(path: Path, label_col: str | None) -> tuple[list[str], list[list], list]:
    """Load a CSV and split it into (feature_names, X_rows, y_list).

    The label column is *label_col* when it exists in the header; otherwise
    the first column whose lower-cased name is one of label/class/target/
    y/category. Feature values stay as raw strings — numeric filtering
    happens later in _to_numeric_matrix.

    Raises ValueError when the file has no rows or no label column is found.
    """
    with path.open(newline="", encoding="utf-8", errors="replace") as handle:
        records = list(csv.DictReader(handle))
    if not records:
        raise ValueError("CSV is empty")

    columns = list(records[0].keys())

    # Resolve the label column: explicit flag first, conventional names second.
    if label_col and label_col in columns:
        target = label_col
    else:
        conventional = ("label", "class", "target", "y", "category")
        target = next((c for c in columns if c.lower() in conventional), None)
        if target is None:
            raise ValueError(
                "No label column found. Specify with --label-col or use standard names: "
                "label, class, target, y, category"
            )

    feature_names = [c for c in columns if c != target]
    feature_rows = [[record.get(c, "") for c in feature_names] for record in records]
    labels = [record[target].strip() for record in records]
    return feature_names, feature_rows, labels
190
+
191
+
192
def _to_numeric_matrix(X_raw: list[list], feature_names: list[str]):
    """Return (X_numeric, numeric_feature_names) keeping only float-parseable columns.

    A column qualifies as numeric when every NON-EMPTY cell parses as float;
    empty cells count as missing values and become NaN in the output matrix
    so the downstream pipeline's SimpleImputer can fill them in.

    (Bug fix: previously a single empty cell rejected the whole column —
    float("") raises ValueError — so the NaN branch below was unreachable
    and columns with any missing value were silently dropped despite the
    pipeline's imputation step.)

    Raises RuntimeError when numpy is unavailable, ValueError when no
    numeric feature columns exist.
    """
    if not HAS_NUMPY:
        raise RuntimeError("numpy required for baseline_classifier")

    numeric_cols: list[int] = []
    for col_idx in range(len(feature_names)):
        for row in X_raw:
            v = row[col_idx].strip()
            if not v:
                # Missing value — does not disqualify the column; it becomes
                # NaN below and is imputed inside the model pipeline.
                continue
            try:
                float(v)
            except ValueError:
                break  # non-numeric cell: reject the whole column
        else:
            numeric_cols.append(col_idx)

    if not numeric_cols:
        raise ValueError("No numeric feature columns found in CSV")

    # NOTE(review): a column that is entirely empty now qualifies and yields
    # an all-NaN column — confirm SimpleImputer's handling of that is acceptable.
    X = np.array([
        [float(row[ci]) if row[ci].strip() else float("nan") for ci in numeric_cols]
        for row in X_raw
    ], dtype=float)
    names = [feature_names[i] for i in numeric_cols]
    return X, names
217
+
218
+
219
+ # ── Model training ─────────────────────────────────────────────────────────────
220
+
221
def _get_models() -> dict:
    """Return {display_name: (estimator, param_grid)} for the baseline sweep.

    Grid keys are namespaced with "classifier__" to address the final step
    of the pipeline produced by build_pipeline(). Every estimator uses a
    fixed random_state for reproducibility.
    """
    models: dict = {}
    models["Logistic Regression"] = (
        LogisticRegression(max_iter=1000, random_state=42),
        {"classifier__C": [0.1, 1.0, 10.0]},
    )
    models["Random Forest"] = (
        RandomForestClassifier(n_estimators=100, random_state=42),
        {"classifier__n_estimators": [100, 200], "classifier__max_depth": [10, None]},
    )
    models["Gradient Boosting"] = (
        GradientBoostingClassifier(n_estimators=100, random_state=42),
        {"classifier__n_estimators": [100, 200], "classifier__learning_rate": [0.05, 0.1]},
    )
    return models
236
+
237
+
238
def build_pipeline(estimator) -> "Pipeline":
    """Wrap *estimator* with median imputation and standard scaling."""
    steps = [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
        ("classifier", estimator),
    ]
    return Pipeline(steps)
244
+
245
+
246
def run_baseline(
    X, y_encoded, feature_names: list[str], class_names: list[str], test_size: float = 0.2
) -> list[ModelResult]:
    """Train and evaluate all baseline models; return results sorted by test F1 desc.

    Splits the data once (stratified), cross-validates every model on the
    training split, grid-search-tunes only the model with the best CV score,
    then evaluates every fitted model on the held-out test split.

    (Fix: each model was previously cross-validated TWICE with identical
    arguments — once to pick the best model and again in the scoring loop —
    doubling the CV cost for no behavioral difference. CV now runs once per
    model and the scores are reused.)
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=test_size, stratify=y_encoded, random_state=42
    )

    models = _get_models()

    # One CV pass per model; scores are reused for both best-model selection
    # and the report's cv_mean/cv_std.
    cv_results = {
        name: cross_val_score(build_pipeline(est), X_train, y_train, cv=5, scoring="f1_weighted")
        for name, (est, _grid) in models.items()
    }
    best_name = max(cv_results, key=lambda n: cv_results[n].mean())

    is_binary = len(class_names) == 2  # loop-invariant
    results: list[ModelResult] = []

    for name, (estimator, param_grid) in models.items():
        scores = cv_results[name]
        pipe = build_pipeline(estimator)

        # Tune best model; fit others without tuning
        if name == best_name:
            gs = GridSearchCV(pipe, param_grid, cv=5, scoring="f1_weighted", n_jobs=-1)
            gs.fit(X_train, y_train)
            fitted = gs.best_estimator_
            best_params = gs.best_params_
        else:
            pipe.fit(X_train, y_train)
            fitted = pipe
            best_params = {}

        y_pred = fitted.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="weighted", zero_division=0)
        prec = precision_score(y_test, y_pred, average="weighted", zero_division=0)
        rec = recall_score(y_test, y_pred, average="weighted", zero_division=0)

        # ROC AUC only applies to binary problems with probability output.
        roc = None
        if is_binary and hasattr(fitted, "predict_proba"):
            try:
                y_prob = fitted.predict_proba(X_test)[:, 1]
                roc = roc_auc_score(y_test, y_prob)
            except Exception:
                pass  # best-effort: AUC is optional diagnostic info

        # Feature importance: tree importances, or |coef| for linear models.
        fi: list[tuple[str, float]] = []
        clf = fitted.named_steps["classifier"]
        if hasattr(clf, "feature_importances_"):
            fi = sorted(
                zip(feature_names, clf.feature_importances_),
                key=lambda x: -x[1],
            )[:10]
        elif hasattr(clf, "coef_"):
            coefs = clf.coef_[0] if clf.coef_.ndim > 1 else clf.coef_
            fi = sorted(
                zip(feature_names, abs(coefs)),
                key=lambda x: -x[1],
            )[:10]

        cm = confusion_matrix(y_test, y_pred).tolist()
        cr = classification_report(y_test, y_pred, target_names=class_names, zero_division=0)

        results.append(ModelResult(
            name=name,
            cv_mean=scores.mean(),
            cv_std=scores.std(),
            test_accuracy=acc,
            test_f1=f1,
            test_precision=prec,
            test_recall=rec,
            roc_auc=roc,
            best_params=best_params,
            feature_importances=fi,
            confusion=cm,
            classification_report_str=cr,
        ))

    return sorted(results, key=lambda r: -r.test_f1)
329
+
330
+
331
+ # ── Report ─────────────────────────────────────────────────────────────────────
332
+
333
def generate_report(
    results: list[ModelResult],
    feature_names: list[str],
    class_names: list[str],
    data_path: Path,
    reqs: list[PerfRequirement],
    n_samples: int,
) -> str:
    """Render the markdown baseline report.

    Sections: A model-comparison table, B feature importances (only when the
    best model has them), C confusion matrix, D sklearn classification
    report, E PRD requirement status (only when *reqs* is non-empty), plus
    a footer.

    *results* must be non-empty and sorted best-first (results[0] is the
    winner, as produced by run_baseline).

    (Fix: the "> **Note:**" line was an f-string with no placeholders —
    now a plain literal; output text is unchanged.)
    """
    lines: list[str] = []

    lines += [
        "# Baseline Classifier Report",
        f"*Dataset: `{data_path.name}` | {n_samples} samples | {len(class_names)} classes | {len(feature_names)} features*",
        "",
        "---",
        "",
        "## A. Model Comparison (5-Fold CV on F1-weighted)",
        "",
        "| Model | CV F1 (mean±std) | Test Acc | Test F1 | Test Prec | Test Recall | ROC AUC |",
        "| :--- | :--- | ---: | ---: | ---: | ---: | ---: |",
    ]
    best = results[0]
    for r in results:
        marker = " 🏆" if r is best else ""
        roc_str = f"{r.roc_auc:.4f}" if r.roc_auc is not None else "—"
        lines.append(
            f"| {r.name}{marker} | {r.cv_mean:.4f} ± {r.cv_std:.4f} | "
            f"{r.test_accuracy:.4f} | {r.test_f1:.4f} | {r.test_precision:.4f} | "
            f"{r.test_recall:.4f} | {roc_str} |"
        )

    lines += [
        "",
        f"**Best model: {best.name}**",
        "",
    ]
    if best.best_params:
        lines.append(f"Tuned hyperparameters: `{best.best_params}`")
        lines.append("")

    # Feature importance (skipped when the best model exposes none)
    if best.feature_importances:
        lines += [
            "---",
            "",
            "## B. Top Feature Importances (Best Model)",
            "",
            "| Rank | Feature | Importance |",
            "| ---: | :--- | ---: |",
        ]
        for rank, (feat, imp) in enumerate(best.feature_importances, 1):
            # Crude text bar; |coef| importances can exceed 1.0, giving long bars.
            bar = "█" * max(1, int(imp * 40))
            lines.append(f"| {rank} | {feat} | {imp:.4f} {bar} |")
        lines.append("")

    # Confusion matrix
    lines += [
        "---",
        "",
        "## C. Confusion Matrix (Best Model)",
        "",
        f"Classes: {', '.join(class_names)}",
        "",
        "```",
        "Predicted →",
    ]
    header_row = "Actual ↓ " + " ".join(f"{c[:8]:>8}" for c in class_names)
    lines.append(header_row)
    for i, row in enumerate(best.confusion):
        row_str = f"{class_names[i][:8]:>8} " + " ".join(f"{v:>8}" for v in row)
        lines.append(row_str)
    lines += ["```", ""]

    # Classification report
    lines += [
        "## D. Classification Report (Best Model)",
        "",
        "```",
        best.classification_report_str,
        "```",
        "",
    ]

    # PRD requirement comparison (best model's metrics vs. acceptance criteria)
    if reqs:
        lines += [
            "---",
            "",
            "## E. PRD Requirement Status (Baseline)",
            "",
            "| Req ID | Description | Target | Achieved | Status |",
            "| :--- | :--- | :--- | :--- | :--- |",
        ]
        metric_map = {
            "f1": best.test_f1,
            "accuracy": best.test_accuracy,
            "precision": best.test_precision,
            "recall": best.test_recall,
            "roc_auc": best.roc_auc,
        }
        for req in reqs:
            val = metric_map.get(req.metric_keyword or "")
            if val is not None:
                status = _evaluate(req.acceptance_criteria, val)
                val_str = f"{val:.4f}"
            else:
                status = "UNKNOWN"
                val_str = "N/A"
            icon = {"PASS": "✓", "FAIL": "✗", "UNKNOWN": "?"}.get(status, "?")
            lines.append(
                f"| {req.req_id} | {req.description[:35]} | `{req.acceptance_criteria}` | {val_str} | {icon} {status} |"
            )

        fails = sum(1 for r in reqs if metric_map.get(r.metric_keyword or "") is not None
                    and _evaluate(r.acceptance_criteria, metric_map[r.metric_keyword]) == "FAIL")
        lines += [
            "",
            "> **Note:** This is a *baseline* result from classical ML. Deep learning is expected to improve on these scores.",
            f"> {'⚠ ' + str(fails) + ' requirement(s) not met even by baseline — review PRD targets.' if fails else '✓ Baseline meets all tracked requirements.'}",
        ]

    lines += [
        "",
        "---",
        "",
        "*Generated by `baseline_classifier.py` — BMAD DL Lifecycle (TSK-001)*",
    ]
    return "\n".join(lines)
461
+
462
+
463
+ # ── Main ───────────────────────────────────────────────────────────────────────
464
+
465
def main() -> int:
    """CLI entry point: load data, train baselines, write the markdown report.

    Returns 0 on success and 2 on dependency/file/format/training errors
    (matching the exit codes documented in the module docstring).
    """
    parser = argparse.ArgumentParser(description="Sklearn baseline classifier for BMAD DL")
    parser.add_argument("data_csv", type=Path)
    parser.add_argument("prd_path", type=Path, nargs="?", default=None)
    parser.add_argument("--label-col", type=str, default=None)
    parser.add_argument("--output", type=Path, default=None)
    parser.add_argument("--test-size", type=float, default=0.2)
    args = parser.parse_args()

    # Guard clauses: required dependencies, then the input file.
    if not HAS_SKLEARN:
        print("Error: scikit-learn not installed. Run: pip install scikit-learn", file=sys.stderr)
        return 2
    if not HAS_NUMPY:
        print("Error: numpy not installed. Run: pip install numpy", file=sys.stderr)
        return 2
    if not args.data_csv.exists():
        print(f"Error: File not found: {args.data_csv}", file=sys.stderr)
        return 2

    try:
        feature_names, X_raw, y_raw = load_csv(args.data_csv, args.label_col)
    except Exception as err:
        print(f"Error loading CSV: {err}", file=sys.stderr)
        return 2

    try:
        X, numeric_feature_names = _to_numeric_matrix(X_raw, feature_names)
    except Exception as err:
        print(f"Error converting features: {err}", file=sys.stderr)
        return 2

    # Encode string labels as integers; keep the original names for reporting.
    encoder = LabelEncoder()
    y = encoder.fit_transform(y_raw)
    class_names = list(encoder.classes_)

    print(f"Dataset: {len(y)} samples, {len(numeric_feature_names)} features, {len(class_names)} classes")
    print(f"Classes: {class_names}")
    print("Running cross-validation and tuning best model...")

    try:
        results = run_baseline(X, y, numeric_feature_names, class_names, args.test_size)
    except Exception as err:
        print(f"Error during training: {err}", file=sys.stderr)
        return 2

    reqs = parse_perf_requirements(args.prd_path) if args.prd_path else []
    report = generate_report(results, numeric_feature_names, class_names,
                             args.data_csv, reqs, len(y))

    # Default report path sits next to the input CSV.
    output = args.output or args.data_csv.parent / f"{args.data_csv.stem}_baseline_report.md"
    output.write_text(report, encoding="utf-8")
    print(f"\n✓ Report written to: {output}")
    print(f" Best model: {results[0].name} | F1: {results[0].test_f1:.4f}")
    return 0


if __name__ == "__main__":
    sys.exit(main())