claude-turing 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +48 -7
  3. package/commands/brief.md +13 -1
  4. package/commands/card.md +36 -0
  5. package/commands/init.md +13 -0
  6. package/commands/train.md +16 -7
  7. package/commands/turing.md +4 -2
  8. package/package.json +1 -1
  9. package/src/install.js +1 -1
  10. package/src/verify.js +1 -0
  11. package/templates/model_contract.md +49 -0
  12. package/templates/model_registry.yaml +69 -0
  13. package/templates/program.md +2 -0
  14. package/templates/scripts/__pycache__/cost_frontier.cpython-314.pyc +0 -0
  15. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  16. package/templates/scripts/__pycache__/generate_model_card.cpython-314.pyc +0 -0
  17. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  18. package/templates/scripts/cleanup.py +599 -0
  19. package/templates/scripts/cost_frontier.py +292 -0
  20. package/templates/scripts/diff_configs.py +534 -0
  21. package/templates/scripts/export_results.py +457 -0
  22. package/templates/scripts/generate_brief.py +54 -0
  23. package/templates/scripts/generate_model_card.py +342 -0
  24. package/templates/scripts/leaderboard.py +508 -0
  25. package/templates/scripts/plot_trajectory.py +611 -0
  26. package/templates/scripts/scaffold.py +9 -0
  27. package/templates/scripts/show_metrics.py +23 -2
  28. package/templates/tests/__pycache__/__init__.cpython-314.pyc +0 -0
  29. package/templates/tests/__pycache__/conftest.cpython-314-pytest-9.0.2.pyc +0 -0
  30. package/templates/tests/__pycache__/test_cost_frontier.cpython-314-pytest-9.0.2.pyc +0 -0
  31. package/templates/tests/test_cost_frontier.py +222 -0
package/templates/scripts/plot_trajectory.py
@@ -0,0 +1,611 @@
+ #!/usr/bin/env python3
+ """Metric trajectory chart generator for the autoresearch pipeline.
+
+ Reads experiments/log.jsonl and plots the primary metric over the experiment
+ sequence. Produces publication-ready SVG or PNG charts suitable for papers
+ and slides, or opens an interactive window for exploratory analysis.
+
+ Chart anatomy:
+ - Green dots: kept experiments (passed evaluation)
+ - Magenta X markers: discarded experiments (failed evaluation or were dropped)
+ - Blue step line: "best so far" running maximum (or minimum, if lower_is_better)
+ - Dashed horizontal line: convergence threshold offset from the best value
+ - Star annotation: best experiment ID and value
+
+ Usage:
+     python scripts/plot_trajectory.py                            # Interactive
+     python scripts/plot_trajectory.py --output trajectory.svg    # SVG for papers
+     python scripts/plot_trajectory.py --output trajectory.png --dpi 300   # High-res PNG
+     python scripts/plot_trajectory.py --last 20 --no-discarded   # Clean recent view
+     python scripts/plot_trajectory.py --metric f1_weighted       # Specific metric
+
+ Exit codes:
+     0 = success
+     1 = error (no experiments, missing metric, bad args)
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import sys
+ from pathlib import Path
+ from typing import Optional
+
+
+ # ---------------------------------------------------------------------------
+ # Config loading
+ # ---------------------------------------------------------------------------
+
+
+ def load_config(config_path: str) -> dict:
+     """Load relevant settings from config.yaml.
+
+     Returns dict with keys: primary_metric, lower_is_better, patience,
+     improvement_threshold, project_name.
+
+     Falls back to safe defaults if config is missing or malformed.
+     """
+     defaults: dict = {
+         "primary_metric": "accuracy",
+         "lower_is_better": False,
+         "patience": 3,
+         "improvement_threshold": 0.005,
+         "project_name": "ML Project",
+     }
+
+     path = Path(config_path)
+     if not path.exists():
+         return defaults
+
+     try:
+         import yaml  # yaml is already in the autoresearch env
+
+         with open(path) as f:
+             config = yaml.safe_load(f) or {}
+
+         eval_cfg = config.get("evaluation", {})
+         conv_cfg = config.get("convergence", {})
+
+         # Attempt to derive a human-readable project name from the data source
+         # or fall back to the directory name of the config file.
+         data_source = config.get("data", {}).get("source", "")
+         project_name = (
+             Path(data_source).stem.replace("_", " ").title()
+             if data_source and not data_source.startswith("{{")
+             else Path(config_path).parent.name.replace("_", " ").title()
+         )
+
+         return {
+             "primary_metric": eval_cfg.get("primary_metric", defaults["primary_metric"]),
+             "lower_is_better": eval_cfg.get("lower_is_better", defaults["lower_is_better"]),
+             "patience": conv_cfg.get("patience", defaults["patience"]),
+             "improvement_threshold": conv_cfg.get(
+                 "improvement_threshold", defaults["improvement_threshold"]
+             ),
+             "project_name": project_name,
+         }
+     except Exception as exc:  # pragma: no cover
+         print(f"plot_trajectory: Warning — could not parse config: {exc}", file=sys.stderr)
+         return defaults
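+
+ # Illustratively, a config.yaml shape that satisfies the lookups above (the
+ # key names come from the code; the values here are hypothetical):
+ #
+ #     evaluation:
+ #       primary_metric: f1_weighted
+ #       lower_is_better: false
+ #     convergence:
+ #       patience: 3
+ #       improvement_threshold: 0.005
+ #     data:
+ #       source: data/loan_default.csv    # yields project_name "Loan Default"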
+
+
+ # ---------------------------------------------------------------------------
+ # Experiment log loading
+ # ---------------------------------------------------------------------------
+
+
+ def load_experiments(log_path: str, metric: str) -> list[dict]:
+     """Load all experiments from log.jsonl that contain the requested metric.
+
+     Args:
+         log_path: Path to experiments/log.jsonl.
+         metric: Metric name to extract.
+
+     Returns:
+         List of dicts, each with keys:
+             index          int   — 1-based chronological sequence number
+             experiment_id  str
+             value          float — metric value
+             status         str   — "kept" | "discarded" | other
+     """
+     path = Path(log_path)
+     if not path.exists():
+         return []
+
+     results = []
+     seq = 0
+     with open(path) as f:
+         for line in f:
+             line = line.strip()
+             if not line:
+                 continue
+             try:
+                 entry = json.loads(line)
+             except json.JSONDecodeError:
+                 continue
+
+             value = entry.get("metrics", {}).get(metric)
+             if value is None:
+                 continue  # Skip entries without the requested metric
+
+             try:
+                 value = float(value)
+             except (TypeError, ValueError):
+                 continue
+
+             seq += 1
+             results.append({
+                 "index": seq,
+                 "experiment_id": entry.get("experiment_id", f"exp-{seq:03d}"),
+                 "value": value,
+                 "status": entry.get("status", "unknown"),
+             })
+
+     return results
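+
+ # A log.jsonl line this loader accepts, as an illustrative sketch (the field
+ # names are the ones read above; the values are hypothetical):
+ #
+ #     {"experiment_id": "exp-007", "status": "kept",
+ #      "metrics": {"accuracy": 0.873, "train_seconds": 41.2},
+ #      "timestamp": "2025-01-15T10:32:00"}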
+
+
+ # ---------------------------------------------------------------------------
+ # Best-so-far computation
+ # ---------------------------------------------------------------------------
+
+
+ def compute_best_so_far(
+     experiments: list[dict],
+     lower_is_better: bool,
+ ) -> list[float]:
+     """Return a list of running-best values aligned with experiments.
+
+     For each position i, best_so_far[i] is the best metric value seen
+     among experiments[0..i] (inclusive), considering all statuses.
+     """
+     best: Optional[float] = None
+     result: list[float] = []
+     for exp in experiments:
+         v = exp["value"]
+         if best is None:
+             best = v
+         else:
+             if lower_is_better:
+                 best = min(best, v)
+             else:
+                 best = max(best, v)
+         result.append(best)
+     return result
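+
+ # Worked example (higher is better): values [0.70, 0.65, 0.80, 0.78] produce
+ # the running best [0.70, 0.70, 0.80, 0.80], i.e. the blue step line.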
+
+
+ # ---------------------------------------------------------------------------
+ # Convergence detection (reused logic from check_convergence.py)
+ # ---------------------------------------------------------------------------
+
+
+ def detect_convergence(
+     experiments: list[dict],
+     patience: int,
+     improvement_threshold: float,
+     lower_is_better: bool,
+ ) -> bool:
+     """Return True if the last *patience* kept experiments show no meaningful gain."""
+     kept = [e for e in experiments if e["status"] == "kept"]
+     if len(kept) < patience:
+         return False
+
+     for i in range(len(kept) - patience, len(kept)):
+         prior_values = [e["value"] for e in kept[:i]]
+         if not prior_values:
+             return False
+         prior_best = min(prior_values) if lower_is_better else max(prior_values)
+         current = kept[i]["value"]
+         if prior_best == 0:
+             improvement = 1.0 if current != 0 else 0.0
+         elif lower_is_better:
+             improvement = (prior_best - current) / abs(prior_best)
+         else:
+             improvement = (current - prior_best) / abs(prior_best)
+
+         if improvement >= improvement_threshold:
+             return False  # At least one non-trivial improvement in the window
+
+     return True
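+
+ # Worked example (higher is better, patience=3, improvement_threshold=0.005):
+ # kept values [0.800, 0.801, 0.802, 0.8005] count as converged, since each of
+ # the last three improves on the prior best by less than 0.5% (about 0.00125,
+ # 0.00125, and a negative relative change).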
+
+
+ # ---------------------------------------------------------------------------
+ # Plotting
+ # ---------------------------------------------------------------------------
+
+
+ def parse_figsize(figsize_str: str) -> tuple[float, float]:
+     """Parse a WxH string (e.g. '10x6') into a (width, height) tuple."""
+     try:
+         w, h = figsize_str.lower().split("x")
+         return float(w), float(h)
+     except (ValueError, AttributeError) as exc:
+         raise argparse.ArgumentTypeError(
+             f"Invalid --figsize '{figsize_str}'. Expected format: WxH (e.g. 10x6)"
+         ) from exc
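+
+ # For instance, parse_figsize("10x6") returns (10.0, 6.0); "12X7" also parses
+ # because the string is lowercased first, while "10" or "10x6x2" raise
+ # ArgumentTypeError from the failed two-value unpack.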
+
+
+ def _best_experiment(
+     experiments: list[dict],
+     lower_is_better: bool,
+ ) -> dict:
+     """Return the experiment with the single best metric value."""
+     return (min if lower_is_better else max)(experiments, key=lambda e: e["value"])
+
+
+ def plot_trajectory(
+     experiments: list[dict],
+     metric: str,
+     config: dict,
+     args: argparse.Namespace,
+ ) -> None:
+     """Build and emit the trajectory chart.
+
+     Args:
+         experiments: Filtered experiment list (already sliced by --last).
+         metric: Metric name being plotted.
+         config: Parsed config dict.
+         args: CLI arguments namespace.
+     """
+     import matplotlib.pyplot as plt
+
+     # Style — use seaborn-v0_8-whitegrid when available, fall back gracefully
+     available_styles = plt.style.available
+     for candidate in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid", "ggplot", "default"):
+         if candidate in available_styles or candidate == "default":
+             plt.style.use(candidate)
+             break
+
+     lower_is_better: bool = config["lower_is_better"]
+     patience: int = config["patience"]
+     improvement_threshold: float = config["improvement_threshold"]
+
+     # Separate kept vs discarded
+     kept = [e for e in experiments if e["status"] == "kept"]
+     discarded = [e for e in experiments if e["status"] != "kept"]
+
+     # Best-so-far step line (computed over all experiments)
+     best_so_far = compute_best_so_far(experiments, lower_is_better)
+     xs_all = [e["index"] for e in experiments]
+
+     # Best experiment overall
+     best_exp = _best_experiment(experiments, lower_is_better)
+     best_value = best_exp["value"]
+
+     # Convergence threshold line: offset from best by threshold × |best|
+     threshold_delta = improvement_threshold * abs(best_value)
+     if lower_is_better:
+         threshold_y = best_value + threshold_delta
+     else:
+         threshold_y = best_value - threshold_delta
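+     # For example, best_value 0.850 with improvement_threshold 0.005 puts the
+     # dashed line at 0.850 - 0.005 * 0.850 = 0.84575 (or above the best when
+     # lower_is_better).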
+
+     # Convergence detection
+     converged = detect_convergence(
+         experiments, patience, improvement_threshold, lower_is_better
+     )
+
+     # -----------------------------------------------------------------------
+     # Figure setup
+     # -----------------------------------------------------------------------
+     figsize = parse_figsize(args.figsize)
+     fig, ax = plt.subplots(figsize=figsize)
+
+     # -----------------------------------------------------------------------
+     # Best-so-far step function
+     # -----------------------------------------------------------------------
+     ax.step(
+         xs_all,
+         best_so_far,
+         where="post",
+         color="#2166ac",
+         linewidth=1.8,
+         alpha=0.85,
+         label="Best so far",
+         zorder=2,
+     )
+
+     # -----------------------------------------------------------------------
+     # Convergence threshold dashed line
+     # -----------------------------------------------------------------------
+     ax.axhline(
+         threshold_y,
+         color="#762a83",
+         linestyle="--",
+         linewidth=1.2,
+         alpha=0.7,
+         label=f"Convergence threshold ({improvement_threshold * 100:.1f}% from best)",
+         zorder=2,
+     )
+
+     # -----------------------------------------------------------------------
+     # Scatter: kept experiments (green)
+     # -----------------------------------------------------------------------
+     if kept:
+         ax.scatter(
+             [e["index"] for e in kept],
+             [e["value"] for e in kept],
+             color="#4dac26",
+             s=55,
+             zorder=4,
+             alpha=0.9,
+             edgecolors="white",
+             linewidths=0.6,
+             label=f"Kept ({len(kept)})",
+         )
+
+     # -----------------------------------------------------------------------
+     # Scatter: discarded experiments (magenta) — hidden if --no-discarded
+     # -----------------------------------------------------------------------
+     if discarded and not args.no_discarded:
+         ax.scatter(
+             [e["index"] for e in discarded],
+             [e["value"] for e in discarded],
+             color="#d01c8b",
+             s=40,
+             zorder=3,
+             alpha=0.65,
+             marker="x",
+             linewidths=1.2,
+             label=f"Discarded ({len(discarded)})",
+         )
+
+     # -----------------------------------------------------------------------
+     # Annotate best experiment
+     # -----------------------------------------------------------------------
+     best_x = best_exp["index"]
+     ax.scatter(
+         [best_x],
+         [best_value],
+         color="#d73027",
+         s=130,
+         zorder=5,
+         marker="*",
+         edgecolors="#7f0000",
+         linewidths=0.6,
+     )
+
+     # Choose annotation offset direction: push up for higher-is-better, down for lower
+     vert_offset = 0.015 * (ax.get_ylim()[1] - ax.get_ylim()[0] or 1.0)
+     annotation_y = best_value + vert_offset if not lower_is_better else best_value - vert_offset
+
+     ax.annotate(
+         f"Best: {best_exp['experiment_id']}\n{best_value:.4f}",
+         xy=(best_x, best_value),
+         xytext=(best_x + max(1, len(experiments) * 0.04), annotation_y),
+         fontsize=8,
+         color="#7f0000",
+         arrowprops=dict(
+             arrowstyle="->",
+             color="#7f0000",
+             lw=1.0,
+         ),
+         bbox=dict(
+             boxstyle="round,pad=0.25",
+             facecolor="white",
+             edgecolor="#7f0000",
+             alpha=0.85,
+             linewidth=0.8,
+         ),
+         zorder=6,
+     )
+
+     # -----------------------------------------------------------------------
+     # Converged annotation
+     # -----------------------------------------------------------------------
+     if converged:
+         ax.text(
+             0.98,
+             0.05,
+             f"Converged\n(patience={patience})",
+             transform=ax.transAxes,
+             fontsize=8,
+             color="#762a83",
+             ha="right",
+             va="bottom",
+             bbox=dict(
+                 boxstyle="round,pad=0.3",
+                 facecolor="#f7f4f9",
+                 edgecolor="#762a83",
+                 alpha=0.9,
+                 linewidth=0.8,
+             ),
+         )
+
+     # -----------------------------------------------------------------------
+     # Labels, title, legend
+     # -----------------------------------------------------------------------
+     direction_hint = "(lower is better)" if lower_is_better else "(higher is better)"
+     ax.set_xlabel("Experiment #", fontsize=11)
+     ax.set_ylabel(f"{metric} {direction_hint}", fontsize=11)
+
+     if args.title:
+         title = args.title
+     else:
+         project_name = config.get("project_name", "ML Project")
+         title = f"{project_name} — {metric} trajectory"
+
+     ax.set_title(title, fontsize=13, fontweight="bold", pad=12)
+
+     # Integer x-ticks only
+     ax.xaxis.get_major_locator().set_params(integer=True)  # type: ignore[attr-defined]
+
+     ax.legend(fontsize=9, framealpha=0.85, loc="best")
+     ax.spines["top"].set_visible(False)
+     ax.spines["right"].set_visible(False)
+
+     fig.tight_layout()
+
+     # -----------------------------------------------------------------------
+     # Output
+     # -----------------------------------------------------------------------
+     if args.output:
+         out_path = Path(args.output)
+         ext = out_path.suffix.lower()
+         if ext == ".png":
+             fig.savefig(out_path, dpi=args.dpi, bbox_inches="tight")
+         elif ext == ".svg":
+             fig.savefig(out_path, format="svg", bbox_inches="tight")
+         else:
+             print(
+                 f"plot_trajectory: Warning — unknown extension '{ext}', saving as PNG.",
+                 file=sys.stderr,
+             )
+             fig.savefig(out_path, dpi=args.dpi, bbox_inches="tight")
+         plt.close(fig)
+         print(f"plot_trajectory: Saved to {out_path}")
+     else:
+         plt.show()
+         plt.close(fig)
+
+
+ # ---------------------------------------------------------------------------
+ # CLI
+ # ---------------------------------------------------------------------------
+
+
+ def build_parser() -> argparse.ArgumentParser:
+     parser = argparse.ArgumentParser(
+         description=(
+             "Plot primary metric trajectory from experiments/log.jsonl.\n"
+             "Output can be SVG (best for papers), PNG, or an interactive window (the default)."
+         ),
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog=(
+             "Examples:\n"
+             "  python scripts/plot_trajectory.py\n"
+             "  python scripts/plot_trajectory.py --output trajectory.svg\n"
+             "  python scripts/plot_trajectory.py --output trajectory.png --dpi 300\n"
+             "  python scripts/plot_trajectory.py --last 20 --no-discarded\n"
+             "  python scripts/plot_trajectory.py --metric f1_weighted\n"
+         ),
+     )
+     parser.add_argument(
+         "--log",
+         default="experiments/log.jsonl",
+         metavar="PATH",
+         help="Path to experiment log (default: experiments/log.jsonl)",
+     )
+     parser.add_argument(
+         "--config",
+         default="config.yaml",
+         metavar="PATH",
+         help="Path to config.yaml (default: config.yaml)",
+     )
+     parser.add_argument(
+         "--output",
+         default=None,
+         metavar="FILE",
+         help="Output file path (.svg or .png). Omit for interactive display.",
+     )
+     parser.add_argument(
+         "--metric",
+         default=None,
+         metavar="NAME",
+         help="Metric to plot (default: primary_metric from config.yaml)",
+     )
+     parser.add_argument(
+         "--last",
+         type=int,
+         default=None,
+         metavar="N",
+         help="Only plot the last N experiments",
+     )
+     parser.add_argument(
+         "--no-discarded",
+         action="store_true",
+         help="Hide discarded experiments from the chart",
+     )
+     parser.add_argument(
+         "--title",
+         default=None,
+         metavar="TEXT",
+         help="Override the auto-generated chart title",
+     )
+     parser.add_argument(
+         "--figsize",
+         default="10x6",
+         metavar="WxH",
+         help="Figure size in inches, width x height (default: 10x6)",
+     )
+     parser.add_argument(
+         "--dpi",
+         type=int,
+         default=150,
+         help="Resolution for PNG output in dots per inch (default: 150)",
+     )
+     return parser
+
+
+ def main() -> None:
+     """CLI entry point."""
+     # Guard: matplotlib must be importable
+     try:
+         import matplotlib  # noqa: F401
+     except ImportError:
+         print(
+             "plot_trajectory: Error — matplotlib is not installed.\n"
+             "  Install it with: pip install matplotlib",
+             file=sys.stderr,
+         )
+         sys.exit(1)
+
+     parser = build_parser()
+     args = parser.parse_args()
+
+     # Validate --figsize early so we get a clean error message
+     try:
+         parse_figsize(args.figsize)
+     except argparse.ArgumentTypeError as exc:
+         parser.error(str(exc))
+
+     # Load config
+     config = load_config(args.config)
+
+     # Resolve metric name
+     metric = args.metric if args.metric else config["primary_metric"]
+
+     # Load experiments
+     experiments = load_experiments(args.log, metric)
+
+     if not experiments:
+         log_path = Path(args.log)
+         if not log_path.exists():
+             print(
+                 f"plot_trajectory: Error — log not found at '{args.log}'.\n"
+                 "  Run at least one experiment first.",
+                 file=sys.stderr,
+             )
+         else:
+             print(
+                 f"plot_trajectory: Error — no experiments with metric '{metric}' "
+                 f"found in '{args.log}'.\n"
+                 "  Check the metric name or run experiments first.",
+                 file=sys.stderr,
+             )
+         sys.exit(1)
+
+     # Apply --last filter, then re-index so the x-axis stays contiguous
+     if args.last is not None:
+         if args.last < 1:
+             parser.error("--last must be a positive integer")
+         experiments = experiments[-args.last :]
+         for i, exp in enumerate(experiments, start=1):
+             exp["index"] = i
+
+     n_kept = sum(1 for e in experiments if e["status"] == "kept")
+     n_disc = sum(1 for e in experiments if e["status"] != "kept")
+     print(
+         f"plot_trajectory: Plotting {len(experiments)} experiments "
+         f"({n_kept} kept, {n_disc} discarded) for metric '{metric}'",
+         file=sys.stderr,
+     )
+
+     plot_trajectory(experiments, metric, config, args)
+
+
+ if __name__ == "__main__":
+     main()
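+
+ # A typical run under the defaults above (counts and paths hypothetical):
+ #
+ #     $ python scripts/plot_trajectory.py --output trajectory.svg
+ #     plot_trajectory: Plotting 24 experiments (15 kept, 9 discarded) for metric 'accuracy'
+ #     plot_trajectory: Saved to trajectory.svg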
package/templates/scripts/scaffold.py
@@ -46,6 +46,8 @@ TEMPLATE_FILES = [
      "program.md",
      "README.md",
      "MEMORY.md",
+     "model_registry.yaml",
+     "model_contract.md",
      "requirements.txt",
      "pyproject.toml",
  ]
@@ -80,6 +82,13 @@ TEMPLATE_DIRS = {
          "show_environment.py",
          "turing_io.py",
          "preflight.py",
+         "cleanup.py",
+         "generate_model_card.py",
+         "cost_frontier.py",
+         "leaderboard.py",
+         "diff_configs.py",
+         "export_results.py",
+         "plot_trajectory.py",
      ],
      "tests": ["__init__.py", "conftest.py"],
  }
package/templates/scripts/show_metrics.py
@@ -44,9 +44,16 @@ def format_table(experiments: list[dict], best_id: str | None, metric_names: lis
      if not experiments:
          return "No experiments logged yet."

+     # Detect if any experiment has train_seconds
+     has_train_seconds = any(
+         exp.get("metrics", {}).get("train_seconds") is not None
+         for exp in experiments
+     )
+
      # Build dynamic header based on configured metrics
      metric_headers = "".join(f"{m:>12}" for m in metric_names)
-     header = f"{'ID':<10} {'Status':<10} {'Model':<15}{metric_headers} {'Timestamp':<22}"
+     time_header = f"{'Time':>10}" if has_train_seconds else ""
+     header = f"{'ID':<10} {'Status':<10} {'Model':<15}{metric_headers}{time_header} {'Timestamp':<22}"
      sep = "-" * len(header)
      lines = [header, sep]
@@ -62,9 +69,23 @@ def format_table(experiments: list[dict], best_id: str | None, metric_names: lis
          else:
              metric_values += f"{'N/A':>12}"

+         time_col = ""
+         if has_train_seconds:
+             train_secs = metrics.get("train_seconds")
+             if train_secs is not None:
+                 if train_secs < 60:
+                     time_col = f"{train_secs:.1f}s"
+                 elif train_secs < 3600:
+                     time_col = f"{train_secs / 60:.1f}m"
+                 else:
+                     time_col = f"{train_secs / 3600:.1f}h"
+                 time_col = f"{time_col:>10}"
+             else:
+                 time_col = f"{'N/A':>10}"
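+             # e.g. 42.0 -> "42.0s", 95.0 -> "1.6m", 5400.0 -> "1.5h", each
+             # right-aligned into the 10-character Time column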
+
          ts = exp.get("timestamp", "")[:19]
          marker = " *BEST*" if exp.get("experiment_id") == best_id else ""
-         line = f"{exp.get('experiment_id', '?'):<10} {exp.get('status', '?'):<10} {model_type:<15}{metric_values} {ts}{marker}"
+         line = f"{exp.get('experiment_id', '?'):<10} {exp.get('status', '?'):<10} {model_type:<15}{metric_values}{time_col} {ts}{marker}"
          lines.append(line)

      return "\n".join(lines)