claude-turing 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -309,6 +309,23 @@ def load_regression_checks(regress_dir: str = "experiments/regressions") -> list
309
309
  return reports
310
310
 
311
311
 
312
def load_ensemble_results(ensemble_dir: str = "experiments/ensembles") -> list[dict]:
    """Load ensemble result reports from YAML files.

    Args:
        ensemble_dir: Directory scanned (non-recursively) for files
            matching ``ensemble-*.yaml``.

    Returns:
        The parsed reports, in sorted path order. A file is skipped when
        it cannot be read, decoded, or parsed, or when its top-level
        value is empty or not a mapping. Returns an empty list when the
        directory does not exist.
    """
    path = Path(ensemble_dir)
    if not path.exists():
        return []
    reports = []
    for f in sorted(path.glob("ensemble-*.yaml")):
        try:
            # Explicit encoding: the default depends on the platform locale.
            with open(f, encoding="utf-8") as fh:
                report = yaml.safe_load(fh)
        except (yaml.YAMLError, OSError, UnicodeDecodeError):
            # Best-effort loader: one unreadable report must not abort the
            # whole brief. UnicodeDecodeError is a ValueError, so it is not
            # covered by OSError/YAMLError and has to be listed explicitly.
            continue
        if report and isinstance(report, dict):
            reports.append(report)
    return reports
327
+
328
+
312
329
  def format_brief(
313
330
  campaign: dict,
314
331
  best: dict | None,
@@ -327,6 +344,7 @@ def format_brief(
327
344
  profiles: list[dict] | None = None,
328
345
  queue_summary: dict | None = None,
329
346
  regression_checks: list[dict] | None = None,
347
+ ensemble_results: list[dict] | None = None,
330
348
  ) -> str:
331
349
  """Format the research briefing as markdown."""
332
350
  direction = "lower" if lower_is_better else "higher"
@@ -546,6 +564,21 @@ def format_brief(
546
564
  if auto_hyps:
547
565
  lines.append(f"\n*{auto_hyps} auto-generated hypotheses from failure analysis.*")
548
566
 
567
+ # Ensemble results
568
+ if ensemble_results:
569
+ lines.extend(["", "## Ensembles", ""])
570
+ for ens in ensemble_results:
571
+ best_method = ens.get("best_method", "?")
572
+ improvement = ens.get("improvement", 0)
573
+ n_models = ens.get("n_candidates", 0)
574
+ if best_method != "best_single" and improvement > 0:
575
+ lines.append(
576
+ f"- **{best_method}** ({n_models} models): "
577
+ f"{metric} improvement {improvement:+.4f} over best single"
578
+ )
579
+ else:
580
+ lines.append(f"- {n_models}-model ensemble: no improvement over best single")
581
+
549
582
  # Regression check history (stability)
550
583
  if regression_checks:
551
584
  lines.extend(["", "## Stability", ""])
@@ -636,6 +669,7 @@ def generate_brief(
636
669
  profiles = load_profiles()
637
670
  queue_summary = load_queue_summary()
638
671
  regression_checks = load_regression_checks()
672
+ ensemble_results = load_ensemble_results()
639
673
 
640
674
  return format_brief(
641
675
  campaign, best, trajectory, model_types, hypotheses,
@@ -648,6 +682,7 @@ def generate_brief(
648
682
  profiles=profiles if profiles else None,
649
683
  queue_summary=queue_summary,
650
684
  regression_checks=regression_checks if regression_checks else None,
685
+ ensemble_results=ensemble_results if ensemble_results else None,
651
686
  )
652
687
 
653
688
 
@@ -0,0 +1,457 @@
1
+ #!/usr/bin/env python3
2
+ """Pipeline composition manager for the autoresearch pipeline.
3
+
4
+ Decomposes ML pipelines into swappable stages (preprocess, features,
5
+ model, postprocess). Each stage can be independently varied, cached,
6
+ and reused across experiments.
7
+
8
+ Usage:
9
+ python scripts/pipeline_manager.py show
10
+ python scripts/pipeline_manager.py swap model --from exp-031
11
+ python scripts/pipeline_manager.py cache
12
+ python scripts/pipeline_manager.py run
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import argparse
18
+ import hashlib
19
+ import json
20
+ import sys
21
+ from datetime import datetime, timezone
22
+ from pathlib import Path
23
+
24
+ import yaml
25
+
26
+ from scripts.turing_io import load_config, load_experiments
27
+
28
+ DEFAULT_LOG_PATH = "experiments/log.jsonl"
29
+ DEFAULT_CACHE_DIR = "experiments/cache"
30
+ DEFAULT_STAGES = ["preprocess", "features", "model", "postprocess"]
31
+
32
+
33
+ # --- Stage Definition ---
34
+
35
+
36
def define_stages(config: dict) -> list[dict]:
    """Build the ordered pipeline stage records described by the config.

    Args:
        config: Project config dict. Stage names are read from
            ``config["pipeline"]["stages"]`` (falling back to
            ``DEFAULT_STAGES``) and each stage's own settings from
            ``config["pipeline"][<name>]``.

    Returns:
        One dict per stage, in declared order, with the keys ``name``,
        ``config``, ``description`` and ``hash``.
    """
    pipeline_section = config.get("pipeline", {})
    ordered_names = pipeline_section.get("stages", DEFAULT_STAGES)

    def _build(stage_name):
        record = {
            "name": stage_name,
            "config": pipeline_section.get(stage_name, {}),
            "description": _describe_stage(stage_name, config),
        }
        # The hash is derived from the record itself, so it is added last.
        record["hash"] = compute_stage_hash(record)
        return record

    return [_build(stage_name) for stage_name in ordered_names]
59
+
60
+
61
+ def _describe_stage(name: str, config: dict) -> str:
62
+ """Generate a human-readable description of a pipeline stage."""
63
+ if name == "preprocess":
64
+ model_cfg = config.get("model", {})
65
+ steps = []
66
+ if model_cfg.get("hyperparams", {}).get("handle_missing"):
67
+ steps.append("handle_missing")
68
+ if model_cfg.get("hyperparams", {}).get("scaler"):
69
+ steps.append(model_cfg["hyperparams"]["scaler"])
70
+ return ", ".join(steps) if steps else "default preprocessing"
71
+
72
+ elif name == "features":
73
+ model_cfg = config.get("model", {})
74
+ features = model_cfg.get("features", [])
75
+ if features:
76
+ return ", ".join(features[:5])
77
+ return "raw features"
78
+
79
+ elif name == "model":
80
+ model_cfg = config.get("model", {})
81
+ model_type = model_cfg.get("type", "unknown")
82
+ hyperparams = model_cfg.get("hyperparams", {})
83
+ key_params = []
84
+ for k in ("max_depth", "n_estimators", "learning_rate", "hidden_size"):
85
+ if k in hyperparams:
86
+ key_params.append(f"{k}={hyperparams[k]}")
87
+ return f"{model_type}({', '.join(key_params)})" if key_params else model_type
88
+
89
+ elif name == "postprocess":
90
+ return config.get("postprocess", {}).get("type", "none")
91
+
92
+ return "unknown"
93
+
94
+
95
def compute_stage_hash(stage: dict) -> str:
    """Return an 8-character content fingerprint for a stage.

    The fingerprint is the first 8 hex digits of the MD5 of the stage's
    ``config`` (JSON-serialised with sorted keys) followed directly by its
    ``description``. Other keys of ``stage`` do not influence the result.
    It is used to detect when a stage hasn't changed so re-computation
    can be skipped.
    """
    serialized_config = json.dumps(stage.get("config", {}), sort_keys=True)
    fingerprint_source = serialized_config + stage.get("description", "")
    digest = hashlib.md5(fingerprint_source.encode())
    return digest.hexdigest()[:8]
103
+
104
+
105
+ # --- Stage Operations ---
106
+
107
+
108
def extract_stage_from_experiment(
    experiment: dict,
    stage_name: str,
) -> dict:
    """Pull one stage's configuration out of a logged experiment.

    Args:
        experiment: Experiment dict from the log; its ``config`` and
            ``config["hyperparams"]`` entries are read.
        stage_name: Name of the stage to extract.

    Returns:
        A dict with ``name``, ``config`` and ``source_experiment``. For
        the ``model`` stage the config holds the model type and all
        hyperparameters; for ``preprocess``, ``features`` and
        ``postprocess`` it holds only the hyperparameters whose keys
        belong to that stage; for any other name it is empty.
    """
    exp_config = experiment.get("config", {})
    params = exp_config.get("hyperparams", {})
    origin = experiment.get("experiment_id")

    if stage_name == "model":
        stage_cfg = {
            "type": exp_config.get("model_type", "unknown"),
            "hyperparams": params,
        }
    else:
        # Hyperparameter keys that belong to each non-model stage.
        keys_by_stage = {
            "preprocess": {"scaler", "handle_missing", "imputer", "normalize"},
            "features": {"features", "feature_engineering", "polynomial", "interactions"},
            "postprocess": {"calibration", "threshold", "postprocess"},
        }
        relevant_keys = keys_by_stage.get(stage_name)
        if relevant_keys is None:
            stage_cfg = {}
        else:
            stage_cfg = {
                key: value for key, value in params.items() if key in relevant_keys
            }

    return {"name": stage_name, "config": stage_cfg, "source_experiment": origin}
162
+
163
+
164
def swap_stage(
    current_stages: list[dict],
    stage_name: str,
    new_stage: dict,
) -> list[dict]:
    """Return the pipeline with every stage named ``stage_name`` replaced.

    Args:
        current_stages: Current pipeline stages; the list itself is not
            modified.
        stage_name: Name of the stage to replace.
        new_stage: Replacement stage. When a match is found its ``hash``
            key is (re)assigned in place before it is inserted.

    Returns:
        A new list in the same order. If no stage has the given name the
        result contains the original stages and ``new_stage`` is left
        untouched.
    """

    def _pick(existing):
        if existing["name"] != stage_name:
            return existing
        # Stamp the replacement with its own content hash.
        new_stage["hash"] = compute_stage_hash(new_stage)
        return new_stage

    return [_pick(existing) for existing in current_stages]
187
+
188
+
189
+ # --- Cache Management ---
190
+
191
+
192
def get_cache_path(stage: dict, cache_dir: str = DEFAULT_CACHE_DIR) -> Path:
    """Get the cache path for a stage.

    Args:
        stage: Stage dict; ``stage["name"]`` is required. A stored
            ``hash`` entry is used as-is, otherwise the hash is computed
            from the stage.
        cache_dir: Directory that holds the cache entries.

    Returns:
        ``<cache_dir>/<name>-<hash>``. The path is not created or checked.
    """
    # Only compute the hash when the stage does not already carry one;
    # dict.get(key, default) would evaluate the default on every call.
    stage_hash = stage["hash"] if "hash" in stage else compute_stage_hash(stage)
    return Path(cache_dir) / f"{stage['name']}-{stage_hash}"
196
+
197
+
198
def check_cache(stage: dict, cache_dir: str = DEFAULT_CACHE_DIR) -> bool:
    """Report whether the stage's cache path exists on disk."""
    return get_cache_path(stage, cache_dir).exists()
202
+
203
+
204
def get_cache_stats(cache_dir: str = DEFAULT_CACHE_DIR) -> dict:
    """Get cache directory statistics.

    Args:
        cache_dir: Cache directory to inspect.

    Returns:
        A dict with ``total_files``, ``total_size_bytes``,
        ``total_size_mb`` (rounded to two decimals) and ``stages``.
        ``stages`` maps each immediate subdirectory name to its
        ``size_bytes`` and ``n_files``, counted recursively. Files that
        sit directly in ``cache_dir`` count toward the totals only. All
        values are zero/empty when ``cache_dir`` is not an existing
        directory.
    """
    path = Path(cache_dir)
    # is_dir() also covers a missing path, and avoids iterdir() raising when
    # the cache path exists but is a regular file.
    if not path.is_dir():
        return {
            "total_files": 0,
            "total_size_bytes": 0,
            "total_size_mb": 0.0,
            "stages": {},
        }

    total_size = 0
    total_files = 0
    stages = {}

    for entry in path.iterdir():
        if entry.is_dir():
            # Walk the subtree once and derive both size and count from it.
            file_sizes = [f.stat().st_size for f in entry.rglob("*") if f.is_file()]
            size = sum(file_sizes)
            n_files = len(file_sizes)
            stages[entry.name] = {"size_bytes": size, "n_files": n_files}
            total_size += size
            total_files += n_files
        elif entry.is_file():
            total_size += entry.stat().st_size
            total_files += 1

    return {
        "total_files": total_files,
        "total_size_bytes": total_size,
        "total_size_mb": round(total_size / (1024 * 1024), 2),
        "stages": stages,
    }
232
+
233
+
234
+ # --- Pipeline Composition ---
235
+
236
+
237
def compose_pipeline(
    action: str,
    stage_name: str | None = None,
    from_exp: str | None = None,
    config_path: str = "config.yaml",
    log_path: str = DEFAULT_LOG_PATH,
    cache_dir: str = DEFAULT_CACHE_DIR,
) -> dict:
    """Execute a pipeline composition action.

    None of the actions modify the config, the log, or the cache; each
    one only builds a report.

    Args:
        action: One of show, swap, cache, run.
        stage_name: Stage to operate on (for swap).
        from_exp: Experiment ID to take stage from (for swap).
        config_path: Path to config.yaml.
        log_path: Path to experiment log.
        cache_dir: Cache directory path.

    Returns:
        Action result dict carrying an ``action`` key plus
        action-specific fields, or ``{"error": <message>}`` when the
        action is unknown, a swap argument is missing, the source
        experiment is not in the log, or the stage to swap is not part of
        the pipeline.
    """
    config = load_config(config_path)

    if action == "show":
        stages = define_stages(config)
        return {
            "action": "show",
            "stages": [
                {
                    "name": s["name"],
                    "description": s["description"],
                    "hash": s["hash"],
                    "cached": check_cache(s, cache_dir),
                }
                for s in stages
            ],
            "cache_stats": get_cache_stats(cache_dir),
        }

    if action == "swap":
        if not stage_name:
            return {"error": "Stage name required for swap action"}
        if not from_exp:
            return {"error": "Source experiment ID required (--from)"}

        experiments = load_experiments(log_path)
        source = next(
            (exp for exp in experiments if exp.get("experiment_id") == from_exp),
            None,
        )
        if not source:
            return {"error": f"Experiment {from_exp} not found"}

        current_stages = define_stages(config)
        old_stage = next((s for s in current_stages if s["name"] == stage_name), None)
        if old_stage is None:
            # Without this guard the swap is a silent no-op that still
            # reports success, with old_stage=None and an unchanged pipeline.
            return {"error": f"Stage {stage_name} not found in pipeline"}

        new_stage = extract_stage_from_experiment(source, stage_name)
        updated = swap_stage(current_stages, stage_name, new_stage)

        return {
            "action": "swap",
            "stage": stage_name,
            "source_experiment": from_exp,
            "old_stage": old_stage,
            "new_stage": new_stage,
            "updated_pipeline": [
                {"name": s["name"], "description": s.get("description", ""), "hash": s.get("hash", "")}
                for s in updated
            ],
        }

    if action == "cache":
        stages = define_stages(config)
        return {
            "action": "cache",
            "stages": [
                {
                    "name": s["name"],
                    "hash": s["hash"],
                    "cache_path": str(get_cache_path(s, cache_dir)),
                    "already_cached": check_cache(s, cache_dir),
                }
                for s in stages
            ],
            "cache_dir": cache_dir,
            "stats": get_cache_stats(cache_dir),
        }

    if action == "run":
        # Dry run: reports which stages a run would skip; nothing is executed.
        stages = define_stages(config)
        skippable = [s["name"] for s in stages if check_cache(s, cache_dir)]
        return {
            "action": "run",
            "total_stages": len(stages),
            "cached_stages": skippable,
            "stages_to_run": [s["name"] for s in stages if s["name"] not in skippable],
            "message": f"Would skip {len(skippable)} cached stage(s) and run {len(stages) - len(skippable)}",
        }

    return {"error": f"Unknown action: {action}"}
341
+
342
+
343
+ # --- Report Formatting ---
344
+
345
+
346
def format_pipeline_report(report: dict) -> str:
    """Format pipeline report as markdown.

    Args:
        report: Result dict with an ``action`` key and action-specific
            fields, or a dict with an ``error`` key.

    Returns:
        ``"ERROR: <message>"`` for an error report; otherwise a markdown
        document whose body depends on the action (show, swap, cache,
        run). An unrecognised action yields only the title line.
    """
    if "error" in report:
        return f"ERROR: {report['error']}"

    action = report.get("action", "?")
    lines = [f"# Pipeline: {action.title()}", ""]

    if action == "show":
        lines.extend(["## Pipeline Stages", ""])
        lines.append("| # | Stage | Description | Hash | Cached |")
        lines.append("|---|-------|-------------|------|--------|")
        for i, s in enumerate(report.get("stages", []), 1):
            cached = "yes" if s.get("cached") else "no"
            lines.append(f"| {i} | {s['name']} | {s['description']} | {s['hash']} | {cached} |")

        stats = report.get("cache_stats", {})
        # The cache summary is omitted entirely when the cache is empty.
        if stats.get("total_files", 0) > 0:
            lines.extend([
                "",
                f"**Cache:** {stats.get('total_files', 0)} files, "
                f"{stats.get('total_size_mb', 0):.1f} MB",
            ])

    elif action == "swap":
        # These keys can be present with a None value (no stage matched),
        # in which case .get(key, {}) returns None rather than the default.
        old = report.get("old_stage") or {}
        new = report.get("new_stage") or {}
        lines.extend([
            f"**Swapped stage:** {report.get('stage')}",
            f"**Source experiment:** {report.get('source_experiment')}",
            "",
            f"Old: {old.get('description', 'N/A')} (hash: {old.get('hash', '?')})",
            f"New: {new.get('config', {})} from {report.get('source_experiment')}",
            "",
            "## Updated Pipeline",
            "",
        ])
        for s in report.get("updated_pipeline", []):
            lines.append(f"- {s['name']}: {s['description']} ({s['hash']})")

    elif action == "cache":
        lines.append("| Stage | Hash | Path | Status |")
        lines.append("|-------|------|------|--------|")
        for s in report.get("stages", []):
            status = "cached" if s.get("already_cached") else "to cache"
            lines.append(f"| {s['name']} | {s['hash']} | {s['cache_path']} | {status} |")

    elif action == "run":
        cached = report.get("cached_stages", [])
        to_run = report.get("stages_to_run", [])
        lines.append(f"**{report.get('message')}**")
        lines.append("")
        if cached:
            lines.append(f"Skip (cached): {', '.join(cached)}")
        if to_run:
            lines.append(f"Run: {', '.join(to_run)}")

    return "\n".join(lines)
404
+
405
+
406
def main() -> None:
    """CLI entry point.

    Parses the command line, runs the requested pipeline action and
    prints the result either as JSON (``--json``) or as a formatted
    report. Exits with status 1 when the action produced an error report,
    so calling scripts can detect the failure.
    """
    parser = argparse.ArgumentParser(
        description="Pipeline composition manager",
    )
    parser.add_argument(
        "action", choices=["show", "swap", "cache", "run"],
        help="Pipeline action",
    )
    parser.add_argument(
        "stage", nargs="?",
        help="Stage name (for swap)",
    )
    parser.add_argument(
        "--from", dest="from_exp",
        help="Source experiment ID (for swap)",
    )
    parser.add_argument(
        "--config", default="config.yaml",
        help="Path to config.yaml",
    )
    parser.add_argument(
        "--log", default=DEFAULT_LOG_PATH,
        help="Path to experiment log",
    )
    parser.add_argument(
        "--cache-dir", default=DEFAULT_CACHE_DIR,
        help=f"Cache directory (default: {DEFAULT_CACHE_DIR})",
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output raw JSON instead of formatted report",
    )
    args = parser.parse_args()

    report = compose_pipeline(
        action=args.action,
        stage_name=args.stage,
        from_exp=args.from_exp,
        config_path=args.config,
        log_path=args.log,
        cache_dir=args.cache_dir,
    )

    if args.json:
        # default=str stringifies any value json cannot encode natively.
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_pipeline_report(report))

    # The report is still printed to stdout as before; only the exit status
    # changes, so a failed action is no longer reported as success.
    if "error" in report:
        sys.exit(1)
454
+
455
+
456
+ if __name__ == "__main__":
457
+ main()
@@ -110,6 +110,9 @@ TEMPLATE_DIRS = {
110
110
  "experiment_diff.py",
111
111
  "training_monitor.py",
112
112
  "regression_gate.py",
113
+ "build_ensemble.py",
114
+ "pipeline_manager.py",
115
+ "warm_start.py",
113
116
  ],
114
117
  "tests": ["__init__.py", "conftest.py"],
115
118
  }
@@ -133,6 +136,9 @@ DIRECTORIES_TO_CREATE = [
133
136
  "experiments/diffs",
134
137
  "experiments/monitors",
135
138
  "experiments/regressions",
139
+ "experiments/ensembles",
140
+ "experiments/cache",
141
+ "experiments/warm_starts",
136
142
  "experiments/logs",
137
143
  "models/best",
138
144
  "models/archive",