claude-turing 4.2.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,549 @@
1
+ #!/usr/bin/env python3
2
+ """Model registry and lifecycle management for the autoresearch pipeline.
3
+
4
+ Tracks which model version is production, staging, candidate, or archived.
5
+ Promotion workflow with automated gates: candidate → staging → production.
6
+ Prevents the "which pickle file is deployed?" problem.
7
+
8
+ Usage:
9
+ python scripts/model_lifecycle.py list
10
+ python scripts/model_lifecycle.py register exp-095 --version v4.1
11
+ python scripts/model_lifecycle.py promote exp-089 staging
12
+ python scripts/model_lifecycle.py demote exp-089 candidate
13
+ python scripts/model_lifecycle.py history
14
+ python scripts/model_lifecycle.py list --json
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import argparse
20
+ import json
21
+ import sys
22
+ from datetime import datetime, timezone
23
+ from pathlib import Path
24
+
25
+ import yaml
26
+
27
+ from scripts.turing_io import load_config, load_experiments
28
+
29
# Default file locations; both are relative paths, so they resolve against the
# directory the script is run from.
DEFAULT_LOG_PATH = "experiments/log.jsonl"
DEFAULT_REGISTRY_PATH = "experiments/registry.yaml"

# Lifecycle stages. The list order matters: list_models() sorts by position in
# this list and demote_model() compares positions to decide what is a demotion.
STAGES = ["candidate", "staging", "production", "archived"]
# Single-step promotion path: current stage -> the only stage it may be promoted to.
PROMOTION_ORDER = {"candidate": "staging", "staging": "production"}
# Single-step demotion path.
# NOTE(review): not referenced anywhere in the visible part of this file — confirm
# whether demote_model() was meant to enforce it.
DEMOTION_ORDER = {"production": "staging", "staging": "candidate"}

# Gate requirements for each promotion, keyed as "<from_stage>_to_<to_stage>".
# Every listed gate must report "PASS" unless the promotion is forced.
PROMOTION_GATES = {
    "candidate_to_staging": ["regression", "seed_study"],
    "staging_to_production": ["audit", "calibration"],
}
41
+
42
+
43
+ # --- Registry IO ---
44
+
45
+
46
def load_registry(registry_path: str = DEFAULT_REGISTRY_PATH) -> dict:
    """Load the model registry from a YAML file.

    A missing, unreadable, or unparseable file yields an empty registry
    instead of raising, so the first run works without any setup.

    Args:
        registry_path: Path to the registry YAML file.

    Returns:
        Registry dict in which 'models' and 'history' are always present and
        never None. Any other top-level keys in the file are kept as-is.
    """
    path = Path(registry_path)
    if not path.exists():
        return {"models": [], "history": []}
    try:
        with open(path) as f:
            data = yaml.safe_load(f)
    except (yaml.YAMLError, OSError):
        # NOTE(review): a corrupt registry is reported as an empty one, so a
        # following save_registry() call would overwrite the corrupt file.
        return {"models": [], "history": []}
    if not isinstance(data, dict):
        # Empty file (None) or a top-level list/scalar: treat as no registry.
        return {"models": [], "history": []}
    # A key written without a value ("models:") parses as None. Checking only
    # for key presence would hand that None to callers that iterate or append,
    # so missing and null are both normalised to an empty list.
    for key in ("models", "history"):
        if data.get(key) is None:
            data[key] = []
    return data
67
+
68
+
69
def save_registry(registry: dict, registry_path: str = DEFAULT_REGISTRY_PATH) -> Path:
    """Write the registry to disk as block-style YAML.

    Parent directories are created when missing, and keys are written in
    their existing order (no sorting).

    Args:
        registry: Registry dict to persist.
        registry_path: Destination file path.

    Returns:
        The path that was written.
    """
    destination = Path(registry_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w") as stream:
        yaml.dump(registry, stream, default_flow_style=False, sort_keys=False)
    return destination
76
+
77
+
78
+ # --- Registry Operations ---
79
+
80
+
81
def register_model(
    registry: dict,
    exp_id: str,
    version: str,
    metric: float | None = None,
    metric_name: str = "accuracy",
    stage: str = "candidate",
) -> dict:
    """Add a new model entry to the registry and log the registration.

    The registry is modified in place: one entry is appended to 'models' and
    one 'register' event to 'history', both stamped with the current UTC time.

    Args:
        registry: Current registry state.
        exp_id: Experiment ID; must not already be registered.
        version: Model version label.
        metric: Primary metric value, if known.
        metric_name: Primary metric name.
        stage: Initial stage (default: candidate); must be one of STAGES.

    Returns:
        The updated registry, or an {"error": ...} dict when the stage is
        invalid or the experiment ID is already registered.
    """
    if stage not in STAGES:
        return {"error": f"Invalid stage: {stage}. Must be one of {STAGES}"}

    # Reject a second registration of the same experiment.
    existing = next(
        (entry for entry in registry["models"] if entry["exp_id"] == exp_id),
        None,
    )
    if existing is not None:
        return {"error": f"Model {exp_id} already registered (stage: {existing['stage']})"}

    timestamp = datetime.now(timezone.utc).isoformat()

    registry["models"].append(
        {
            "exp_id": exp_id,
            "version": version,
            "stage": stage,
            "metric_name": metric_name,
            "metric": metric,
            "registered_at": timestamp,
            "last_promoted": timestamp,
            "gates_passed": [],
        }
    )
    registry["history"].append(
        {
            "action": "register",
            "exp_id": exp_id,
            "version": version,
            "stage": stage,
            "timestamp": timestamp,
        }
    )
    return registry
133
+
134
+
135
def promote_model(
    registry: dict,
    exp_id: str,
    target_stage: str,
    gate_results: dict[str, str] | None = None,
    force: bool = False,
) -> dict:
    """Promote a model one step along candidate → staging → production.

    The registry is modified in place on success: the model's stage and
    'last_promoted' timestamp are updated, gates that were verified are added
    to 'gates_passed', and a 'promote' event is appended to 'history'.

    Args:
        registry: Current registry state.
        exp_id: Experiment ID to promote.
        target_stage: Target stage; must be the next stage after the current one.
        gate_results: Gate check results {gate_name: "PASS"/"FAIL"/"NOT_RUN"}.
        force: Skip gate checks.

    Returns:
        The updated registry, or an error dict. Gate-related errors also carry
        'required_gates' and, when gates failed, 'failed_gates'.
    """
    model = _find_model(registry, exp_id)
    if model is None:
        return {"error": f"Model {exp_id} not found in registry"}

    current_stage = model["stage"]

    # Validate promotion path: only the single next stage is allowed.
    expected_target = PROMOTION_ORDER.get(current_stage)
    if expected_target is None:
        return {"error": f"Cannot promote from {current_stage} — already at highest stage or archived"}

    if target_stage != expected_target:
        return {"error": f"Cannot promote {current_stage} → {target_stage}. Expected: {current_stage} → {expected_target}"}

    # Check gates
    gate_key = f"{current_stage}_to_{target_stage}"
    required_gates = PROMOTION_GATES.get(gate_key, [])

    if not force and required_gates:
        if gate_results is None:
            return {
                "error": "Gate checks required",
                "required_gates": required_gates,
                "suggestion": "Run gate checks or use --force to skip",
            }

        # A gate absent from gate_results counts as NOT_RUN, which is a failure.
        failed_gates = []
        for gate in required_gates:
            result = gate_results.get(gate, "NOT_RUN")
            if result != "PASS":
                failed_gates.append({"gate": gate, "result": result})

        if failed_gates:
            return {
                "error": "Gate checks failed",
                "failed_gates": failed_gates,
                "required_gates": required_gates,
            }

    # Perform promotion
    now = datetime.now(timezone.utc).isoformat()
    model["stage"] = target_stage
    model["last_promoted"] = now

    # Record only gates that actually reported PASS. A forced promotion must
    # not claim gates it skipped, entries without a usable 'gates_passed' list
    # must not crash, and a re-promotion must not add duplicates.
    passed = model.get("gates_passed")
    if not isinstance(passed, list):
        passed = []
        model["gates_passed"] = passed
    verified = gate_results or {}
    for gate in required_gates:
        if verified.get(gate) == "PASS" and gate not in passed:
            passed.append(gate)

    registry["history"].append({
        "action": "promote",
        "exp_id": exp_id,
        "from_stage": current_stage,
        "to_stage": target_stage,
        "gate_results": gate_results or {},
        "forced": force,
        "timestamp": now,
    })

    return registry
210
+
211
+
212
def demote_model(
    registry: dict,
    exp_id: str,
    target_stage: str,
    reason: str = "",
) -> dict:
    """Demote a model to a lower stage.

    The registry is modified in place on success: the model's stage and
    'last_promoted' timestamp are updated and a 'demote' event is appended to
    'history'. Demotions may skip stages (e.g. production → candidate).

    Args:
        registry: Current registry state.
        exp_id: Experiment ID to demote.
        target_stage: Target stage; must be one of STAGES and lower than the
            model's current stage.
        reason: Reason for demotion.

    Returns:
        The updated registry, or an {"error": ...} dict.
    """
    model = _find_model(registry, exp_id)
    if model is None:
        return {"error": f"Model {exp_id} not found in registry"}

    current_stage = model["stage"]

    if target_stage not in STAGES:
        return {"error": f"Invalid stage: {target_stage}"}

    # A stage value outside STAGES (e.g. a hand-edited registry) would make
    # STAGES.index() raise; report it like every other failure instead.
    if current_stage not in STAGES:
        return {"error": f"Cannot demote from unknown stage: {current_stage}"}

    if STAGES.index(target_stage) >= STAGES.index(current_stage):
        return {"error": f"Cannot demote {current_stage} → {target_stage} (not a demotion)"}

    # "archived" is last in STAGES only for display ordering. Without this
    # guard the index comparison above would accept archived → production or
    # staging as a demotion, putting a retired model live with no gate checks.
    # Restoring to candidate stays allowed; from there it must be re-promoted.
    if current_stage == "archived" and target_stage != "candidate":
        return {
            "error": (
                f"Cannot demote archived → {target_stage}. "
                "Archived models can only be restored to candidate"
            )
        }

    now = datetime.now(timezone.utc).isoformat()
    model["stage"] = target_stage
    # 'last_promoted' records the time of the most recent stage change of any kind.
    model["last_promoted"] = now

    registry["history"].append({
        "action": "demote",
        "exp_id": exp_id,
        "from_stage": current_stage,
        "to_stage": target_stage,
        "reason": reason,
        "timestamp": now,
    })

    return registry
255
+
256
+
257
def archive_model(
    registry: dict,
    exp_id: str,
    reason: str = "",
) -> dict:
    """Move a model to the 'archived' stage, whatever stage it is in now.

    The registry is modified in place: the model's stage and 'last_promoted'
    timestamp are updated and an 'archive' event is appended to 'history'.

    Args:
        registry: Current registry state.
        exp_id: Experiment ID to archive.
        reason: Reason for archiving.

    Returns:
        The updated registry, or an {"error": ...} dict when the model is
        not registered.
    """
    entry = _find_model(registry, exp_id)
    if entry is None:
        return {"error": f"Model {exp_id} not found in registry"}

    timestamp = datetime.now(timezone.utc).isoformat()
    # Capture the stage being left before it is overwritten.
    previous_stage = entry["stage"]
    entry.update(stage="archived", last_promoted=timestamp)

    event = {
        "action": "archive",
        "exp_id": exp_id,
        "from_stage": previous_stage,
        "reason": reason,
        "timestamp": timestamp,
    }
    registry["history"].append(event)
    return registry
290
+
291
+
292
def _find_model(registry: dict, exp_id: str) -> dict | None:
    """Return the first registry entry whose 'exp_id' matches, or None.

    The returned dict is the entry stored in the registry itself, not a copy.
    """
    return next(
        (entry for entry in registry.get("models", []) if entry["exp_id"] == exp_id),
        None,
    )
298
+
299
+
300
+ # --- Query Operations ---
301
+
302
+
303
def list_models(registry: dict, stage: str | None = None) -> list[dict]:
    """Return registered models in lifecycle order, optionally for one stage.

    Models are ordered by the position of their stage in STAGES; entries with
    a stage outside STAGES sort last. A new list is returned.
    """

    def _stage_rank(entry: dict) -> int:
        current = entry["stage"]
        return STAGES.index(current) if current in STAGES else 99

    selected = registry.get("models", [])
    if stage:
        selected = [entry for entry in selected if entry["stage"] == stage]

    ordered = list(selected)
    ordered.sort(key=_stage_rank)
    return ordered
309
+
310
+
311
def get_model_at_stage(registry: dict, stage: str) -> dict | None:
    """Return the first registered model whose stage equals `stage`, or None.

    If several models share the stage, the one listed first in the registry
    is returned.
    """
    return next(
        (entry for entry in registry.get("models", []) if entry["stage"] == stage),
        None,
    )
317
+
318
+
319
def get_history(registry: dict, exp_id: str | None = None) -> list[dict]:
    """Return lifecycle history events, optionally for a single experiment.

    Without an experiment ID the registry's own history list is returned;
    with one, a new list holding only that experiment's events.
    """
    events = registry.get("history", [])
    if not exp_id:
        return events
    return [event for event in events if event.get("exp_id") == exp_id]
325
+
326
+
327
+ # --- Gate Checking ---
328
+
329
+
330
def check_gates(
    exp_id: str,
    gate_names: list[str],
    experiments: list[dict] | None = None,
    regression_dir: str = "experiments/regressions",
    seed_dir: str = "experiments/seed_studies",
    audit_dir: str = "experiments/audits",
    calibration_dir: str = "experiments/calibration",
) -> dict[str, str]:
    """Evaluate promotion gates by looking for their artifact files.

    Args:
        exp_id: Experiment ID whose gates are checked.
        gate_names: Gates to evaluate.
        experiments: Accepted but not used by this function.
        regression_dir: Directory searched for regression artifacts.
        seed_dir: Directory searched for seed-study artifacts.
        audit_dir: Directory searched for audit artifacts.
        calibration_dir: Directory searched for calibration artifacts.

    Returns:
        {gate_name: "PASS" / "FAIL" / "NOT_RUN"}. Gate names this function
        does not recognise are reported as "NOT_RUN".
    """
    # Gate name -> (directory to search, filename patterns tried in order).
    # NOTE(review): the regression and audit patterns do not include exp_id,
    # so an artifact produced for any experiment would match — confirm intent.
    artifact_sources = {
        "regression": (regression_dir, ["regress-*.yaml", "regression-*.yaml"]),
        "seed_study": (seed_dir, [f"{exp_id}-seeds.yaml"]),
        "audit": (audit_dir, ["audit-*.yaml"]),
        "calibration": (calibration_dir, [f"{exp_id}-calibration.yaml", "calibration-*.yaml"]),
    }

    outcomes: dict[str, str] = {}
    for gate in gate_names:
        source = artifact_sources.get(gate)
        if source is None:
            outcomes[gate] = "NOT_RUN"
            continue
        directory, patterns = source
        outcomes[gate] = _check_artifact(directory, exp_id, patterns)
    return outcomes
358
+
359
+
360
def _check_artifact(directory: str, exp_id: str, patterns: list[str]) -> str:
    """Derive a gate verdict from artifact files in a directory.

    Patterns are tried in order. For the first pattern with a readable match,
    the file's 'verdict' (else 'status', else 'result') field decides the
    outcome; a file without a recognisable verdict counts as "PASS".

    Args:
        directory: Directory to search.
        exp_id: Experiment ID. Not used inside this function; callers bake it
            into `patterns` where needed.
        patterns: Glob patterns, relative to `directory`.

    Returns:
        "PASS", "FAIL", or "NOT_RUN" (directory missing, nothing matched, or
        every matched file failed to load).
    """
    root = Path(directory)
    if not root.exists():
        return "NOT_RUN"

    passing = ("PASS", "PASSED")
    failing = ("FAIL", "FAILED")

    for glob_pattern in patterns:
        candidates = list(root.glob(glob_pattern))
        if not candidates:
            continue

        # Highest path in sort order, i.e. by filename rather than by mtime.
        # NOTE(review): presumably filenames embed a sortable date or sequence
        # number so this is the most recent artifact — verify.
        newest = max(candidates)
        try:
            with open(newest) as handle:
                payload = yaml.safe_load(handle)
        except (yaml.YAMLError, OSError):
            # Unreadable artifact: fall through to the next pattern.
            continue

        if isinstance(payload, dict):
            verdict = payload.get("verdict", payload.get("status", payload.get("result", "")))
            if isinstance(verdict, str):
                label = verdict.upper()
                if label in passing:
                    return "PASS"
                if label in failing:
                    return "FAIL"
        # Artifact exists but no clear verdict — count as PASS
        return "PASS"

    return "NOT_RUN"
386
+
387
+
388
+ # --- Report Formatting ---
389
+
390
+
391
def format_registry_list(models: list[dict]) -> str:
    """Render models as a markdown table, one row per model.

    Metrics are shown with four decimals, the registration timestamp is cut
    to its first ten characters (the date part of an ISO timestamp), and
    missing metric/version values appear as an em dash. An empty list yields
    a hint on how to register a model instead of a table.
    """
    if not models:
        return "No models registered. Use `python scripts/model_lifecycle.py register <exp-id> --version <v>` to register."

    def _row(entry: dict) -> str:
        if entry.get("metric") is not None:
            metric_cell = f"{entry['metric']:.4f}"
        else:
            metric_cell = "—"
        date_cell = entry.get("registered_at", "")[:10]
        return f"| {entry['stage']} | {entry['exp_id']} | {entry.get('version', '—')} | {metric_cell} | {date_cell} |"

    table = [
        "# Model Registry",
        "",
        "| Stage | Exp ID | Version | Metric | Registered |",
        "|-------|--------|---------|--------|------------|",
    ]
    table.extend(_row(entry) for entry in models)
    return "\n".join(table)
406
+
407
+
408
def format_history(history: list[dict]) -> str:
    """Render lifecycle history events as a markdown table.

    Timestamps are cut to their first 19 characters (seconds precision of an
    ISO timestamp). The details column depends on the event's action; events
    with an unrecognised action get an empty details cell.
    """
    if not history:
        return "No history entries."

    def _details(event: dict) -> str:
        kind = event.get("action", "?")
        if kind == "promote":
            suffix = " (forced)" if event.get("forced") else ""
            return f"{event.get('from_stage')} → {event.get('to_stage')}{suffix}"
        if kind == "demote":
            return f"{event.get('from_stage')} → {event.get('to_stage')}: {event.get('reason', '')}"
        if kind == "archive":
            return f"archived from {event.get('from_stage')}"
        if kind == "register":
            return f"registered as {event.get('stage')} ({event.get('version', '?')})"
        return ""

    table = [
        "# Model Lifecycle History",
        "",
        "| Time | Action | Exp ID | Details |",
        "|------|--------|--------|---------|",
    ]
    for event in history:
        when = event.get("timestamp", "")[:19]
        kind = event.get("action", "?")
        subject = event.get("exp_id", "?")
        table.append(f"| {when} | {kind} | {subject} | {_details(event)} |")

    return "\n".join(table)
438
+
439
+
440
+ # --- CLI ---
441
+
442
+
443
def main():
    """Command-line entry point for the model registry.

    Parses the arguments, loads the registry, runs the requested action and,
    for actions that change the registry, saves it back to the same path.
    Failed actions print "ERROR: ..." and exit with status 1; missing
    required arguments are reported through the argument parser.
    """
    parser = argparse.ArgumentParser(
        description="Model registry — track, promote, and govern model lifecycle"
    )
    parser.add_argument(
        "action",
        nargs="?",
        choices=["list", "register", "promote", "demote", "archive", "history"],
        help="Registry action",
    )
    parser.add_argument("exp_id", nargs="?", help="Experiment ID")
    parser.add_argument("target", nargs="?", help="Target stage (for promote/demote)")
    parser.add_argument("--version", help="Model version label")
    parser.add_argument("--reason", default="", help="Reason for demotion/archiving")
    parser.add_argument("--force", action="store_true", help="Skip gate checks")
    parser.add_argument("--stage", help="Filter by stage (for list)")
    parser.add_argument("--registry", default=DEFAULT_REGISTRY_PATH, help="Registry file path")
    parser.add_argument("--config", default="config.yaml", help="Path to config.yaml")
    parser.add_argument("--log", default=DEFAULT_LOG_PATH, help="Path to experiment log")
    parser.add_argument("--json", action="store_true", help="Output raw JSON")

    args = parser.parse_args()

    if not args.action:
        parser.error("Please provide an action: list, register, promote, demote, archive, history")

    def _exit_on_error(outcome: dict, with_gates: bool = False) -> None:
        # Registry operations signal failure by returning a dict with an
        # "error" key instead of raising.
        if "error" not in outcome:
            return
        print(f"ERROR: {outcome['error']}")
        if with_gates and "failed_gates" in outcome:
            for failure in outcome["failed_gates"]:
                print(f" {failure['gate']}: {failure['result']}")
        sys.exit(1)

    registry = load_registry(args.registry)
    action = args.action

    if action == "list":
        models = list_models(registry, args.stage)
        print(json.dumps(models, indent=2) if args.json else format_registry_list(models))
        return

    if action == "register":
        if not args.exp_id:
            parser.error("register requires an experiment ID")
        version = args.version or "v1"

        # Look up metric from experiment log: the first log entry for this
        # experiment supplies the configured primary metric (None if absent).
        config = load_config(args.config)
        metric_name = config.get("evaluation", {}).get("primary_metric", "accuracy")
        experiments = load_experiments(args.log)
        metric = next(
            (
                exp.get("metrics", {}).get(metric_name)
                for exp in experiments
                if exp.get("experiment_id") == args.exp_id
            ),
            None,
        )

        outcome = register_model(registry, args.exp_id, version, metric, metric_name)
        _exit_on_error(outcome)
        save_registry(outcome, args.registry)
        print(f"Registered {args.exp_id} as {version} (candidate)")
        return

    if action == "promote":
        if not args.exp_id or not args.target:
            parser.error("promote requires <exp-id> <target-stage>")

        # Check gates, unless forced. An unknown model or an unrecognised
        # promotion path leaves gate_results as None; promote_model() reports
        # those problems itself.
        gate_results = None
        model = None if args.force else _find_model(registry, args.exp_id)
        if model:
            required = PROMOTION_GATES.get(f"{model['stage']}_to_{args.target}", [])
            if required:
                gate_results = check_gates(args.exp_id, required)

        outcome = promote_model(registry, args.exp_id, args.target, gate_results, args.force)
        _exit_on_error(outcome, with_gates=True)
        save_registry(outcome, args.registry)
        print(f"Promoted {args.exp_id} → {args.target}")
        return

    if action == "demote":
        if not args.exp_id or not args.target:
            parser.error("demote requires <exp-id> <target-stage>")
        outcome = demote_model(registry, args.exp_id, args.target, args.reason)
        _exit_on_error(outcome)
        save_registry(outcome, args.registry)
        print(f"Demoted {args.exp_id} → {args.target}")
        return

    if action == "archive":
        if not args.exp_id:
            parser.error("archive requires an experiment ID")
        outcome = archive_model(registry, args.exp_id, args.reason)
        _exit_on_error(outcome)
        save_registry(outcome, args.registry)
        print(f"Archived {args.exp_id}")
        return

    if action == "history":
        history = get_history(registry, args.exp_id)
        print(json.dumps(history, indent=2) if args.json else format_history(history))
546
+
547
+
548
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()