claude-turing 2.5.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,618 @@
+ #!/usr/bin/env python3
+ """Cross-project knowledge transfer for the autoresearch pipeline.
+
+ Scans prior Turing projects for similar task characteristics and surfaces
+ what worked. Builds institutional ML memory across projects: the system
+ remembers what the researcher would otherwise rediscover.
+
+ Usage:
+     python scripts/knowledge_transfer.py
+     python scripts/knowledge_transfer.py --from ~/projects/fraud-detection
+     python scripts/knowledge_transfer.py --auto
+     python scripts/knowledge_transfer.py --json
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import json
+ import math
+ import os
+ import sys
+ from datetime import datetime, timezone
+ from pathlib import Path
+
+ import yaml
+
+ from scripts.turing_io import load_config, load_experiments
+
+ DEFAULT_LOG_PATH = "experiments/log.jsonl"
+ DEFAULT_INDEX_PATH = os.path.expanduser("~/.turing/project_index.yaml")
+ SCAN_DEPTH = 4  # Max directory depth to search for projects
+
+
+ # --- Project Signature ---
+
+
+ def extract_project_signature(
+     config_path: str = "config.yaml",
+     log_path: str = DEFAULT_LOG_PATH,
+ ) -> dict:
+     """Extract a project signature from config and experiment history.
+
+     The signature captures task type, dataset characteristics, best model,
+     and key insights, enough to match against other projects.
+
+     Returns:
+         Project signature dict.
+     """
+     config = load_config(config_path)
+     experiments = load_experiments(log_path)
+
+     eval_cfg = config.get("evaluation", {})
+     primary_metric = eval_cfg.get("primary_metric", "accuracy")
+     lower_is_better = eval_cfg.get("lower_is_better", False)
+     task_type = config.get("task", {}).get("type", "classification")
+
+     # Dataset characteristics
+     dataset = config.get("dataset", config.get("data", {}))
+     dataset_sig = {
+         "task_type": task_type,
+         "n_samples": dataset.get("n_samples", dataset.get("size")),
+         "n_features": dataset.get("n_features", dataset.get("dimensionality")),
+         "class_balance": dataset.get("class_balance"),
+         "feature_types": dataset.get("feature_types", "mixed"),
+     }
+
+     # Best experiment
+     kept = [e for e in experiments if e.get("status") == "kept"]
+     best = None
+     if kept:
+         if lower_is_better:
+             best = min(kept, key=lambda e: e.get("metrics", {}).get(primary_metric, float("inf")))
+         else:
+             best = max(kept, key=lambda e: e.get("metrics", {}).get(primary_metric, float("-inf")))
+
+     best_sig = None
+     if best:
+         best_sig = {
+             "experiment_id": best.get("experiment_id"),
+             "model_type": best.get("config", {}).get("model_type", "unknown"),
+             "primary_metric": primary_metric,
+             "metric_value": best.get("metrics", {}).get(primary_metric),
+             "hyperparams": best.get("config", {}).get("hyperparams", {}),
+         }
+
+     # What worked and what didn't
+     model_stats = {}
+     for exp in experiments:
+         mt = exp.get("config", {}).get("model_type", "unknown")
+         if mt not in model_stats:
+             model_stats[mt] = {"kept": 0, "discarded": 0, "total": 0}
+         model_stats[mt]["total"] += 1
+         if exp.get("status") == "kept":
+             model_stats[mt]["kept"] += 1
+         elif exp.get("status") == "discarded":
+             model_stats[mt]["discarded"] += 1
+
+     # Key insights (from experiment patterns)
+     insights = _extract_insights(experiments, model_stats, primary_metric)
+
+     return {
+         "extracted_at": datetime.now(timezone.utc).isoformat(),
+         "primary_metric": primary_metric,
+         "lower_is_better": lower_is_better,
+         "dataset": dataset_sig,
+         "best_experiment": best_sig,
+         "model_stats": model_stats,
+         "total_experiments": len(experiments),
+         "kept_experiments": len(kept),
+         "insights": insights,
+     }
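+
+ # For orientation, a signature produced above might look roughly like this
+ # (all values below are hypothetical, not taken from any real project):
+ #
+ # {
+ #     "extracted_at": "2025-01-01T00:00:00+00:00",
+ #     "primary_metric": "f1",
+ #     "lower_is_better": False,
+ #     "dataset": {"task_type": "classification", "n_samples": 50000,
+ #                 "n_features": 120, "class_balance": "imbalanced",
+ #                 "feature_types": "mixed"},
+ #     "best_experiment": {"experiment_id": "exp-014", "model_type": "xgboost",
+ #                         "primary_metric": "f1", "metric_value": 0.87,
+ #                         "hyperparams": {"max_depth": 6}},
+ #     "model_stats": {"xgboost": {"kept": 3, "discarded": 1, "total": 4}},
+ #     "total_experiments": 12,
+ #     "kept_experiments": 4,
+ #     "insights": ["xgboost had highest keep rate (75%)"],
+ # }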
+
+
+ def _extract_insights(
+     experiments: list[dict],
+     model_stats: dict,
+     primary_metric: str,
+ ) -> list[str]:
+     """Extract key insights from experiment history."""
+     insights = []
+
+     # Best model family
+     best_family = None
+     best_rate = 0.0
+     for mt, stats in model_stats.items():
+         if stats["total"] >= 2:
+             rate = stats["kept"] / stats["total"]
+             if rate > best_rate:
+                 best_rate = rate
+                 best_family = mt
+     if best_family:
+         insights.append(f"{best_family} had highest keep rate ({best_rate:.0%})")
+
+     # Worst model family
+     worst_family = None
+     worst_rate = 1.0
+     for mt, stats in model_stats.items():
+         if stats["total"] >= 2:
+             rate = stats["kept"] / stats["total"]
+             if rate < worst_rate:
+                 worst_rate = rate
+                 worst_family = mt
+     if worst_family and worst_family != best_family:
+         insights.append(f"{worst_family} had lowest keep rate ({worst_rate:.0%})")
+
+     # Experiment count
+     if len(experiments) > 20:
+         insights.append(f"Extensive search ({len(experiments)} experiments)")
+     elif len(experiments) < 5:
+         insights.append(f"Limited exploration ({len(experiments)} experiments)")
+
+     return insights
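+
+ # Example (hypothetical history): with 12 experiments where xgboost kept 3/4
+ # runs and logreg kept 0/3, this would return:
+ #   ["xgboost had highest keep rate (75%)",
+ #    "logreg had lowest keep rate (0%)"]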
+
+
+ # --- Project Similarity ---
+
+
+ def compute_similarity(sig_a: dict, sig_b: dict) -> float:
+     """Compute similarity between two project signatures.
+
+     Uses a weighted combination of task type match, dataset similarity,
+     and feature type overlap.
+
+     Returns:
+         Similarity score in [0, 1].
+     """
+     scores = []
+     weights = []
+
+     # Task type (exact match)
+     ds_a = sig_a.get("dataset", {})
+     ds_b = sig_b.get("dataset", {})
+     task_a = ds_a.get("task_type")
+     task_b = ds_b.get("task_type")
+     task_match = 1.0 if (task_a and task_b and task_a == task_b) else 0.0
+     scores.append(task_match)
+     weights.append(3.0)  # High weight
+
+     # Feature types
+     ft_a = ds_a.get("feature_types")
+     ft_b = ds_b.get("feature_types")
+     if ft_a and ft_b:
+         ft_match = 1.0 if ft_a == ft_b else 0.3
+     else:
+         ft_match = 0.0
+     scores.append(ft_match)
+     weights.append(1.0)
+
+     # Dataset size similarity (log scale)
+     n_a = ds_a.get("n_samples")
+     n_b = ds_b.get("n_samples")
+     if n_a and n_b and n_a > 0 and n_b > 0:
+         log_ratio = abs(math.log10(n_a) - math.log10(n_b))
+         size_sim = max(0, 1 - log_ratio / 3)  # 1000x difference = 0
+         scores.append(size_sim)
+         weights.append(1.0)
+
+     # Dimensionality similarity
+     d_a = ds_a.get("n_features")
+     d_b = ds_b.get("n_features")
+     if d_a and d_b and d_a > 0 and d_b > 0:
+         log_ratio = abs(math.log10(d_a) - math.log10(d_b))
+         dim_sim = max(0, 1 - log_ratio / 2)  # 100x difference = 0
+         scores.append(dim_sim)
+         weights.append(0.5)
+
+     # Class balance similarity
+     bal_a = ds_a.get("class_balance")
+     bal_b = ds_b.get("class_balance")
+     if bal_a and bal_b:
+         if bal_a == bal_b:
+             scores.append(1.0)
+         elif isinstance(bal_a, str) and isinstance(bal_b, str):
+             # Both are labels but differ (e.g. "balanced" vs "imbalanced")
+             scores.append(0.5)
+         else:
+             scores.append(0.0)
+         weights.append(1.0)
+
+     if not weights:
+         return 0.0
+
+     weighted_sum = sum(s * w for s, w in zip(scores, weights))
+     return round(weighted_sum / sum(weights), 4)
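+
+ # Worked example (hypothetical numbers): two classification projects with the
+ # same feature_types and class_balance, n_samples 10_000 vs 50_000, and
+ # n_features 100 vs 80 score:
+ #   task     1.0 * 3.0
+ #   types    1.0 * 1.0
+ #   size     (1 - log10(5)/3)    ~= 0.767 * 1.0
+ #   dims     (1 - log10(1.25)/2) ~= 0.952 * 0.5
+ #   balance  1.0 * 1.0
+ # giving (3 + 1 + 0.767 + 0.476 + 1) / 6.5 ~= 0.96.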
+
+
+ # --- Project Index ---
+
+
+ def load_project_index(index_path: str = DEFAULT_INDEX_PATH) -> list[dict]:
+     """Load the cross-project index."""
+     path = Path(index_path)
+     if not path.exists():
+         return []
+
+     with open(path) as f:
+         data = yaml.safe_load(f)
+
+     return data if isinstance(data, list) else []
+
+
+ def save_project_index(index: list[dict], index_path: str = DEFAULT_INDEX_PATH) -> None:
+     """Save the cross-project index."""
+     path = Path(index_path)
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+     with open(path, "w") as f:
+         yaml.dump(index, f, default_flow_style=False, sort_keys=False)
+
+
+ def index_project(
+     project_path: str,
+     signature: dict,
+     index_path: str = DEFAULT_INDEX_PATH,
+ ) -> None:
+     """Add or update a project in the index."""
+     index = load_project_index(index_path)
+
+     # Remove existing entry for this path
+     index = [p for p in index if p.get("path") != project_path]
+
+     index.append({
+         "path": project_path,
+         "indexed_at": datetime.now(timezone.utc).isoformat(),
+         "signature": signature,
+     })
+
+     save_project_index(index, index_path)
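+
+ # The index file is a YAML list of entries shaped like (illustrative):
+ #
+ # - path: /home/user/projects/fraud-detection
+ #   indexed_at: "2025-01-01T00:00:00+00:00"
+ #   signature: {...}  # output of extract_project_signature()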
+
+
+ def scan_for_projects(
+     search_roots: list[str] | None = None,
+     max_depth: int = SCAN_DEPTH,
+ ) -> list[str]:
+     """Scan for Turing projects on the machine.
+
+     Looks for directories containing both config.yaml and experiments/log.jsonl.
+
+     Returns:
+         List of project directory paths.
+     """
+     if search_roots is None:
+         home = os.path.expanduser("~")
+         search_roots = [
+             os.path.join(home, "projects"),
+             os.path.join(home, "ml"),
+             os.path.join(home, "research"),
+             os.getcwd(),
+         ]
+
+     projects = []
+     seen = set()
+
+     for root in search_roots:
+         if not os.path.isdir(root):
+             continue
+         _scan_dir(root, projects, seen, 0, max_depth)
+
+     return projects
+
+
+ def _scan_dir(
+     path: str,
+     projects: list[str],
+     seen: set[str],
+     depth: int,
+     max_depth: int,
+ ) -> None:
+     """Recursively scan for Turing projects."""
+     if depth > max_depth:
+         return
+
+     real_path = os.path.realpath(path)
+     if real_path in seen:
+         return
+     seen.add(real_path)
+
+     config_path = os.path.join(path, "config.yaml")
+     log_path = os.path.join(path, "experiments", "log.jsonl")
+
+     if os.path.isfile(config_path) and os.path.isfile(log_path):
+         projects.append(path)
+         return  # Don't recurse into projects
+
+     try:
+         entries = os.listdir(path)
+     except PermissionError:
+         return
+
+     for entry in sorted(entries):
+         if entry.startswith(".") or entry in ("node_modules", ".venv", "__pycache__", "venv"):
+             continue
+         child = os.path.join(path, entry)
+         if os.path.isdir(child):
+             _scan_dir(child, projects, seen, depth + 1, max_depth)
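+
+ # A directory counts as a Turing project only when both marker files exist,
+ # e.g. (hypothetical layout):
+ #   ~/projects/fraud-detection/config.yaml
+ #   ~/projects/fraud-detection/experiments/log.jsonl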
+
+
+ # --- Transfer Recommendations ---
+
+
+ def generate_recommendations(
+     current_sig: dict,
+     similar_projects: list[dict],
+     top_k: int = 3,
+ ) -> list[dict]:
+     """Generate transfer recommendations from similar projects.
+
+     Args:
+         current_sig: Current project signature.
+         similar_projects: List of {path, similarity, signature} dicts.
+         top_k: Number of top recommendations.
+
+     Returns:
+         List of recommendation dicts.
+     """
+     recommendations = []
+
+     for proj in similar_projects[:top_k]:
+         sig = proj.get("signature", {})
+         best = sig.get("best_experiment")
+         insights = sig.get("insights", [])
+
+         rec = {
+             "project_path": proj.get("path", "?"),
+             "similarity": proj.get("similarity", 0),
+             "task_type": sig.get("dataset", {}).get("task_type", "?"),
+             "total_experiments": sig.get("total_experiments", 0),
+         }
+
+         if best:
+             rec["winner"] = {
+                 "model_type": best.get("model_type", "?"),
+                 "metric_value": best.get("metric_value"),
+                 "metric_name": best.get("primary_metric", "?"),
+             }
+
+             # Generate hypothesis from winner
+             model_type = best.get("model_type", "")
+             hypothesis = f"Try {model_type}"
+             hyperparams = best.get("hyperparams", {})
+             key_params = []
+             for k in ("max_depth", "n_estimators", "learning_rate", "hidden_size"):
+                 if k in hyperparams:
+                     key_params.append(f"{k}={hyperparams[k]}")
+             if key_params:
+                 hypothesis += f" with {', '.join(key_params)}"
+             hypothesis += f" (transferred from {os.path.basename(proj.get('path', '?'))})"
+             rec["hypothesis"] = hypothesis
+
+         rec["insights"] = insights
+         recommendations.append(rec)
+
+     return recommendations
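+
+ # Example hypothesis string this produces (hypothetical winner):
+ #   "Try xgboost with max_depth=6, n_estimators=300
+ #    (transferred from fraud-detection)"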
+
+
+ # --- Full Pipeline ---
+
+
+ def knowledge_transfer(
+     from_path: str | None = None,
+     auto_queue: bool = False,
+     config_path: str = "config.yaml",
+     log_path: str = DEFAULT_LOG_PATH,
+     index_path: str = DEFAULT_INDEX_PATH,
+ ) -> dict:
+     """Run cross-project knowledge transfer.
+
+     Args:
+         from_path: Specific project path to transfer from.
+         auto_queue: Auto-queue hypotheses from recommendations.
+         config_path: Current project config.
+         log_path: Current project log.
+         index_path: Cross-project index path.
+
+     Returns:
+         Transfer report dict.
+     """
+     # Extract current project signature
+     current_sig = extract_project_signature(config_path, log_path)
+
+     # Index current project
+     cwd = os.getcwd()
+     index_project(cwd, current_sig, index_path)
+
+     if from_path:
+         # Transfer from specific project
+         from_config = os.path.join(from_path, "config.yaml")
+         from_log = os.path.join(from_path, "experiments", "log.jsonl")
+         if not os.path.isfile(from_config):
+             return {"error": f"No config.yaml found at {from_path}"}
+         if not os.path.isfile(from_log):
+             return {"error": f"No experiments/log.jsonl found at {from_path}"}
+
+         from_sig = extract_project_signature(from_config, from_log)
+         similarity = compute_similarity(current_sig, from_sig)
+         similar = [{"path": from_path, "similarity": similarity, "signature": from_sig}]
+     else:
+         # Search index for similar projects
+         index = load_project_index(index_path)
+         similar = []
+         for entry in index:
+             if entry.get("path") == cwd:
+                 continue  # Skip self
+             sig = entry.get("signature", {})
+             sim = compute_similarity(current_sig, sig)
+             if sim > 0.3:  # Minimum similarity threshold
+                 similar.append({
+                     "path": entry["path"],
+                     "similarity": sim,
+                     "signature": sig,
+                 })
+
+     similar.sort(key=lambda x: x["similarity"], reverse=True)
+
+     recommendations = generate_recommendations(current_sig, similar)
+
+     report = {
+         "generated_at": datetime.now(timezone.utc).isoformat(),
+         "current_project": cwd,
+         "current_signature": {
+             "task_type": current_sig.get("dataset", {}).get("task_type"),
+             "total_experiments": current_sig.get("total_experiments", 0),
+             "primary_metric": current_sig.get("primary_metric"),
+         },
+         "similar_projects_found": len(similar),
+         "recommendations": recommendations,
+     }
+
+     if auto_queue and recommendations:
+         report["auto_queued"] = [r.get("hypothesis") for r in recommendations if r.get("hypothesis")]
+
+     return report
+
+
+ # --- Report Formatting ---
+
+
+ def save_transfer_report(report: dict, output_dir: str = "experiments/transfers") -> Path:
+     """Save transfer report to YAML."""
+     out_path = Path(output_dir)
+     out_path.mkdir(parents=True, exist_ok=True)
+
+     timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+     filepath = out_path / f"transfer-{timestamp}.yaml"
+
+     with open(filepath, "w") as f:
+         yaml.dump(report, f, default_flow_style=False, sort_keys=False)
+
+     return filepath
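+
+ # Reports land under experiments/transfers/ with a UTC timestamp, e.g.
+ # (illustrative): experiments/transfers/transfer-20250101-120000.yaml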
+
+
+ def format_transfer_report(report: dict) -> str:
+     """Format transfer report as markdown."""
+     if "error" in report:
+         return f"ERROR: {report['error']}"
+
+     lines = [
+         "# Knowledge Transfer",
+         "",
+         f"*Generated {report.get('generated_at', 'N/A')[:19]}*",
+         "",
+     ]
+
+     n_found = report.get("similar_projects_found", 0)
+     if n_found == 0:
+         lines.extend([
+             "No similar prior projects found.",
+             "",
+             "Run `/turing:transfer` again after completing more projects,",
+             "or specify a project directly with `--from /path/to/project`.",
+         ])
+         return "\n".join(lines)
+
+     lines.append(f"**{n_found} similar project(s) found.**")
+     lines.append("")
+
+     for i, rec in enumerate(report.get("recommendations", []), 1):
+         sim = rec.get("similarity", 0)
+         path = rec.get("project_path", "?")
+         task = rec.get("task_type", "?")
+         n_exp = rec.get("total_experiments", 0)
+
+         lines.extend([
+             f"## {i}. {os.path.basename(path)} (similarity: {sim:.2f})",
+             "",
+             f"**Path:** {path}",
+             f"**Task:** {task}, {n_exp} experiments",
+         ])
+
+         winner = rec.get("winner")
+         if winner:
+             val = winner.get("metric_value")
+             val_str = f"{val:.4f}" if isinstance(val, float) else str(val)
+             lines.append(
+                 f"**Winner:** {winner.get('model_type', '?')}, "
+                 f"{winner.get('metric_name', '?')}={val_str}"
+             )
+
+         insights = rec.get("insights", [])
+         if insights:
+             lines.append("")
+             lines.append("**Insights:**")
+             for ins in insights:
+                 lines.append(f"- {ins}")
+
+         hypothesis = rec.get("hypothesis")
+         if hypothesis:
+             lines.extend(["", f"**Suggested hypothesis:** {hypothesis}"])
+
+         lines.append("")
+
+     # Auto-queued
+     queued = report.get("auto_queued", [])
+     if queued:
+         lines.extend([
+             "## Auto-Queued Hypotheses",
+             "",
+         ])
+         for h in queued:
+             lines.append(f"- {h}")
+         lines.append("")
+
+     return "\n".join(lines)
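+
+ # Sketch of the rendered output for one match (values illustrative, some
+ # lines omitted):
+ #
+ #   # Knowledge Transfer
+ #   **1 similar project(s) found.**
+ #   ## 1. fraud-detection (similarity: 0.96)
+ #   **Winner:** xgboost, f1=0.8700
+ #   **Suggested hypothesis:** Try xgboost with max_depth=6
+ #   (transferred from fraud-detection)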
+
+
+ def main() -> None:
+     """CLI entry point."""
+     parser = argparse.ArgumentParser(
+         description="Cross-project knowledge transfer",
+     )
+     parser.add_argument(
+         "--from", dest="from_path",
+         help="Specific project path to transfer from",
+     )
+     parser.add_argument(
+         "--auto", action="store_true",
+         help="Auto-queue hypotheses from transfer recommendations",
+     )
+     parser.add_argument(
+         "--config", default="config.yaml",
+         help="Path to config.yaml",
+     )
+     parser.add_argument(
+         "--log", default=DEFAULT_LOG_PATH,
+         help="Path to experiment log",
+     )
+     parser.add_argument(
+         "--index", default=DEFAULT_INDEX_PATH,
+         help=f"Path to project index (default: {DEFAULT_INDEX_PATH})",
+     )
+     parser.add_argument(
+         "--json", action="store_true",
+         help="Output raw JSON instead of formatted report",
+     )
+     args = parser.parse_args()
+
+     report = knowledge_transfer(
+         from_path=args.from_path,
+         auto_queue=args.auto,
+         config_path=args.config,
+         log_path=args.log,
+         index_path=args.index,
+     )
+
+     if "error" not in report:
+         filepath = save_transfer_report(report)
+         print(f"Saved to {filepath}", file=sys.stderr)
+
+     if args.json:
+         print(json.dumps(report, indent=2, default=str))
+     else:
+         print(format_transfer_report(report))
+
+
+ if __name__ == "__main__":
+     main()