claude-turing 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,421 @@
+#!/usr/bin/env python3
+"""Literature integration for ML experiments.
+
+Targeted literature search scoped to the current experiment's domain.
+Three modes: free query, baseline SOTA comparison, related papers.
+
+Uses the Semantic Scholar API (free, no key required for basic search)
+with fallback to local-only mode when offline.
+
+Usage:
+    python scripts/literature_search.py "gradient boosting tabular"  # Free query
+    python scripts/literature_search.py --baseline                   # SOTA comparison
+    python scripts/literature_search.py --related exp-042            # Related papers
+    python scripts/literature_search.py --auto-queue "query"         # Queue hypotheses
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+
+import yaml
+
+from scripts.turing_io import load_config, load_experiments
+
+
+SEMANTIC_SCHOLAR_API = "https://api.semanticscholar.org/graph/v1"
+DEFAULT_RESULT_COUNT = 5
+REQUEST_TIMEOUT = 15
+
+
+def search_semantic_scholar(
+    query: str,
+    limit: int = DEFAULT_RESULT_COUNT,
+    fields: str = "title,authors,year,venue,abstract,citationCount,externalIds",
+) -> list[dict]:
+    """Search Semantic Scholar for papers matching a query.
+
+    Returns a list of paper dicts with title, authors, year, venue,
+    abstract, citation_count, and URLs.
+    """
+    params = urllib.parse.urlencode({
+        "query": query,
+        "limit": limit,
+        "fields": fields,
+    })
+    url = f"{SEMANTIC_SCHOLAR_API}/paper/search?{params}"
+
+    try:
+        req = urllib.request.Request(url, headers={"User-Agent": "turing-ml/2.0"})
+        with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:
+            data = json.loads(resp.read().decode())
+    except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, json.JSONDecodeError) as e:
+        return [{"error": f"Semantic Scholar API failed: {e}"}]
+
+    papers = []
+    for item in data.get("data", []):
+        authors = [a.get("name", "?") for a in (item.get("authors") or [])]
+        ext_ids = item.get("externalIds") or {}
+
+        paper = {
+            "title": item.get("title", "Untitled"),
+            "authors": authors[:5],
+            "year": item.get("year"),
+            "venue": item.get("venue") or "N/A",
+            "abstract": (item.get("abstract") or "")[:300],
+            "citation_count": item.get("citationCount", 0),
+            "paper_id": item.get("paperId"),
+            "doi": ext_ids.get("DOI"),
+            "arxiv_id": ext_ids.get("ArXiv"),
+            "url": f"https://www.semanticscholar.org/paper/{item.get('paperId', '')}" if item.get("paperId") else None,
+        }
+        papers.append(paper)
+
+    return papers
+
+
+def build_query_from_config(config: dict) -> str:
+    """Build a search query from project config."""
+    parts = []
+
+    task_desc = config.get("task_description", "")
+    if task_desc:
+        parts.append(task_desc)
+
+    model_type = config.get("model", {}).get("type", "")
+    if model_type:
+        parts.append(model_type)
+
+    primary_metric = config.get("evaluation", {}).get("primary_metric", "")
+    if primary_metric:
+        parts.append(primary_metric)
+
+    data_source = config.get("data", {}).get("source", "")
+    if data_source and not data_source.startswith("{"):
+        parts.append(data_source)
+
+    return " ".join(parts) if parts else "machine learning"
+
+
+def build_query_from_experiment(experiment: dict) -> str:
+    """Build a search query from experiment metadata."""
+    parts = []
+
+    model_type = experiment.get("config", {}).get("model_type", "")
+    if model_type:
+        parts.append(model_type)
+
+    description = experiment.get("description", "")
+    if description:
+        parts.append(description[:100])
+
+    return " ".join(parts) if parts else "machine learning experiment"
+
+
+def search_baseline(
+    config: dict,
+    experiments: list[dict],
+    primary_metric: str,
+    lower_is_better: bool,
+) -> dict:
+    """Search for SOTA baselines and compare against the current best.
+
+    Returns a dict with SOTA results and gap analysis.
+    """
+    query = build_query_from_config(config)
+    query += " state of the art benchmark"
+
+    papers = search_semantic_scholar(query, limit=10)
+    if papers and "error" in papers[0]:
+        return {"error": papers[0]["error"], "query": query}
+
+    # Find current best
+    best = None
+    best_val = float("inf") if lower_is_better else float("-inf")
+    for exp in experiments:
+        if exp.get("status") != "kept":
+            continue
+        val = exp.get("metrics", {}).get(primary_metric)
+        if val is None:
+            continue
+        if (lower_is_better and val < best_val) or (not lower_is_better and val > best_val):
+            best_val = val
+            best = exp
+
+    result = {
+        "query": query,
+        "papers": papers,
+        "current_best": {
+            "experiment_id": best.get("experiment_id") if best else None,
+            "metric": primary_metric,
+            "value": round(best_val, 4) if best else None,
+        },
+    }
+
+    return result
+
+
+def search_related(
+    experiment: dict,
+    limit: int = DEFAULT_RESULT_COUNT,
+) -> dict:
+    """Find papers related to a specific experiment."""
+    query = build_query_from_experiment(experiment)
+    papers = search_semantic_scholar(query, limit=limit)
+
+    return {
+        "experiment_id": experiment.get("experiment_id", "?"),
+        "query": query,
+        "papers": papers,
+    }
+
+
+def generate_literature_hypotheses(papers: list[dict]) -> list[dict]:
+    """Generate hypotheses from literature findings.
+
+    Extracts methodological suggestions from paper titles/abstracts.
+    """
+    hypotheses = []
+    for i, paper in enumerate(papers):
+        if "error" in paper:
+            continue
+        title = paper.get("title", "")
+        if not title:
+            continue
+
+        hypotheses.append({
+            "id": f"hyp-lit-{i+1:03d}",
+            "description": f"Investigate approach from: {title}",
+            "source": "literature",
+            "status": "queued",
+            "priority": "normal",
+            "rationale": f"Paper: {title} ({paper.get('year', '?')}, {paper.get('citation_count', 0)} citations)",
+            "paper_url": paper.get("url"),
+            "created_at": datetime.now(timezone.utc).isoformat(),
+        })
+
+    return hypotheses[:5]
+
+
+def save_literature_results(results: dict, output_dir: str = "experiments/literature") -> Path:
+    """Save literature search results to a markdown file."""
+    out_path = Path(output_dir)
+    out_path.mkdir(parents=True, exist_ok=True)
+
+    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H%M%S")
+    filepath = out_path / f"query-{timestamp}.md"
+
+    with open(filepath, "w") as f:
+        f.write(format_literature_report(results))
+
+    return filepath
+
+
+def format_literature_report(results: dict) -> str:
+    """Format literature search results as markdown."""
+    if "error" in results:
+        return f"ERROR: {results['error']}"
+
+    mode = results.get("mode", "query")
+    query = results.get("query", "")
+    papers = results.get("papers", [])
+
+    lines = [
+        "# Literature Search",
+        "",
+        f"*Query: {query}*",
+        f"*Mode: {mode}*",
+        "",
+    ]
+
+    if not papers:
+        lines.append("No papers found.")
+        return "\n".join(lines)
+
+    if any("error" in p for p in papers):
+        error_paper = next(p for p in papers if "error" in p)
+        lines.append(f"**API Error:** {error_paper['error']}")
+        lines.append("")
+        lines.append("*Search may be offline. Try again later or use a manual search.*")
+        return "\n".join(lines)
+
+    # Papers table
+    lines.extend([
+        "## Results",
+        "",
+        "| # | Title | Year | Venue | Citations |",
+        "|---|-------|------|-------|-----------|",
+    ])
+
+    for i, paper in enumerate(papers, 1):
+        title = paper.get("title", "Untitled")
+        year = paper.get("year", "?")
+        venue = paper.get("venue", "N/A")
+        cites = paper.get("citation_count", 0)
+        lines.append(f"| {i} | {title} | {year} | {venue} | {cites} |")
+
+    # Paper details
+    lines.extend(["", "## Details", ""])
+    for i, paper in enumerate(papers, 1):
+        title = paper.get("title", "Untitled")
+        authors = ", ".join(paper.get("authors", [])[:3])
+        if len(paper.get("authors", [])) > 3:
+            authors += " et al."
+        abstract = paper.get("abstract", "")
+        url = paper.get("url", "")
+
+        lines.append(f"### {i}. {title}")
+        lines.append("")
+        lines.append(f"**Authors:** {authors}")
+        lines.append(f"**Year:** {paper.get('year', '?')} | **Venue:** {paper.get('venue', 'N/A')} | **Citations:** {paper.get('citation_count', 0)}")
+        if url:
+            lines.append(f"**URL:** {url}")
+        if abstract:
+            lines.append(f"**Abstract:** {abstract}...")
+        lines.append("")
+
+    # Baseline comparison
+    baseline = results.get("current_best")
+    if baseline and baseline.get("value") is not None:
+        lines.extend([
+            "## Current Performance",
+            "",
+            f"- **Best experiment:** {baseline.get('experiment_id', '?')}",
+            f"- **{baseline['metric']}:** {baseline['value']:.4f}",
+            "",
+            "*Compare against reported baselines in the papers above.*",
+        ])
+
+    return "\n".join(lines)
+
+
+def queue_literature_hypotheses(hypotheses: list[dict], queue_path: str = "hypotheses.yaml") -> int:
+    """Append literature hypotheses to the queue."""
+    path = Path(queue_path)
+    existing = []
+    if path.exists() and path.stat().st_size > 0:
+        with open(path) as f:
+            data = yaml.safe_load(f)
+        if isinstance(data, list):
+            existing = data
+
+    existing_ids = {h.get("id") for h in existing}
+    new = [h for h in hypotheses if h["id"] not in existing_ids]
+
+    if new:
+        existing.extend(new)
+        with open(path, "w") as f:
+            yaml.dump(existing, f, default_flow_style=False, sort_keys=False)
+
+    return len(new)
+
+
+def run_literature_search(
+    query: str | None = None,
+    baseline: bool = False,
+    related_exp_id: str | None = None,
+    auto_queue: bool = False,
+    config_path: str = "config.yaml",
+    log_path: str = "experiments/log.jsonl",
+    limit: int = DEFAULT_RESULT_COUNT,
+) -> dict:
+    """Run a literature search in the appropriate mode.
+
+    Args:
+        query: Free-text search query.
+        baseline: If True, search for SOTA baselines.
+        related_exp_id: If set, find papers related to this experiment.
+        auto_queue: Auto-queue hypotheses from findings.
+        config_path: Path to config.yaml.
+        log_path: Path to experiment log.
+        limit: Maximum number of results.
+
+    Returns:
+        Literature search result dict.
+    """
+    config = load_config(config_path)
+    eval_cfg = config.get("evaluation", {})
+    primary_metric = eval_cfg.get("primary_metric", "accuracy")
+    lower_is_better = eval_cfg.get("lower_is_better", False)
+    experiments = load_experiments(log_path)
+
+    if baseline:
+        result = search_baseline(config, experiments, primary_metric, lower_is_better)
+        result["mode"] = "baseline"
+    elif related_exp_id:
+        target = None
+        for exp in experiments:
+            if exp.get("experiment_id") == related_exp_id:
+                target = exp
+                break
+        if not target:
+            return {"error": f"Experiment {related_exp_id} not found", "mode": "related"}
+        result = search_related(target, limit=limit)
+        result["mode"] = "related"
+    elif query:
+        papers = search_semantic_scholar(query, limit=limit)
+        result = {"query": query, "papers": papers, "mode": "query"}
+    else:
+        # Default: search based on config
+        query = build_query_from_config(config)
+        papers = search_semantic_scholar(query, limit=limit)
+        result = {"query": query, "papers": papers, "mode": "query"}
+
+    result["timestamp"] = datetime.now(timezone.utc).isoformat()
+
+    # Generate and optionally queue hypotheses
+    papers = result.get("papers", [])
+    if papers and not any("error" in p for p in papers):
+        hypotheses = generate_literature_hypotheses(papers)
+        result["hypotheses"] = hypotheses
+
+        if auto_queue and hypotheses:
+            n_added = queue_literature_hypotheses(hypotheses)
+            result["hypotheses_queued"] = n_added
+            print(f"Queued {n_added} hypotheses from literature", file=sys.stderr)
+
+    return result
+
+
+def main() -> None:
+    """CLI entry point."""
+    parser = argparse.ArgumentParser(description="Literature search for ML experiments")
+    parser.add_argument("query", nargs="?", default=None, help="Free-text search query")
+    parser.add_argument("--baseline", action="store_true", help="Search for SOTA baselines")
+    parser.add_argument("--related", default=None, metavar="EXP_ID", help="Find related papers for experiment")
+    parser.add_argument("--auto-queue", action="store_true", help="Auto-queue hypotheses from findings")
+    parser.add_argument("--config", default="config.yaml", help="Path to config.yaml")
+    parser.add_argument("--log", default="experiments/log.jsonl", help="Path to experiment log")
+    parser.add_argument("--limit", type=int, default=DEFAULT_RESULT_COUNT, help="Max results")
+    parser.add_argument("--json", action="store_true", help="Output raw JSON")
+    args = parser.parse_args()
+
+    result = run_literature_search(
+        query=args.query,
+        baseline=args.baseline,
+        related_exp_id=args.related,
+        auto_queue=args.auto_queue,
+        config_path=args.config,
+        log_path=args.log,
+        limit=args.limit,
+    )
+
+    if "error" not in result:
+        filepath = save_literature_results(result)
+        print(f"Saved to {filepath}", file=sys.stderr)
+
+    if args.json:
+        print(json.dumps(result, indent=2, default=str))
+    else:
+        print(format_literature_report(result))
+
+
+if __name__ == "__main__":
+    main()
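
Note: the new script imports load_config and load_experiments from scripts.turing_io, which is not touched by this diff. Judging from the call sites (config.yaml parsed into nested dicts, experiments/log.jsonl read as one experiment record per line), the expected contract is roughly the following. This is a sketch of the assumed interface, not the packaged implementation:

    import json
    import yaml
    from pathlib import Path

    def load_config(config_path: str = "config.yaml") -> dict:
        """Assumed: parse the project config as YAML; a missing or empty file yields {}."""
        path = Path(config_path)
        if not path.exists():
            return {}
        return yaml.safe_load(path.read_text()) or {}

    def load_experiments(log_path: str = "experiments/log.jsonl") -> list[dict]:
        """Assumed: JSON Lines log, one experiment dict per non-blank line."""
        path = Path(log_path)
        if not path.exists():
            return []
        return [json.loads(line) for line in path.read_text().splitlines() if line.strip()]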
@@ -102,6 +102,11 @@ TEMPLATE_DIRS = {
         "equivalence_checker.py",
         "latency_benchmark.py",
         "export_card.py",
+        "literature_search.py",
+        "draft_paper_sections.py",
+        "experiment_queue.py",
+        "smart_retry.py",
+        "fork_experiment.py",
     ],
     "tests": ["__init__.py", "conftest.py"],
 }
@@ -118,6 +123,11 @@ DIRECTORIES_TO_CREATE = [
     "experiments/profiles",
     "experiments/checkpoints",
     "exports",
+    "experiments/literature",
+    "paper/sections",
+    "experiments/retries",
+    "experiments/forks",
+    "experiments/logs",
     "models/best",
     "models/archive",
 ]
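
For reference, when --auto-queue is set and the search succeeds, generate_literature_hypotheses builds entries of the following shape and queue_literature_hypotheses appends them to hypotheses.yaml. Values in angle brackets are placeholders filled from the search results, not real data:

    # Illustrative shape of one queued hypothesis, as constructed by the code above.
    {
        "id": "hyp-lit-001",
        "description": "Investigate approach from: <paper title>",
        "source": "literature",
        "status": "queued",
        "priority": "normal",
        "rationale": "Paper: <paper title> (<year>, <citation count> citations)",
        "paper_url": "https://www.semanticscholar.org/paper/<paperId>",
        "created_at": "<UTC ISO-8601 timestamp>",
    }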