claude-turing 3.4.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +12 -2
  3. package/commands/annotate.md +23 -0
  4. package/commands/archive.md +23 -0
  5. package/commands/changelog.md +22 -0
  6. package/commands/cite.md +23 -0
  7. package/commands/flashback.md +22 -0
  8. package/commands/present.md +23 -0
  9. package/commands/replay.md +23 -0
  10. package/commands/search.md +22 -0
  11. package/commands/template.md +22 -0
  12. package/commands/trend.md +21 -0
  13. package/commands/turing.md +20 -0
  14. package/package.json +1 -1
  15. package/src/install.js +2 -0
  16. package/src/verify.js +10 -0
  17. package/templates/scripts/__pycache__/citation_manager.cpython-314.pyc +0 -0
  18. package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
  19. package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
  20. package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
  21. package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
  22. package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
  23. package/templates/scripts/__pycache__/generate_changelog.cpython-314.pyc +0 -0
  24. package/templates/scripts/__pycache__/generate_figures.cpython-314.pyc +0 -0
  25. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  26. package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
  27. package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
  28. package/templates/scripts/citation_manager.py +436 -0
  29. package/templates/scripts/experiment_annotations.py +392 -0
  30. package/templates/scripts/experiment_archive.py +534 -0
  31. package/templates/scripts/experiment_replay.py +592 -0
  32. package/templates/scripts/experiment_search.py +451 -0
  33. package/templates/scripts/experiment_templates.py +501 -0
  34. package/templates/scripts/generate_changelog.py +464 -0
  35. package/templates/scripts/generate_figures.py +597 -0
  36. package/templates/scripts/scaffold.py +17 -0
  37. package/templates/scripts/session_flashback.py +461 -0
  38. package/templates/scripts/trend_analysis.py +503 -0
@@ -0,0 +1,451 @@
1
+ #!/usr/bin/env python3
2
+ """Natural language experiment search with structured filters.
3
+
4
+ Parse a query like "accuracy > 0.85 random forest last week" into
5
+ semantic keywords + structured filters, then rank experiments by
6
+ relevance. Simple keyword matching — full semantic search requires
7
+ FAISS infrastructure from Phase 9.1.
8
+
9
+ Usage:
10
+ python scripts/experiment_search.py "accuracy > 0.85"
11
+ python scripts/experiment_search.py "random forest status:kept"
12
+ python scripts/experiment_search.py "family:baseline date:2025-01"
13
+ python scripts/experiment_search.py "best model last week" --top 5
14
+ python scripts/experiment_search.py "status:kept" --json
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import argparse
20
+ import json
21
+ import re
22
+ import sys
23
+ from datetime import datetime, timedelta, timezone
24
+ from pathlib import Path
25
+
26
+ import yaml
27
+
28
+ from scripts.turing_io import load_config, load_experiments
29
+
30
# Experiment log used when the caller does not pass an explicit log path.
DEFAULT_LOG_PATH: str = "experiments/log.jsonl"
31
+
32
+
33
+ # --- Query Parsing ---
34
+
35
+
36
# Numeric literal accepted in metric comparisons: optional sign, digits/dots,
# optional exponent. float() performs the final validation.
_METRIC_NUMBER = r"[-+]?[0-9.]+(?:[eE][-+]?[0-9]+)?"
# A single whitespace-free token such as "accuracy>0.85" or "loss<=0.5".
_METRIC_TOKEN_RE = re.compile(rf"^(\w+)(>=|<=|>|<|=)({_METRIC_NUMBER})$")
# The same comparison written with optional spaces ("accuracy > 0.85"); it
# must start and end at a whitespace boundary so "family:x > 3" is untouched.
_SPACED_METRIC_RE = re.compile(
    rf"(?<!\S)(\w+)\s*(>=|<=|>|<|=)\s*({_METRIC_NUMBER})(?=\s|$)"
)
_KEY_VALUE_RE = re.compile(r"^(\w+):(.+)$")
# Natural-language date phrases, checked in this order; first hit wins.
_DATE_PHRASES = (
    (re.compile(r"\blast week\b"), "last-week"),
    (re.compile(r"\blast month\b"), "last-month"),
    (re.compile(r"\btoday\b"), "today"),
)
_STOPWORDS = frozenset({
    "the", "a", "an", "and", "or", "with", "from", "in", "on",
    "for", "to", "of", "is", "was", "best", "model", "experiment",
})


def parse_query(query: str) -> dict:
    """Parse a natural language query into structured filters + keywords.

    Extracts:
    - Metric comparisons: accuracy>0.85, loss<0.5, f1>=0.9. Spaces around
      the operator are accepted ("accuracy > 0.85"), as are negative and
      exponent values ("delta>-0.1", "lr=1e-3").
    - Status filters: status:kept, status:discarded
    - Family filters: family:baseline
    - Tag filters: tag:production
    - Date ranges: date:2025-01, date:last-week, date:last-month, or the
      whole-word phrases "last week", "last month", "today".
    - Any other key:value token with a numeric value becomes an equality
      metric filter.
    - Remaining text becomes lower-cased keyword search terms with
      stopwords removed.

    Args:
        query: Raw query string as typed by the user.

    Returns:
        Dict with 'keywords', 'metric_filters', 'status', 'family',
        'tags', 'date_range'.
    """
    filters: dict = {
        "keywords": [],
        "metric_filters": [],
        "status": None,
        "family": None,
        "tags": [],
        "date_range": None,
    }

    # Collapse "metric <op> number" into one token so that whitespace
    # splitting below keeps the comparison together.
    normalized = _SPACED_METRIC_RE.sub(r"\1\2\3", query)

    remaining = []
    for token in normalized.split():
        metric_match = _METRIC_TOKEN_RE.match(token)
        if metric_match:
            metric, op, raw_value = metric_match.groups()
            try:
                value = float(raw_value)
            except ValueError:
                # e.g. "x>1.2.3": looks like a comparison, is not a number.
                remaining.append(token)
            else:
                filters["metric_filters"].append({
                    "metric": metric,
                    "operator": op,
                    "value": value,
                })
            continue

        kv_match = _KEY_VALUE_RE.match(token)
        if kv_match:
            key, value = kv_match.groups()
            key_lower = key.lower()
            if key_lower == "status":
                filters["status"] = value
            elif key_lower == "family":
                filters["family"] = value
            elif key_lower == "tag":
                filters["tags"].append(value)
            elif key_lower == "date":
                filters["date_range"] = _parse_date_filter(value)
            else:
                # Treat unknown key:value as metric filter with equality
                try:
                    filters["metric_filters"].append({
                        "metric": key,
                        "operator": "=",
                        "value": float(value),
                    })
                except ValueError:
                    remaining.append(token)
            continue

        remaining.append(token)

    # Natural-language date phrases only apply when no explicit date:
    # filter produced a range. Whole-word matching keeps "last weekend"
    # from being read as "last week".
    remaining_text = " ".join(remaining).lower()
    if not filters["date_range"]:
        for pattern, alias in _DATE_PHRASES:
            if pattern.search(remaining_text):
                filters["date_range"] = _parse_date_filter(alias)
                remaining_text = pattern.sub("", remaining_text).strip()
                break

    filters["keywords"] = [
        word for word in remaining_text.split()
        if word not in _STOPWORDS
    ]

    return filters
127
+
128
+
129
+ def _parse_date_filter(value: str) -> dict | None:
130
+ """Parse date filter value into a start/end range."""
131
+ now = datetime.now(timezone.utc)
132
+
133
+ if value == "last-week":
134
+ start = now - timedelta(days=7)
135
+ return {"start": start.isoformat(), "end": now.isoformat()}
136
+ elif value == "last-month":
137
+ start = now - timedelta(days=30)
138
+ return {"start": start.isoformat(), "end": now.isoformat()}
139
+ elif value == "today":
140
+ start = now.replace(hour=0, minute=0, second=0, microsecond=0)
141
+ return {"start": start.isoformat(), "end": now.isoformat()}
142
+
143
+ # YYYY-MM format
144
+ ym_match = re.match(r"^(\d{4})-(\d{2})$", value)
145
+ if ym_match:
146
+ year, month = int(ym_match.group(1)), int(ym_match.group(2))
147
+ start = datetime(year, month, 1, tzinfo=timezone.utc)
148
+ if month == 12:
149
+ end = datetime(year + 1, 1, 1, tzinfo=timezone.utc)
150
+ else:
151
+ end = datetime(year, month + 1, 1, tzinfo=timezone.utc)
152
+ return {"start": start.isoformat(), "end": end.isoformat()}
153
+
154
+ # YYYY-MM-DD format
155
+ ymd_match = re.match(r"^(\d{4})-(\d{2})-(\d{2})$", value)
156
+ if ymd_match:
157
+ year, month, day = int(ymd_match.group(1)), int(ymd_match.group(2)), int(ymd_match.group(3))
158
+ start = datetime(year, month, day, tzinfo=timezone.utc)
159
+ end = start + timedelta(days=1)
160
+ return {"start": start.isoformat(), "end": end.isoformat()}
161
+
162
+ return None
163
+
164
+
165
+ # --- Filtering ---
166
+
167
+
168
+ def apply_filters(experiments: list[dict], filters: dict) -> list[dict]:
169
+ """Apply structured filters to experiment list.
170
+
171
+ Args:
172
+ experiments: Raw experiment list from log.
173
+ filters: Parsed filter dict from parse_query.
174
+
175
+ Returns:
176
+ Filtered experiments.
177
+ """
178
+ results = list(experiments)
179
+
180
+ # Status filter
181
+ if filters.get("status"):
182
+ results = [e for e in results
183
+ if e.get("status", "").lower() == filters["status"].lower()]
184
+
185
+ # Family filter
186
+ if filters.get("family"):
187
+ family = filters["family"].lower()
188
+ results = [e for e in results
189
+ if family in (e.get("family", "") or "").lower()]
190
+
191
+ # Date range filter
192
+ date_range = filters.get("date_range")
193
+ if date_range:
194
+ start = date_range.get("start", "")
195
+ end = date_range.get("end", "")
196
+ results = [e for e in results
197
+ if start <= e.get("timestamp", "") <= end]
198
+
199
+ # Tag filter
200
+ for tag in filters.get("tags", []):
201
+ tag_lower = tag.lower()
202
+ results = [e for e in results
203
+ if tag_lower in [t.lower() for t in e.get("tags", [])]]
204
+
205
+ # Metric filters
206
+ for mf in filters.get("metric_filters", []):
207
+ metric = mf["metric"]
208
+ op = mf["operator"]
209
+ threshold = mf["value"]
210
+ filtered = []
211
+ for e in results:
212
+ val = e.get("metrics", {}).get(metric)
213
+ if val is None:
214
+ continue
215
+ try:
216
+ val = float(val)
217
+ except (ValueError, TypeError):
218
+ continue
219
+ if _compare(val, op, threshold):
220
+ filtered.append(e)
221
+ results = filtered
222
+
223
+ return results
224
+
225
+
226
+ def _compare(val: float, op: str, threshold: float) -> bool:
227
+ """Apply comparison operator."""
228
+ if op == ">":
229
+ return val > threshold
230
+ elif op == ">=":
231
+ return val >= threshold
232
+ elif op == "<":
233
+ return val < threshold
234
+ elif op == "<=":
235
+ return val <= threshold
236
+ elif op == "=":
237
+ return abs(val - threshold) < 1e-6
238
+ return False
239
+
240
+
241
+ # --- Ranking ---
242
+
243
+
244
def rank_by_keywords(experiments: list[dict], keywords: list[str]) -> list[tuple[dict, float]]:
    """Score and order experiments by how often the keywords occur in them.

    Each keyword found in an experiment's searchable text contributes 1.0
    for the first occurrence plus 0.5 for every further occurrence.
    Experiments that match no keyword keep a score of 0.0 and are still
    returned. With no keywords, every experiment gets a score of 1.0 and
    the input order is kept.

    Returns:
        List of (experiment, score) tuples, highest score first; ties are
        ordered by 'timestamp' descending.
    """
    if not keywords:
        return [(exp, 1.0) for exp in experiments]

    def relevance(exp: dict) -> float:
        haystack = _build_searchable_text(exp)
        hit_counts = (haystack.count(kw.lower()) for kw in keywords)
        return sum(
            (1.0 + 0.5 * (hits - 1) for hits in hit_counts if hits > 0),
            0.0,
        )

    pairs = [(exp, relevance(exp)) for exp in experiments]
    return sorted(
        pairs,
        key=lambda pair: (pair[1], pair[0].get("timestamp", "")),
        reverse=True,
    )
270
+
271
+
272
+ def _build_searchable_text(exp: dict) -> str:
273
+ """Build a single searchable string from experiment fields."""
274
+ parts = [
275
+ exp.get("experiment_id", ""),
276
+ exp.get("description", ""),
277
+ exp.get("family", ""),
278
+ exp.get("status", ""),
279
+ str(exp.get("config", {}).get("model_type", "")),
280
+ str(exp.get("config", {}).get("experiment_type", "")),
281
+ " ".join(exp.get("tags", [])),
282
+ ]
283
+ return " ".join(parts).lower()
284
+
285
+
286
+ # --- Report ---
287
+
288
+
289
def format_search_report(results: list[tuple[dict, float]], filters: dict, query: str) -> str:
    """Format search results as a ranked markdown table.

    The report starts with the query and a one-line summary of every active
    filter (status, family, tags, metric comparisons, date range,
    keywords), followed by the result count and one table row per result.

    Args:
        results: (experiment, score) pairs, already ranked.
        filters: Parsed filter dict, used only for the summary line.
        query: The original query string, echoed in the header.

    Returns:
        The markdown report as a single newline-joined string.
    """
    lines = [
        "# Experiment Search Results",
        "",
        f"**Query:** `{query}`",
    ]

    # Show parsed filters
    active_filters = []
    if filters.get("status"):
        active_filters.append(f"status={filters['status']}")
    if filters.get("family"):
        active_filters.append(f"family={filters['family']}")
    for tag in filters.get("tags") or []:
        active_filters.append(f"tag={tag}")
    for mf in filters.get("metric_filters") or []:
        active_filters.append(f"{mf['metric']}{mf['operator']}{mf['value']}")
    if filters.get("date_range"):
        active_filters.append("date range applied")
    if filters.get("keywords"):
        active_filters.append(f"keywords: {', '.join(filters['keywords'])}")

    if active_filters:
        lines.append(f"**Filters:** {' | '.join(active_filters)}")
    lines.extend(["", f"**{len(results)} result(s)**", ""])

    if not results:
        lines.append("No experiments matched the query.")
        return "\n".join(lines)

    lines.extend([
        "| Rank | Experiment | Status | Family | Score | Key Metrics |",
        "|------|-----------|--------|--------|-------|-------------|",
    ])

    for rank, (exp, score) in enumerate(results, 1):
        eid = exp.get("experiment_id", "?")
        status = exp.get("status", "?")
        family = exp.get("family", "—") or "—"
        # A log entry may carry "metrics": null; render it as no metrics.
        metrics = exp.get("metrics") or {}
        # Show only the first 3 metrics, in stored order.
        metric_strs = [
            f"{name}={value:.4f}" if isinstance(value, float) else f"{name}={value}"
            for name, value in list(metrics.items())[:3]
        ]
        metrics_display = ", ".join(metric_strs) or "—"
        # A zero score means no keyword matched; show a dash instead.
        score_display = f"{score:.1f}" if score > 0 else "—"
        lines.append(
            f"| {rank} | {eid} | {status} | {family} | {score_display} | {metrics_display} |"
        )

    return "\n".join(lines)
340
+
341
+
342
def save_search_report(report: dict, path: str = "experiments/searches") -> Path:
    """Write a search report to a timestamped YAML file.

    The target directory is created if needed. The file is named
    ``search-<YYYYmmdd-HHMMSS>.yaml`` using the current UTC time.

    Args:
        report: Search result dict to serialise.
        path: Directory that receives the file.

    Returns:
        Path of the file that was written.
    """
    target_dir = Path(path)
    target_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now(timezone.utc).strftime('%Y%m%d-%H%M%S')
    destination = target_dir / f"search-{stamp}.yaml"

    with destination.open("w") as handle:
        # Block style, keys kept in insertion order.
        yaml.dump(report, handle, default_flow_style=False, sort_keys=False)
    return destination
350
+
351
+
352
+ # --- Orchestration ---
353
+
354
+
355
def run_search(
    query: str,
    top: int = 20,
    log_path: str = DEFAULT_LOG_PATH,
    config_path: str = "config.yaml",
) -> dict:
    """Run experiment search.

    Loads the experiment log, parses the query, applies the structured
    filters, ranks the survivors by keyword relevance and keeps the first
    ``top`` of them.

    Args:
        query: Natural language search query.
        top: Maximum results to return. Zero or a negative value returns
            no results.
        log_path: Path to experiment log.
        config_path: Path to config.yaml.

    Returns:
        Search result dict. When the log has no experiments it contains
        'timestamp', 'query' and 'error'. Otherwise it contains
        'timestamp', 'query', 'filters', 'total_experiments', 'matched'
        (number of results returned), 'total_matched' (matches before the
        top-N limit) and 'results'.
    """
    # The loaded config is not used below; the call is kept so that any
    # error load_config raises still surfaces as before.
    # NOTE(review): confirm whether this load is still required.
    load_config(config_path)
    experiments = load_experiments(log_path)

    if not experiments:
        return {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "query": query,
            "error": "No experiments found",
        }

    filters = parse_query(query)
    filtered = apply_filters(experiments, filters)
    ranked_all = rank_by_keywords(filtered, filters.get("keywords", []))

    # Apply top-N limit. Clamp at zero: a negative slice bound would
    # silently drop results from the end instead of limiting the count.
    ranked = ranked_all[:max(top, 0)]

    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "query": query,
        "filters": filters,
        "total_experiments": len(experiments),
        "matched": len(ranked),
        "total_matched": len(ranked_all),
        "results": [
            {
                "experiment_id": exp.get("experiment_id"),
                "status": exp.get("status"),
                "family": exp.get("family"),
                "timestamp": exp.get("timestamp"),
                "metrics": exp.get("metrics", {}),
                "score": score,
            }
            for exp, score in ranked
        ],
    }
407
+
408
+
409
def main() -> None:
    """Command-line entry point: parse arguments, run the search, print it.

    Without a query, usage help is written to stderr and the process exits
    with status 1. With --json the raw report is printed as JSON; otherwise
    a markdown report is printed, or the report's error is written to
    stderr and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(description="Natural language experiment search")
    cli.add_argument(
        "query",
        nargs="?",
        default=None,
        help="Search query (e.g., 'accuracy>0.85 random forest')",
    )
    cli.add_argument(
        "--top",
        type=int,
        default=20,
        help="Maximum number of results (default: 20)",
    )
    cli.add_argument("--config", default="config.yaml", help="Path to config.yaml")
    cli.add_argument("--log", default=DEFAULT_LOG_PATH, help="Path to experiment log")
    cli.add_argument("--json", action="store_true", help="Output raw JSON")
    args = cli.parse_args()

    if not args.query:
        usage_lines = (
            "Usage: experiment_search.py 'query string'",
            "Examples:",
            " experiment_search.py 'accuracy>0.85'",
            " experiment_search.py 'status:kept family:baseline'",
            " experiment_search.py 'random forest last week'",
        )
        for usage_line in usage_lines:
            print(usage_line, file=sys.stderr)
        sys.exit(1)

    report = run_search(
        query=args.query,
        top=args.top,
        log_path=args.log,
        config_path=args.config,
    )

    if args.json:
        # default=str stringifies any value json cannot encode natively.
        print(json.dumps(report, indent=2, default=str))
        return

    if "error" in report:
        print(f"ERROR: {report['error']}", file=sys.stderr)
        sys.exit(1)

    # Rebuild the (row, score) pairs the formatter expects from the report.
    scored_rows = [(row, row.get("score", 0)) for row in report.get("results", [])]
    print(format_search_report(scored_rows, report.get("filters", {}), args.query))
448
+
449
+
450
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()