claude-turing 3.3.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +13 -2
  3. package/commands/annotate.md +23 -0
  4. package/commands/archive.md +23 -0
  5. package/commands/cite.md +23 -0
  6. package/commands/flashback.md +22 -0
  7. package/commands/merge.md +24 -0
  8. package/commands/present.md +23 -0
  9. package/commands/prune.md +26 -0
  10. package/commands/quantize.md +24 -0
  11. package/commands/replay.md +23 -0
  12. package/commands/search.md +22 -0
  13. package/commands/surgery.md +27 -0
  14. package/commands/template.md +22 -0
  15. package/commands/trend.md +21 -0
  16. package/commands/turing.md +22 -0
  17. package/package.json +1 -1
  18. package/src/install.js +2 -0
  19. package/src/verify.js +11 -0
  20. package/templates/scripts/__pycache__/architecture_surgery.cpython-314.pyc +0 -0
  21. package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
  22. package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
  23. package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
  24. package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
  25. package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
  26. package/templates/scripts/__pycache__/model_merger.cpython-314.pyc +0 -0
  27. package/templates/scripts/__pycache__/model_pruning.cpython-314.pyc +0 -0
  28. package/templates/scripts/__pycache__/model_quantization.cpython-314.pyc +0 -0
  29. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  30. package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
  31. package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
  32. package/templates/scripts/architecture_surgery.py +238 -0
  33. package/templates/scripts/citation_manager.py +436 -0
  34. package/templates/scripts/experiment_annotations.py +392 -0
  35. package/templates/scripts/experiment_archive.py +534 -0
  36. package/templates/scripts/experiment_replay.py +592 -0
  37. package/templates/scripts/experiment_search.py +451 -0
  38. package/templates/scripts/experiment_templates.py +501 -0
  39. package/templates/scripts/generate_changelog.py +464 -0
  40. package/templates/scripts/generate_figures.py +597 -0
  41. package/templates/scripts/model_merger.py +277 -0
  42. package/templates/scripts/model_pruning.py +182 -0
  43. package/templates/scripts/model_quantization.py +177 -0
  44. package/templates/scripts/scaffold.py +20 -0
  45. package/templates/scripts/session_flashback.py +461 -0
  46. package/templates/scripts/trend_analysis.py +503 -0
@@ -0,0 +1,238 @@
1
+ #!/usr/bin/env python3
2
+ """Architecture modification for the autoresearch pipeline.
3
+
4
+ Programmatic architecture changes: add/remove layers, widen/narrow,
5
+ swap activation functions, inject skip connections, change normalization.
6
+ Produces a modified config and instructions for the modified experiment.
7
+
8
+ Usage:
9
+ python scripts/architecture_surgery.py exp-042 --op widen 2
10
+ python scripts/architecture_surgery.py exp-042 --op add-layer
11
+ python scripts/architecture_surgery.py exp-042 --op swap-activation relu gelu
12
+ python scripts/architecture_surgery.py --json
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import argparse
18
+ import json
19
+ import math
20
+ import sys
21
+ from datetime import datetime, timezone
22
+ from pathlib import Path
23
+
24
+ import yaml
25
+
26
+ from scripts.turing_io import load_config, load_experiments
27
+
28
+ DEFAULT_LOG_PATH = "experiments/log.jsonl"
29
+ OPERATIONS = ["add-layer", "remove-layer", "widen", "narrow", "swap-activation",
30
+ "add-skip", "add-norm", "deepen", "swap-objective"]
31
+
32
+
33
def plan_operation(
    operation: str,
    config: dict,
    hyperparams: dict,
    model_type: str,
    args: list[str] | None = None,
) -> dict:
    """Plan an architecture modification.

    Builds a plan describing how the experiment's hyperparameters should be
    transformed for the requested operation. The plan carries the untouched
    original config, a mutated copy, human-readable instructions, and — where
    it can be estimated — the expected parameter-count change.
    """
    extra = args or []
    plan = {
        "operation": operation,
        "model_type": model_type,
        "original_config": hyperparams.copy(),
        "new_config": hyperparams.copy(),
        "instructions": [],
        "param_change": None,
    }
    new_cfg = plan["new_config"]
    notes = plan["instructions"]

    # Coarse model-family detection drives which knob each operation turns.
    family = model_type.lower()
    is_tree = any(m in family for m in ("xgboost", "lightgbm", "forest", "gbm", "catboost"))
    is_neural = any(m in family for m in ("mlp", "nn", "pytorch", "tensorflow", "transformer"))

    if operation == "widen":
        factor = float(extra[0]) if extra else 2.0
        if is_neural:
            hs = hyperparams.get("hidden_size", 256)
            new_hs = int(hs * factor)
            new_cfg["hidden_size"] = new_hs
            notes.append(f"Multiply hidden dimensions: {hs} → {new_hs} ({factor}x)")
            plan["param_change"] = f"+{(factor**2 - 1)*100:.0f}% parameters (quadratic in width)"
        elif is_tree:
            n = hyperparams.get("n_estimators", 100)
            new_n = int(n * factor)
            new_cfg["n_estimators"] = new_n
            notes.append(f"Increase estimators: {n} → {new_n}")
            plan["param_change"] = f"+{(factor - 1)*100:.0f}% trees"
        else:
            notes.append(f"Widen by {factor}x — adjust model-specific width parameters")

    elif operation == "narrow":
        factor = float(extra[0]) if extra else 0.5
        if is_neural:
            hs = hyperparams.get("hidden_size", 256)
            new_hs = max(8, int(hs * factor))  # floor of 8 units
            new_cfg["hidden_size"] = new_hs
            notes.append(f"Reduce hidden dimensions: {hs} → {new_hs} ({factor}x)")
        elif is_tree:
            n = hyperparams.get("n_estimators", 100)
            new_n = max(1, int(n * factor))  # keep at least one tree
            new_cfg["n_estimators"] = new_n
            notes.append(f"Reduce estimators: {n} → {new_n}")

    elif operation == "add-layer":
        if is_neural:
            n_layers = hyperparams.get("n_layers", hyperparams.get("layers", 3))
            new_cfg["n_layers"] = n_layers + 1
            notes.extend([
                f"Add layer: {n_layers} → {n_layers + 1}",
                "New layer initialized with default weights",
                "Auto warm-start: existing layers loaded from source",
            ])
            plan["param_change"] = f"+1 layer ({n_layers} → {n_layers + 1})"
        else:
            notes.append("add-layer not applicable for non-neural models")

    elif operation == "remove-layer":
        if is_neural:
            n_layers = hyperparams.get("n_layers", hyperparams.get("layers", 3))
            if n_layers > 1:
                new_cfg["n_layers"] = n_layers - 1
                notes.append(f"Remove layer: {n_layers} → {n_layers - 1}")
            else:
                notes.append("Cannot remove — only 1 layer remaining")
        else:
            notes.append("remove-layer not applicable for non-neural models")

    elif operation == "deepen":
        if is_tree:
            depth = hyperparams.get("max_depth", 6)
            new_depth = depth + 2
            new_cfg["max_depth"] = new_depth
            notes.append(f"Increase max depth: {depth} → {new_depth}")
        elif is_neural:
            n_layers = hyperparams.get("n_layers", 3)
            new_cfg["n_layers"] = n_layers + 2
            notes.append(f"Add 2 layers: {n_layers} → {n_layers + 2}")

    elif operation == "swap-activation":
        from_act, to_act = (extra[0], extra[1]) if len(extra) >= 2 else ("relu", "gelu")
        new_cfg["activation"] = to_act
        notes.append(f"Swap activation: {from_act} → {to_act}")

    elif operation == "add-skip":
        new_cfg["skip_connections"] = True
        notes.append("Inject residual/skip connections between layers")

    elif operation == "add-norm":
        norm_type = extra[0] if extra else "batch_norm"
        new_cfg["normalization"] = norm_type
        notes.append(f"Add {norm_type} after each layer")

    elif operation == "swap-objective":
        if len(extra) >= 2:
            from_obj, to_obj = extra[0], extra[1]
        else:
            from_obj, to_obj = hyperparams.get("objective", "logloss"), "focal"
        new_cfg["objective"] = to_obj
        notes.append(f"Swap objective: {from_obj} → {to_obj}")

    else:
        notes.append(f"Unknown operation: {operation}")

    return plan
151
+
152
+
153
def surgery_report(
    exp_id: str,
    operation: str,
    op_args: list[str] | None = None,
    config_path: str = "config.yaml",
    log_path: str = DEFAULT_LOG_PATH,
) -> dict:
    """Generate a surgery report for one logged experiment.

    Looks the experiment up in the log, plans the requested operation
    against its recorded config, and wraps the plan with provenance
    metadata (generation timestamp and warm-start source).
    """
    matched = None
    for entry in load_experiments(log_path):
        if entry.get("experiment_id") == exp_id:
            matched = entry
            break

    if not matched:
        return {"error": f"Experiment {exp_id} not found"}

    exp_config = matched.get("config", {})
    plan = plan_operation(
        operation,
        exp_config,
        exp_config.get("hyperparams", {}),
        exp_config.get("model_type", "unknown"),
        op_args,
    )

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "experiment_id": exp_id,
        "plan": plan,
        "warm_start_from": exp_id,
    }
179
+
180
+
181
def save_surgery_report(report: dict, output_dir: str = "experiments/surgery") -> Path:
    """Persist a surgery report as YAML under *output_dir*.

    The filename combines the experiment id and operation name; parent
    directories are created as needed. Returns the written path.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    exp_id = report.get("experiment_id", "unknown")
    op_name = report.get("plan", {}).get("operation", "unknown")
    report_path = target_dir / "{}-{}.yaml".format(exp_id, op_name)
    with open(report_path, "w") as handle:
        yaml.dump(report, handle, default_flow_style=False, sort_keys=False)
    return report_path
188
+
189
+
190
def format_surgery_report(report: dict) -> str:
    """Render a surgery report as Markdown text.

    Error reports collapse to a single ``ERROR: ...`` line; otherwise the
    output covers instructions, parameter change, changed config keys, and
    the warm-start source.
    """
    if "error" in report:
        return f"ERROR: {report['error']}"

    plan = report.get("plan", {})

    out = [
        f"# Surgery: {plan.get('operation', '?')} ({report.get('experiment_id', '?')})",
        "",
        f"*Generated {report.get('generated_at', 'N/A')[:19]}*",
        f"**Model:** {plan.get('model_type', '?')}",
        "",
        "## Instructions",
        "",
    ]
    out += [f"{num}. {step}" for num, step in enumerate(plan.get("instructions", []), 1)]
    out.append("")

    if plan.get("param_change"):
        out += [f"**Parameter change:** {plan['param_change']}", ""]

    # Only keys present in the new config are diffed, mirroring the plan shape.
    before = plan.get("original_config", {})
    after = plan.get("new_config", {})
    deltas = [(k, before.get(k), after[k]) for k in after if before.get(k) != after.get(k)]
    if deltas:
        out += ["## Config Changes", ""]
        out += [f"- `{k}`: {was} → {now}" for k, was, now in deltas]
        out.append("")

    out.append(f"**Warm-start from:** {report.get('warm_start_from', '?')}")

    return "\n".join(out)
222
+
223
+
224
def main() -> None:
    """CLI entry point: plan a surgery, save it, and print the report."""
    parser = argparse.ArgumentParser(description="Architecture modification")
    parser.add_argument("exp_id")
    parser.add_argument("--op", required=True, help="Operation name")
    parser.add_argument("op_args", nargs="*", help="Operation arguments")
    parser.add_argument("--config", default="config.yaml")
    parser.add_argument("--log", default=DEFAULT_LOG_PATH)
    parser.add_argument("--json", action="store_true")
    parsed = parser.parse_args()

    report = surgery_report(parsed.exp_id, parsed.op, parsed.op_args, parsed.config, parsed.log)

    # Only persist successful reports; the save notice goes to stderr so
    # stdout stays clean for the report itself.
    if "error" not in report:
        saved_path = save_surgery_report(report)
        print(f"Saved to {saved_path}", file=sys.stderr)

    if parsed.json:
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_surgery_report(report))


if __name__ == "__main__":
    main()
@@ -0,0 +1,436 @@
1
+ #!/usr/bin/env python3
2
+ """Citation and attribution manager for the autoresearch pipeline.
3
+
4
+ Tracks academic citations associated with experiments. Every method,
5
+ dataset, technique, and codebase used in the research campaign should
6
+ have a citation. This script manages the citation store, audits for
7
+ missing attributions, and generates BibTeX output.
8
+
9
+ Usage:
10
+ python scripts/citation_manager.py add exp-042 --key Chen2016 --title "XGBoost" --url "..."
11
+ python scripts/citation_manager.py list
12
+ python scripts/citation_manager.py check
13
+ python scripts/citation_manager.py bib
14
+ python scripts/citation_manager.py --json
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import argparse
20
+ import json
21
+ import re
22
+ import sys
23
+ from datetime import datetime, timezone
24
+ from pathlib import Path
25
+
26
+ import yaml
27
+
28
+ from scripts.turing_io import load_config, load_experiments
29
+
30
+ DEFAULT_LOG_PATH = "experiments/log.jsonl"
31
+ DEFAULT_CITATIONS_PATH = "experiments/citations.yaml"
32
+ VALID_TYPES = ["method", "dataset", "technique", "codebase"]
33
+
34
+ # Keywords that suggest a method/technique needing citation
35
+ METHOD_KEYWORDS = {
36
+ "xgboost": "XGBoost (Chen & Guestrin, 2016)",
37
+ "lightgbm": "LightGBM (Ke et al., 2017)",
38
+ "catboost": "CatBoost (Prokhorenkova et al., 2018)",
39
+ "random_forest": "Random Forest (Breiman, 2001)",
40
+ "gradient_boosting": "Gradient Boosting (Friedman, 2001)",
41
+ "adam": "Adam optimizer (Kingma & Ba, 2015)",
42
+ "sgd": "SGD with momentum (Sutskever et al., 2013)",
43
+ "dropout": "Dropout (Srivastava et al., 2014)",
44
+ "batch_norm": "Batch Normalization (Ioffe & Szegedy, 2015)",
45
+ "resnet": "ResNet (He et al., 2016)",
46
+ "transformer": "Transformer (Vaswani et al., 2017)",
47
+ "bert": "BERT (Devlin et al., 2019)",
48
+ "lstm": "LSTM (Hochreiter & Schmidhuber, 1997)",
49
+ "svm": "SVM (Cortes & Vapnik, 1995)",
50
+ "lasso": "Lasso (Tibshirani, 1996)",
51
+ "ridge": "Ridge Regression (Hoerl & Kennard, 1970)",
52
+ "elastic_net": "Elastic Net (Zou & Hastie, 2005)",
53
+ "pca": "PCA (Pearson, 1901)",
54
+ "tsne": "t-SNE (van der Maaten & Hinton, 2008)",
55
+ "umap": "UMAP (McInnes et al., 2018)",
56
+ "cross_validation": "Cross-validation (Stone, 1974)",
57
+ "smote": "SMOTE (Chawla et al., 2002)",
58
+ }
59
+
60
+
61
+ # --- Storage ---
62
+
63
+
64
def load_citations(path: str = DEFAULT_CITATIONS_PATH) -> list[dict]:
    """Read the citation store from YAML.

    A missing or zero-byte file — and any YAML payload that is not a
    list — yields an empty list rather than an error.
    """
    store = Path(path)
    if not store.exists() or store.stat().st_size == 0:
        return []
    with open(store) as handle:
        loaded = yaml.safe_load(handle)
    if isinstance(loaded, list):
        return loaded
    return []
72
+
73
+
74
def save_citations(citations: list[dict], path: str = DEFAULT_CITATIONS_PATH) -> Path:
    """Write the citation list to YAML, creating parent directories as needed.

    Returns the path written.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w") as handle:
        yaml.dump(citations, handle, default_flow_style=False, sort_keys=False)
    return target
81
+
82
+
83
+ # --- Operations ---
84
+
85
+
86
def add_citation(
    experiment_id: str,
    key: str,
    title: str,
    authors: str | None = None,
    year: int | None = None,
    url: str | None = None,
    doi: str | None = None,
    cite_type: str = "method",
    citations_path: str = DEFAULT_CITATIONS_PATH,
    log_path: str = DEFAULT_LOG_PATH,
) -> dict:
    """Add or update a citation, associating it with an experiment.

    A pre-existing citation key gets the experiment appended to its
    experiment list and any supplied bibliographic fields refreshed;
    otherwise a brand-new entry is created. The store is persisted in
    both cases. Returns an error dict for unknown experiments or types.
    """
    known_ids = {e.get("experiment_id") for e in load_experiments(log_path)}
    if experiment_id not in known_ids:
        return {"error": f"Experiment '{experiment_id}' not found in log"}

    if cite_type not in VALID_TYPES:
        return {"error": f"Invalid type '{cite_type}'. Valid: {VALID_TYPES}"}

    citations = load_citations(citations_path)
    existing = next((c for c in citations if c.get("key") == key), None)

    if existing is not None:
        linked = existing.setdefault("experiments", [])
        if experiment_id not in linked:
            linked.append(experiment_id)
        # Refresh only the bibliographic fields the caller actually supplied.
        for field, value in (("authors", authors), ("year", year), ("url", url), ("doi", doi)):
            if value:
                existing[field] = value
        save_citations(citations, citations_path)
        return {"action": "updated", "citation": existing}

    entry = {
        "key": key,
        "title": title,
        "authors": authors or "",
        "year": year or 0,
        "url": url or "",
        "doi": doi or "",
        "type": cite_type,
        "experiments": [experiment_id],
    }
    citations.append(entry)
    save_citations(citations, citations_path)
    return {"action": "added", "citation": entry}
148
+
149
+
150
def list_citations(
    citations_path: str = DEFAULT_CITATIONS_PATH,
) -> dict:
    """List all citations grouped by type with experiment associations.

    Returns a dict with the total count, a type → citations mapping
    (types keyed in first-appearance order), and the flat citation list.
    """
    citations = load_citations(citations_path)

    # Collect types in first-appearance order, then bucket by type.
    kinds: list[str] = []
    for entry in citations:
        kind = entry.get("type", "unknown")
        if kind not in kinds:
            kinds.append(kind)
    grouped = {
        kind: [c for c in citations if c.get("type", "unknown") == kind]
        for kind in kinds
    }

    return {
        "total": len(citations),
        "by_type": grouped,
        "citations": citations,
    }
164
+
165
+
166
def check_citations(
    citations_path: str = DEFAULT_CITATIONS_PATH,
    log_path: str = DEFAULT_LOG_PATH,
    config_path: str = "config.yaml",
) -> dict:
    """Audit for missing citations — methods used without attribution.

    Scans experiment configs and descriptions for known method keywords
    that lack a corresponding citation entry.

    Returns a dict with ``missing`` (keyword, suggested citation, and the
    experiments/config it was found in), ``covered`` keywords, the number
    of keywords checked, and a ``covered/found`` coverage string.
    """
    citations = load_citations(citations_path)
    experiments = load_experiments(log_path)
    config = load_config(config_path)

    cited_keys = {c.get("key", "").lower() for c in citations}
    cited_titles = [c.get("title", "").lower() for c in citations]

    # Serialize once up front: previously the config — and every experiment —
    # was re-dumped to JSON on each iteration of the keyword loop, doing
    # O(keywords * experiments) redundant serialization work.
    exp_blobs = [
        (exp.get("experiment_id", "?"), json.dumps(exp, default=str).lower())
        for exp in experiments
    ]
    config_blob = json.dumps(config, default=str).lower()

    missing: list[dict] = []
    covered: list[str] = []

    for keyword, suggestion in METHOD_KEYWORDS.items():
        kw = keyword.lower()

        # Everywhere this method keyword appears in the campaign.
        found_in = [exp_id for exp_id, blob in exp_blobs if kw in blob]
        if kw in config_blob:
            found_in.append("config.yaml")
        if not found_in:
            continue  # keyword unused — nothing to attribute

        # Cited when the keyword matches a citation key exactly or appears
        # anywhere in a citation title (an exact title match is a special
        # case of the substring test).
        if kw in cited_keys or any(kw in title for title in cited_titles):
            covered.append(keyword)
        else:
            missing.append({
                "keyword": keyword,
                "suggestion": suggestion,
                "found_in": found_in,
            })

    return {
        "missing": missing,
        "covered": covered,
        "total_checked": len(METHOD_KEYWORDS),
        "coverage": f"{len(covered)}/{len(covered) + len(missing)}" if (covered or missing) else "N/A",
    }
224
+
225
+
226
def generate_bibtex(citations_path: str = DEFAULT_CITATIONS_PATH) -> str:
    """Generate BibTeX output from all citations.

    Entries with a DOI become ``@article``; everything else is ``@misc``.
    Optional fields are emitted only when non-empty, and each entry gets a
    note recording its type and associated experiments.
    """
    citations = load_citations(citations_path)
    if not citations:
        return "% No citations found.\n"

    entries = []
    for cite in citations:
        doi = cite.get("doi", "")
        entry_type = "article" if doi else "misc"

        body = [f"@{entry_type}{{{cite.get('key', 'unknown')},"]
        body.append(f" title = {{{cite.get('title', '')}}},")
        for field, value in (
            ("author", cite.get("authors", "")),
            ("year", cite.get("year", 0)),
            ("url", cite.get("url", "")),
            ("doi", doi),
        ):
            if value:
                body.append(f" {field} = {{{value}}},")
        note = f"Type: {cite.get('type', 'unknown')}. Used in: {', '.join(cite.get('experiments', []))}"
        body.append(f" note = {{{note}}},")
        body.append("}")
        entries.append("\n".join(body))

    header = f"% Auto-generated by Turing citation manager\n% {len(entries)} citation(s)\n"
    return header + "\n\n".join(entries) + "\n"
263
+
264
+
265
+ # --- Report ---
266
+
267
+
268
def format_citations_report(result: dict, action: str) -> str:
    """Format citation results as readable text.

    Renders a Markdown-style report whose shape depends on *action*:
    ``list`` groups citations by type, ``check`` summarizes the audit,
    and ``add`` confirms the stored/updated entry.
    """
    out: list[str] = []

    if action == "list":
        out.append(f"# Citations ({result.get('total', 0)} total)")
        out.append("")
        by_type = result.get("by_type", {})
        for ctype in VALID_TYPES:
            bucket = by_type.get(ctype, [])
            if not bucket:
                continue
            out += [f"## {ctype.title()} ({len(bucket)})", ""]
            for cite in bucket:
                authors = cite.get("authors", "")
                year = cite.get("year", "")
                suffix = f" ({authors}, {year})" if authors and year else ""
                out.append(f"- **[{cite.get('key', '?')}]** {cite.get('title', '?')}{suffix}")
                exps = ", ".join(cite.get("experiments", []))
                if exps:
                    out.append(f" Experiments: {exps}")
            out.append("")

    elif action == "check":
        missing = result.get("missing", [])
        covered = result.get("covered", [])
        out += [f"# Citation Audit (coverage: {result.get('coverage', 'N/A')})", ""]
        if missing:
            out += [f"## Missing Citations ({len(missing)})", ""]
            for item in missing:
                out.append(f"- **{item['keyword']}**: {item['suggestion']}")
                out.append(f" Found in: {', '.join(item['found_in'])}")
            out.append("")
        if covered:
            out += [f"## Covered ({len(covered)})", ""]
            out += [f"- {kw}" for kw in covered]
            out.append("")
        if not missing:
            out.append("All detected methods have citations.")

    elif action == "add":
        cite = result.get("citation", {})
        verb = result.get("action", "added")
        out.append(f"Citation {verb}: [{cite.get('key')}] {cite.get('title')}")
        out.append(f" Type: {cite.get('type')} | Experiments: {', '.join(cite.get('experiments', []))}")

    return "\n".join(out)
324
+
325
+
326
def save_citations_report(report: dict, path: str = "experiments/citations") -> Path:
    """Save a citation report to a timestamped YAML file under *path*.

    Returns the written path.
    """
    report_dir = Path(path)
    report_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    target = report_dir / f"report-{stamp}.yaml"
    with open(target, "w") as handle:
        yaml.dump(report, handle, default_flow_style=False, sort_keys=False)
    return target
334
+
335
+
336
+ # --- Orchestration ---
337
+
338
+
339
def run_citation_manager(
    action: str,
    experiment_id: str | None = None,
    key: str | None = None,
    title: str | None = None,
    authors: str | None = None,
    year: int | None = None,
    url: str | None = None,
    doi: str | None = None,
    cite_type: str = "method",
    citations_path: str = DEFAULT_CITATIONS_PATH,
    log_path: str = DEFAULT_LOG_PATH,
    config_path: str = "config.yaml",
) -> dict:
    """Dispatch one citation-manager action and return a timestamped result."""
    timestamp = datetime.now(timezone.utc).isoformat()

    if action == "add":
        if not (experiment_id and key and title):
            return {"error": "add requires experiment_id, --key, and --title"}
        outcome = add_citation(
            experiment_id, key, title, authors, year, url, doi,
            cite_type, citations_path, log_path,
        )
        if "error" in outcome:
            return {"timestamp": timestamp, **outcome}
        # NOTE: add_citation's own "action" key ("added"/"updated") wins
        # over the literal "add" when the dicts merge.
        return {"timestamp": timestamp, "action": "add", **outcome}

    if action == "list":
        return {"timestamp": timestamp, "action": "list", **list_citations(citations_path)}

    if action == "check":
        return {"timestamp": timestamp, "action": "check",
                **check_citations(citations_path, log_path, config_path)}

    if action == "bib":
        return {
            "timestamp": timestamp,
            "action": "bib",
            "bibtex": generate_bibtex(citations_path),
            "count": len(load_citations(citations_path)),
        }

    return {"error": f"Unknown action: {action}"}
381
+
382
+
383
def main() -> None:
    """CLI entry point.

    Parses arguments, runs the requested citation action, and prints the
    result — raw JSON with ``--json``, BibTeX for ``bib``, otherwise a
    formatted text report. Exits with status 1 on error (non-JSON mode).
    """
    parser = argparse.ArgumentParser(
        description="Citation and attribution manager for ML experiments",
    )
    parser.add_argument("action", choices=["add", "list", "check", "bib"],
                        help="Citation action")
    parser.add_argument("experiment_id", nargs="?", default=None,
                        help="Experiment ID (for add)")
    parser.add_argument("--key", default=None, help="Citation key (e.g., Chen2016)")
    parser.add_argument("--title", default=None, help="Paper/resource title")
    parser.add_argument("--authors", default=None, help="Author list")
    parser.add_argument("--year", type=int, default=None, help="Publication year")
    parser.add_argument("--url", default=None, help="URL to paper/resource")
    parser.add_argument("--doi", default=None, help="DOI identifier")
    parser.add_argument("--type", dest="cite_type", default="method",
                        choices=VALID_TYPES, help="Citation type")
    parser.add_argument("--config", default="config.yaml", help="Path to config.yaml")
    parser.add_argument("--log", default=DEFAULT_LOG_PATH, help="Path to experiment log")
    parser.add_argument("--citations-path", default=DEFAULT_CITATIONS_PATH,
                        help="Path to citations YAML")
    parser.add_argument("--json", action="store_true", help="Output raw JSON")
    args = parser.parse_args()

    report = run_citation_manager(
        action=args.action,
        experiment_id=args.experiment_id,
        key=args.key,
        title=args.title,
        authors=args.authors,
        year=args.year,
        url=args.url,
        doi=args.doi,
        cite_type=args.cite_type,
        citations_path=args.citations_path,
        log_path=args.log,
        config_path=args.config,
    )

    if args.json:
        print(json.dumps(report, indent=2, default=str))
    else:
        if "error" in report:
            print(f"ERROR: {report['error']}", file=sys.stderr)
            sys.exit(1)
        # BUGFIX: dispatch display on args.action, not report["action"].
        # For "add", add_citation's result key ("added"/"updated") overrides
        # the dispatcher's "add" in run_citation_manager, so keying the
        # formatter off report["action"] matched no branch and a successful
        # add printed an empty report.
        if args.action == "bib":
            print(report["bibtex"])
        else:
            print(format_citations_report(report, args.action))


if __name__ == "__main__":
    main()