claude-turing 3.3.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +13 -2
- package/commands/annotate.md +23 -0
- package/commands/archive.md +23 -0
- package/commands/cite.md +23 -0
- package/commands/flashback.md +22 -0
- package/commands/merge.md +24 -0
- package/commands/present.md +23 -0
- package/commands/prune.md +26 -0
- package/commands/quantize.md +24 -0
- package/commands/replay.md +23 -0
- package/commands/search.md +22 -0
- package/commands/surgery.md +27 -0
- package/commands/template.md +22 -0
- package/commands/trend.md +21 -0
- package/commands/turing.md +22 -0
- package/package.json +1 -1
- package/src/install.js +2 -0
- package/src/verify.js +11 -0
- package/templates/scripts/__pycache__/architecture_surgery.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_merger.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_pruning.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_quantization.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
- package/templates/scripts/architecture_surgery.py +238 -0
- package/templates/scripts/citation_manager.py +436 -0
- package/templates/scripts/experiment_annotations.py +392 -0
- package/templates/scripts/experiment_archive.py +534 -0
- package/templates/scripts/experiment_replay.py +592 -0
- package/templates/scripts/experiment_search.py +451 -0
- package/templates/scripts/experiment_templates.py +501 -0
- package/templates/scripts/generate_changelog.py +464 -0
- package/templates/scripts/generate_figures.py +597 -0
- package/templates/scripts/model_merger.py +277 -0
- package/templates/scripts/model_pruning.py +182 -0
- package/templates/scripts/model_quantization.py +177 -0
- package/templates/scripts/scaffold.py +20 -0
- package/templates/scripts/session_flashback.py +461 -0
- package/templates/scripts/trend_analysis.py +503 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Architecture modification for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Programmatic architecture changes: add/remove layers, widen/narrow,
|
|
5
|
+
swap activation functions, inject skip connections, change normalization.
|
|
6
|
+
Produces a modified config and instructions for the modified experiment.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python scripts/architecture_surgery.py exp-042 --op widen 2
|
|
10
|
+
python scripts/architecture_surgery.py exp-042 --op add-layer
|
|
11
|
+
python scripts/architecture_surgery.py exp-042 --op swap-activation relu gelu
|
|
12
|
+
python scripts/architecture_surgery.py --json
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import json
|
|
19
|
+
import math
|
|
20
|
+
import sys
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
import yaml
|
|
25
|
+
|
|
26
|
+
from scripts.turing_io import load_config, load_experiments
|
|
27
|
+
|
|
28
|
+
# Default location of the append-only experiment log (one JSON object per line).
DEFAULT_LOG_PATH = "experiments/log.jsonl"
# Supported surgery operation names.
# NOTE(review): this list is not referenced anywhere in this module — the CLI
# accepts any --op string and plan_operation falls through to "Unknown
# operation" for names outside this set; confirm whether validation was intended.
OPERATIONS = ["add-layer", "remove-layer", "widen", "narrow", "swap-activation",
              "add-skip", "add-norm", "deepen", "swap-objective"]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def plan_operation(
    operation: str,
    config: dict,
    hyperparams: dict,
    model_type: str,
    args: list[str] | None = None,
) -> dict:
    """Build a modification plan for a single architecture operation.

    The plan carries the untouched original hyperparameters, a mutated
    copy, human-readable instructions, and (where estimable) a note on
    the parameter-count impact.  `config` is accepted for interface
    parity but is not consulted here.
    """
    op_args = args or []
    plan: dict = {
        "operation": operation,
        "model_type": model_type,
        "original_config": dict(hyperparams),
        "new_config": dict(hyperparams),
        "instructions": [],
        "param_change": None,
    }
    new_cfg = plan["new_config"]
    notes = plan["instructions"]

    # Coarse model-family detection from the model-type string.
    family = model_type.lower()
    tree_like = any(tag in family for tag in ("xgboost", "lightgbm", "forest", "gbm", "catboost"))
    neural = any(tag in family for tag in ("mlp", "nn", "pytorch", "tensorflow", "transformer"))

    if operation == "widen":
        scale = float(op_args[0]) if op_args else 2.0
        if neural:
            width = hyperparams.get("hidden_size", 256)
            widened = int(width * scale)
            new_cfg["hidden_size"] = widened
            notes.append(f"Multiply hidden dimensions: {width} → {widened} ({scale}x)")
            plan["param_change"] = f"+{(scale**2 - 1)*100:.0f}% parameters (quadratic in width)"
        elif tree_like:
            trees = hyperparams.get("n_estimators", 100)
            grown = int(trees * scale)
            new_cfg["n_estimators"] = grown
            notes.append(f"Increase estimators: {trees} → {grown}")
            plan["param_change"] = f"+{(scale - 1)*100:.0f}% trees"
        else:
            notes.append(f"Widen by {scale}x — adjust model-specific width parameters")

    elif operation == "narrow":
        scale = float(op_args[0]) if op_args else 0.5
        if neural:
            width = hyperparams.get("hidden_size", 256)
            shrunk = max(8, int(width * scale))  # floor of 8 units
            new_cfg["hidden_size"] = shrunk
            notes.append(f"Reduce hidden dimensions: {width} → {shrunk} ({scale}x)")
        elif tree_like:
            trees = hyperparams.get("n_estimators", 100)
            shrunk = max(1, int(trees * scale))  # keep at least one tree
            new_cfg["n_estimators"] = shrunk
            notes.append(f"Reduce estimators: {trees} → {shrunk}")

    elif operation == "add-layer":
        if neural:
            depth = hyperparams.get("n_layers", hyperparams.get("layers", 3))
            new_cfg["n_layers"] = depth + 1
            notes.extend([
                f"Add layer: {depth} → {depth + 1}",
                "New layer initialized with default weights",
                "Auto warm-start: existing layers loaded from source",
            ])
            plan["param_change"] = f"+1 layer ({depth} → {depth + 1})"
        else:
            notes.append("add-layer not applicable for non-neural models")

    elif operation == "remove-layer":
        if not neural:
            notes.append("remove-layer not applicable for non-neural models")
        else:
            depth = hyperparams.get("n_layers", hyperparams.get("layers", 3))
            if depth > 1:
                new_cfg["n_layers"] = depth - 1
                notes.append(f"Remove layer: {depth} → {depth - 1}")
            else:
                notes.append("Cannot remove — only 1 layer remaining")

    elif operation == "deepen":
        if tree_like:
            depth = hyperparams.get("max_depth", 6)
            new_cfg["max_depth"] = depth + 2
            notes.append(f"Increase max depth: {depth} → {depth + 2}")
        elif neural:
            depth = hyperparams.get("n_layers", 3)
            new_cfg["n_layers"] = depth + 2
            notes.append(f"Add 2 layers: {depth} → {depth + 2}")

    elif operation == "swap-activation":
        if len(op_args) >= 2:
            from_act, to_act = op_args[0], op_args[1]
        else:
            from_act, to_act = "relu", "gelu"
        new_cfg["activation"] = to_act
        notes.append(f"Swap activation: {from_act} → {to_act}")

    elif operation == "add-skip":
        new_cfg["skip_connections"] = True
        notes.append("Inject residual/skip connections between layers")

    elif operation == "add-norm":
        norm_type = op_args[0] if op_args else "batch_norm"
        new_cfg["normalization"] = norm_type
        notes.append(f"Add {norm_type} after each layer")

    elif operation == "swap-objective":
        if len(op_args) >= 2:
            from_obj, to_obj = op_args[0], op_args[1]
        else:
            from_obj, to_obj = hyperparams.get("objective", "logloss"), "focal"
        new_cfg["objective"] = to_obj
        notes.append(f"Swap objective: {from_obj} → {to_obj}")

    else:
        notes.append(f"Unknown operation: {operation}")

    return plan
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def surgery_report(
    exp_id: str,
    operation: str,
    op_args: list[str] | None = None,
    config_path: str = "config.yaml",
    log_path: str = DEFAULT_LOG_PATH,
) -> dict:
    """Look up *exp_id* in the experiment log and build a surgery plan for it.

    Returns ``{"error": ...}`` when the experiment is missing, otherwise a
    report with the plan and warm-start pointer.
    """
    for entry in load_experiments(log_path):
        if entry.get("experiment_id") == exp_id:
            exp = entry
            break
    else:
        return {"error": f"Experiment {exp_id} not found"}

    # The plan is derived from the config recorded in the log entry.
    # NOTE(review): `config_path` is accepted but never read here — confirm
    # whether the on-disk config was meant to feed the plan as well.
    cfg = exp.get("config", {})
    plan = plan_operation(
        operation,
        cfg,
        cfg.get("hyperparams", {}),
        cfg.get("model_type", "unknown"),
        op_args,
    )

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "experiment_id": exp_id,
        "plan": plan,
        "warm_start_from": exp_id,
    }
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def save_surgery_report(report: dict, output_dir: str = "experiments/surgery") -> Path:
    """Write *report* to ``<output_dir>/<exp_id>-<operation>.yaml``; return the path."""
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    exp_id = report.get("experiment_id", "unknown")
    operation = report.get("plan", {}).get("operation", "unknown")
    target = target_dir / f"{exp_id}-{operation}.yaml"
    with open(target, "w") as fh:
        yaml.dump(report, fh, default_flow_style=False, sort_keys=False)
    return target
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def format_surgery_report(report: dict) -> str:
    """Render a surgery report as Markdown text (or an ERROR line)."""
    if "error" in report:
        return f"ERROR: {report['error']}"

    plan = report.get("plan", {})
    exp_id = report.get("experiment_id", "?")
    op = plan.get("operation", "?")

    out = [
        f"# Surgery: {op} ({exp_id})",
        "",
        f"*Generated {report.get('generated_at', 'N/A')[:19]}*",
        f"**Model:** {plan.get('model_type', '?')}",
        "",
        "## Instructions",
        "",
    ]
    for idx, step in enumerate(plan.get("instructions", []), start=1):
        out.append(f"{idx}. {step}")
    out.append("")

    if plan.get("param_change"):
        out.append(f"**Parameter change:** {plan['param_change']}")
        out.append("")

    # Diff the original and mutated hyperparameter dicts, preserving the
    # insertion order of the new config.
    before = plan.get("original_config", {})
    after = plan.get("new_config", {})
    diffs = [(k, before.get(k), after[k]) for k in after if before.get(k) != after.get(k)]
    if diffs:
        out.extend(["## Config Changes", ""])
        for key, old_val, new_val in diffs:
            out.append(f"- `{key}`: {old_val} → {new_val}")
        out.append("")

    out.append(f"**Warm-start from:** {report.get('warm_start_from', '?')}")
    return "\n".join(out)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def main() -> None:
    """CLI entry point: plan a surgery, save it, and print the report."""
    cli = argparse.ArgumentParser(description="Architecture modification")
    cli.add_argument("exp_id")
    cli.add_argument("--op", required=True, help="Operation name")
    cli.add_argument("op_args", nargs="*", help="Operation arguments")
    cli.add_argument("--config", default="config.yaml")
    cli.add_argument("--log", default=DEFAULT_LOG_PATH)
    cli.add_argument("--json", action="store_true")
    opts = cli.parse_args()

    report = surgery_report(opts.exp_id, opts.op, opts.op_args, opts.config, opts.log)
    if "error" not in report:
        # Persist successful reports; the save path goes to stderr so stdout
        # stays clean for piping.
        saved = save_surgery_report(report)
        print(f"Saved to {saved}", file=sys.stderr)
    if opts.json:
        print(json.dumps(report, indent=2, default=str))
    else:
        print(format_surgery_report(report))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Citation and attribution manager for the autoresearch pipeline.
|
|
3
|
+
|
|
4
|
+
Tracks academic citations associated with experiments. Every method,
|
|
5
|
+
dataset, technique, and codebase used in the research campaign should
|
|
6
|
+
have a citation. This script manages the citation store, audits for
|
|
7
|
+
missing attributions, and generates BibTeX output.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python scripts/citation_manager.py add exp-042 --key Chen2016 --title "XGBoost" --url "..."
|
|
11
|
+
python scripts/citation_manager.py list
|
|
12
|
+
python scripts/citation_manager.py check
|
|
13
|
+
python scripts/citation_manager.py bib
|
|
14
|
+
python scripts/citation_manager.py --json
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import json
|
|
21
|
+
import re
|
|
22
|
+
import sys
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
import yaml
|
|
27
|
+
|
|
28
|
+
from scripts.turing_io import load_config, load_experiments
|
|
29
|
+
|
|
30
|
+
# Append-only experiment log (one JSON object per line).
DEFAULT_LOG_PATH = "experiments/log.jsonl"
# YAML file holding the citation store (a list of citation dicts).
DEFAULT_CITATIONS_PATH = "experiments/citations.yaml"
# Accepted values for a citation's "type" field.
VALID_TYPES = ["method", "dataset", "technique", "codebase"]

# Keywords that suggest a method/technique needing citation.
# Keys are matched as lowercase substrings of serialized experiment records
# and the config; values are the human-readable citation suggestions shown
# by the audit.
METHOD_KEYWORDS = {
    "xgboost": "XGBoost (Chen & Guestrin, 2016)",
    "lightgbm": "LightGBM (Ke et al., 2017)",
    "catboost": "CatBoost (Prokhorenkova et al., 2018)",
    "random_forest": "Random Forest (Breiman, 2001)",
    "gradient_boosting": "Gradient Boosting (Friedman, 2001)",
    "adam": "Adam optimizer (Kingma & Ba, 2015)",
    "sgd": "SGD with momentum (Sutskever et al., 2013)",
    "dropout": "Dropout (Srivastava et al., 2014)",
    "batch_norm": "Batch Normalization (Ioffe & Szegedy, 2015)",
    "resnet": "ResNet (He et al., 2016)",
    "transformer": "Transformer (Vaswani et al., 2017)",
    "bert": "BERT (Devlin et al., 2019)",
    "lstm": "LSTM (Hochreiter & Schmidhuber, 1997)",
    "svm": "SVM (Cortes & Vapnik, 1995)",
    "lasso": "Lasso (Tibshirani, 1996)",
    "ridge": "Ridge Regression (Hoerl & Kennard, 1970)",
    "elastic_net": "Elastic Net (Zou & Hastie, 2005)",
    "pca": "PCA (Pearson, 1901)",
    "tsne": "t-SNE (van der Maaten & Hinton, 2008)",
    "umap": "UMAP (McInnes et al., 2018)",
    "cross_validation": "Cross-validation (Stone, 1974)",
    "smote": "SMOTE (Chawla et al., 2002)",
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# --- Storage ---
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def load_citations(path: str = DEFAULT_CITATIONS_PATH) -> list[dict]:
    """Read the citation store; a missing or empty file yields ``[]``."""
    store = Path(path)
    if not store.exists() or store.stat().st_size == 0:
        return []
    with open(store) as fh:
        parsed = yaml.safe_load(fh)
    # Anything other than a YAML list (e.g. a stray mapping) is treated as empty.
    return parsed if isinstance(parsed, list) else []
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def save_citations(citations: list[dict], path: str = DEFAULT_CITATIONS_PATH) -> Path:
    """Persist the citation list as YAML, creating parent directories as needed."""
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w") as fh:
        yaml.dump(citations, fh, default_flow_style=False, sort_keys=False)
    return target
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# --- Operations ---
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def add_citation(
    experiment_id: str,
    key: str,
    title: str,
    authors: str | None = None,
    year: int | None = None,
    url: str | None = None,
    doi: str | None = None,
    cite_type: str = "method",
    citations_path: str = DEFAULT_CITATIONS_PATH,
    log_path: str = DEFAULT_LOG_PATH,
) -> dict:
    """Create a citation entry, or attach *experiment_id* to an existing one.

    If *key* already exists in the store, the experiment is appended to its
    experiment list and any provided fields overwrite the stored ones.
    Returns ``{"action": "added"|"updated", "citation": ...}`` on success,
    or ``{"error": ...}`` when validation fails.
    """
    known_ids = {e.get("experiment_id") for e in load_experiments(log_path)}
    if experiment_id not in known_ids:
        return {"error": f"Experiment '{experiment_id}' not found in log"}
    if cite_type not in VALID_TYPES:
        return {"error": f"Invalid type '{cite_type}'. Valid: {VALID_TYPES}"}

    citations = load_citations(citations_path)
    match = next((c for c in citations if c.get("key") == key), None)

    if match is not None:
        if experiment_id not in match.get("experiments", []):
            match.setdefault("experiments", []).append(experiment_id)
        # Only truthy values overwrite what is already stored.
        for field, value in (("authors", authors), ("year", year),
                             ("url", url), ("doi", doi)):
            if value:
                match[field] = value
        save_citations(citations, citations_path)
        return {"action": "updated", "citation": match}

    entry = {
        "key": key,
        "title": title,
        "authors": authors or "",
        "year": year or 0,
        "url": url or "",
        "doi": doi or "",
        "type": cite_type,
        "experiments": [experiment_id],
    }
    citations.append(entry)
    save_citations(citations, citations_path)
    return {"action": "added", "citation": entry}
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def list_citations(
    citations_path: str = DEFAULT_CITATIONS_PATH,
) -> dict:
    """Summarize the citation store: total count, per-type buckets, raw entries."""
    citations = load_citations(citations_path)
    buckets: dict[str, list[dict]] = {}
    for entry in citations:
        buckets.setdefault(entry.get("type", "unknown"), []).append(entry)
    return {
        "total": len(citations),
        "by_type": buckets,
        "citations": citations,
    }
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def check_citations(
    citations_path: str = DEFAULT_CITATIONS_PATH,
    log_path: str = DEFAULT_LOG_PATH,
    config_path: str = "config.yaml",
) -> dict:
    """Audit for missing citations — methods used without attribution.

    Scans experiment configs and descriptions for known method keywords
    that lack a corresponding citation entry.

    Returns a dict with ``missing`` (keyword, suggested citation, where it
    was found), ``covered`` keywords, ``total_checked``, and a ``coverage``
    ratio string.
    """
    citations = load_citations(citations_path)
    experiments = load_experiments(log_path)
    config = load_config(config_path)

    cited_keys = {c.get("key", "").lower() for c in citations}
    cited_titles = [c.get("title", "").lower() for c in citations]

    # Serialize once up front: the original re-dumped every experiment (and
    # the config) with json.dumps for every keyword, doing
    # O(keywords x experiments) redundant serialization work.
    searchable_exps = [
        (exp.get("experiment_id", "?"), json.dumps(exp, default=str).lower())
        for exp in experiments
    ]
    config_blob = json.dumps(config, default=str).lower()

    missing: list[dict] = []
    covered: list[str] = []

    for keyword, suggestion in METHOD_KEYWORDS.items():
        kw = keyword.lower()
        # Where does this keyword appear?
        found_in = [exp_id for exp_id, blob in searchable_exps if kw in blob]
        if kw in config_blob:
            found_in.append("config.yaml")
        if not found_in:
            continue  # keyword unused — nothing to cite

        # A keyword counts as cited when it matches a citation key exactly or
        # appears anywhere in a citation title.  (The original's exact-title
        # set-membership test was subsumed by the substring check and has
        # been dropped.)
        if kw in cited_keys or any(kw in title for title in cited_titles):
            covered.append(keyword)
        else:
            missing.append({
                "keyword": keyword,
                "suggestion": suggestion,
                "found_in": found_in,
            })

    return {
        "missing": missing,
        "covered": covered,
        "total_checked": len(METHOD_KEYWORDS),
        "coverage": f"{len(covered)}/{len(covered) + len(missing)}" if (covered or missing) else "N/A",
    }
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def generate_bibtex(citations_path: str = DEFAULT_CITATIONS_PATH) -> str:
    """Render every stored citation as a BibTeX entry and return the text."""
    citations = load_citations(citations_path)
    if not citations:
        return "% No citations found.\n"

    entries = []
    for cite in citations:
        key = cite.get("key", "unknown")
        doi = cite.get("doi", "")
        # Entries carrying a DOI become @article; everything else is @misc.
        entry_type = "article" if doi else "misc"

        body = [f"@{entry_type}{{{key},", f"  title = {{{cite.get('title', '')}}},"]
        optional = [
            ("author", cite.get("authors", "")),
            ("year", cite.get("year", 0)),
            ("url", cite.get("url", "")),
            ("doi", doi),
        ]
        for bib_field, value in optional:
            if value:
                body.append(f"  {bib_field} = {{{value}}},")
        note = f"Type: {cite.get('type', 'unknown')}. Used in: {', '.join(cite.get('experiments', []))}"
        body.append(f"  note = {{{note}}},")
        body.append("}")
        entries.append("\n".join(body))

    header = f"% Auto-generated by Turing citation manager\n% {len(entries)} citation(s)\n"
    return header + "\n\n".join(entries) + "\n"
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# --- Report ---
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def format_citations_report(result: dict, action: str) -> str:
    """Render citation-manager output as human-readable Markdown text."""
    out: list[str] = []

    if action == "list":
        out.append(f"# Citations ({result.get('total', 0)} total)")
        out.append("")
        grouped = result.get("by_type", {})
        # Emit groups in the canonical type order, skipping empty ones.
        for ctype in VALID_TYPES:
            bucket = grouped.get(ctype, [])
            if not bucket:
                continue
            out.extend([f"## {ctype.title()} ({len(bucket)})", ""])
            for cite in bucket:
                authors = cite.get("authors", "")
                year = cite.get("year", "")
                suffix = f" ({authors}, {year})" if authors and year else ""
                out.append(f"- **[{cite.get('key', '?')}]** {cite.get('title', '?')}{suffix}")
                linked = ", ".join(cite.get("experiments", []))
                if linked:
                    out.append(f"  Experiments: {linked}")
            out.append("")

    elif action == "check":
        missing = result.get("missing", [])
        covered = result.get("covered", [])
        out.append(f"# Citation Audit (coverage: {result.get('coverage', 'N/A')})")
        out.append("")
        if missing:
            out.extend([f"## Missing Citations ({len(missing)})", ""])
            for gap in missing:
                out.append(f"- **{gap['keyword']}**: {gap['suggestion']}")
                out.append(f"  Found in: {', '.join(gap['found_in'])}")
            out.append("")
        if covered:
            out.extend([f"## Covered ({len(covered)})", ""])
            for kw in covered:
                out.append(f"- {kw}")
            out.append("")
        if not missing:
            out.append("All detected methods have citations.")

    elif action == "add":
        cite = result.get("citation", {})
        act = result.get("action", "added")
        out.append(f"Citation {act}: [{cite.get('key')}] {cite.get('title')}")
        out.append(f"  Type: {cite.get('type')} | Experiments: {', '.join(cite.get('experiments', []))}")

    return "\n".join(out)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def save_citations_report(report: dict, path: str = "experiments/citations") -> Path:
    """Save a citation report to a timestamped YAML file under *path*."""
    report_dir = Path(path)
    report_dir.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    target = report_dir / f"report-{stamp}.yaml"
    with open(target, "w") as fh:
        yaml.dump(report, fh, default_flow_style=False, sort_keys=False)
    return target
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# --- Orchestration ---
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def run_citation_manager(
    action: str,
    experiment_id: str | None = None,
    key: str | None = None,
    title: str | None = None,
    authors: str | None = None,
    year: int | None = None,
    url: str | None = None,
    doi: str | None = None,
    cite_type: str = "method",
    citations_path: str = DEFAULT_CITATIONS_PATH,
    log_path: str = DEFAULT_LOG_PATH,
    config_path: str = "config.yaml",
) -> dict:
    """Dispatch one citation-manager action and return its report dict.

    The returned dict always carries ``timestamp`` and either ``error`` or
    an ``action`` key equal to the dispatched action name — the CLI
    formatter selects its rendering branch on that key.
    """
    timestamp = datetime.now(timezone.utc).isoformat()

    if action == "add":
        if not experiment_id or not key or not title:
            return {"error": "add requires experiment_id, --key, and --title"}
        result = add_citation(
            experiment_id, key, title, authors, year, url, doi,
            cite_type, citations_path, log_path,
        )
        if "error" in result:
            return {"timestamp": timestamp, **result}
        # BUG FIX: the original returned {"timestamp": ..., "action": "add",
        # **result}, so add_citation's own "action" value ("added"/"updated")
        # overwrote the dispatch key.  main() then handed "added" to
        # format_citations_report, no branch matched, and the add command
        # printed nothing.  Keep the dispatch key authoritative and expose
        # the sub-result under "status" instead.
        status = result.pop("action", "added")
        return {"timestamp": timestamp, "action": "add", "status": status, **result}

    elif action == "list":
        result = list_citations(citations_path)
        return {"timestamp": timestamp, "action": "list", **result}

    elif action == "check":
        result = check_citations(citations_path, log_path, config_path)
        return {"timestamp": timestamp, "action": "check", **result}

    elif action == "bib":
        bibtex = generate_bibtex(citations_path)
        return {"timestamp": timestamp, "action": "bib", "bibtex": bibtex,
                "count": len(load_citations(citations_path))}

    return {"error": f"Unknown action: {action}"}
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def main() -> None:
    """CLI entry point."""
    cli = argparse.ArgumentParser(
        description="Citation and attribution manager for ML experiments",
    )
    cli.add_argument("action", choices=["add", "list", "check", "bib"],
                     help="Citation action")
    cli.add_argument("experiment_id", nargs="?", default=None,
                     help="Experiment ID (for add)")
    cli.add_argument("--key", default=None, help="Citation key (e.g., Chen2016)")
    cli.add_argument("--title", default=None, help="Paper/resource title")
    cli.add_argument("--authors", default=None, help="Author list")
    cli.add_argument("--year", type=int, default=None, help="Publication year")
    cli.add_argument("--url", default=None, help="URL to paper/resource")
    cli.add_argument("--doi", default=None, help="DOI identifier")
    cli.add_argument("--type", dest="cite_type", default="method",
                     choices=VALID_TYPES, help="Citation type")
    cli.add_argument("--config", default="config.yaml", help="Path to config.yaml")
    cli.add_argument("--log", default=DEFAULT_LOG_PATH, help="Path to experiment log")
    cli.add_argument("--citations-path", default=DEFAULT_CITATIONS_PATH,
                     help="Path to citations YAML")
    cli.add_argument("--json", action="store_true", help="Output raw JSON")
    opts = cli.parse_args()

    report = run_citation_manager(
        action=opts.action,
        experiment_id=opts.experiment_id,
        key=opts.key,
        title=opts.title,
        authors=opts.authors,
        year=opts.year,
        url=opts.url,
        doi=opts.doi,
        cite_type=opts.cite_type,
        citations_path=opts.citations_path,
        log_path=opts.log,
        config_path=opts.config,
    )

    if opts.json:
        print(json.dumps(report, indent=2, default=str))
        return
    if "error" in report:
        print(f"ERROR: {report['error']}", file=sys.stderr)
        sys.exit(1)
    # "bib" output is raw BibTeX; everything else goes through the
    # Markdown formatter keyed on the report's action.
    if report.get("action", "") == "bib":
        print(report["bibtex"])
    else:
        print(format_citations_report(report, report.get("action", "")))


if __name__ == "__main__":
    main()
|