atris 2.6.0 → 2.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GETTING_STARTED.md +2 -2
- package/atris/GETTING_STARTED.md +2 -2
- package/bin/atris.js +35 -4
- package/commands/business.js +244 -2
- package/commands/context-sync.js +228 -0
- package/commands/pull.js +176 -50
- package/commands/push.js +154 -61
- package/commands/setup.js +178 -0
- package/commands/workspace-clean.js +249 -0
- package/lib/manifest.js +224 -0
- package/lib/section-merge.js +196 -0
- package/package.json +9 -4
- package/utils/api.js +9 -1
- package/utils/update-check.js +11 -11
- package/AGENT.md +0 -35
- package/atris/experiments/README.md +0 -118
- package/atris/experiments/_examples/smoke-keep-revert/README.md +0 -45
- package/atris/experiments/_examples/smoke-keep-revert/candidate.py +0 -8
- package/atris/experiments/_examples/smoke-keep-revert/loop.py +0 -129
- package/atris/experiments/_examples/smoke-keep-revert/measure.py +0 -47
- package/atris/experiments/_examples/smoke-keep-revert/program.md +0 -3
- package/atris/experiments/_examples/smoke-keep-revert/proposals/bad_patch.py +0 -19
- package/atris/experiments/_examples/smoke-keep-revert/proposals/fix_patch.py +0 -22
- package/atris/experiments/_examples/smoke-keep-revert/reset.py +0 -21
- package/atris/experiments/_examples/smoke-keep-revert/results.tsv +0 -5
- package/atris/experiments/_examples/smoke-keep-revert/visual.svg +0 -52
- package/atris/experiments/_fixtures/invalid/BadName/loop.py +0 -1
- package/atris/experiments/_fixtures/invalid/BadName/program.md +0 -3
- package/atris/experiments/_fixtures/invalid/BadName/results.tsv +0 -1
- package/atris/experiments/_fixtures/invalid/bloated-context/loop.py +0 -1
- package/atris/experiments/_fixtures/invalid/bloated-context/measure.py +0 -1
- package/atris/experiments/_fixtures/invalid/bloated-context/program.md +0 -6
- package/atris/experiments/_fixtures/invalid/bloated-context/results.tsv +0 -1
- package/atris/experiments/_fixtures/valid/good-experiment/loop.py +0 -1
- package/atris/experiments/_fixtures/valid/good-experiment/measure.py +0 -1
- package/atris/experiments/_fixtures/valid/good-experiment/program.md +0 -3
- package/atris/experiments/_fixtures/valid/good-experiment/results.tsv +0 -1
- package/atris/experiments/_template/pack/loop.py +0 -3
- package/atris/experiments/_template/pack/measure.py +0 -13
- package/atris/experiments/_template/pack/program.md +0 -3
- package/atris/experiments/_template/pack/reset.py +0 -3
- package/atris/experiments/_template/pack/results.tsv +0 -1
- package/atris/experiments/benchmark_runtime.py +0 -81
- package/atris/experiments/benchmark_validate.py +0 -70
- package/atris/experiments/validate.py +0 -92
- package/atris/team/navigator/journal/2026-02-23.md +0 -6
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
"""A deliberately bad mutation that should be reverted."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
import os
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
TARGET = Path(os.environ["EXPERIMENT_TARGET"])
|
|
8
|
-
|
|
9
|
-
TARGET.write_text(
|
|
10
|
-
'''"""Bounded mutation target for the smoke experiment."""
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def count_words(text: str) -> int:
|
|
14
|
-
return 0
|
|
15
|
-
''',
|
|
16
|
-
encoding="utf-8",
|
|
17
|
-
)
|
|
18
|
-
|
|
19
|
-
print("applied bad proposal")
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
"""A good mutation that should be kept."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
import os
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
TARGET = Path(os.environ["EXPERIMENT_TARGET"])
|
|
8
|
-
|
|
9
|
-
TARGET.write_text(
|
|
10
|
-
'''"""Bounded mutation target for the smoke experiment."""
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def count_words(text: str) -> int:
|
|
14
|
-
cleaned = text.strip()
|
|
15
|
-
if not cleaned:
|
|
16
|
-
return 0
|
|
17
|
-
return len(cleaned.split())
|
|
18
|
-
''',
|
|
19
|
-
encoding="utf-8",
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
print("applied good proposal")
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
"""Restore the smoke example to its baseline."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
TARGET = Path(__file__).resolve().parent / "candidate.py"
|
|
7
|
-
|
|
8
|
-
TARGET.write_text(
|
|
9
|
-
'''"""Bounded mutation target for the smoke experiment."""
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def count_words(text: str) -> int:
|
|
13
|
-
cleaned = text.strip()
|
|
14
|
-
if not cleaned:
|
|
15
|
-
return 0
|
|
16
|
-
return len(cleaned)
|
|
17
|
-
''',
|
|
18
|
-
encoding="utf-8",
|
|
19
|
-
)
|
|
20
|
-
|
|
21
|
-
print("reset smoke-keep-revert to baseline")
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
timestamp trial status old_score new_score proposal description
|
|
2
|
-
2026-03-11T11:05:17.887045+00:00 1 reverted 0.2000 0.2000 bad_patch.py applied bad proposal
|
|
3
|
-
2026-03-11T11:05:17.920737+00:00 2 kept 0.2000 1.0000 fix_patch.py applied good proposal
|
|
4
|
-
2026-03-11T11:05:40.063680+00:00 1 reverted 0.2000 0.2000 bad_patch.py applied bad proposal
|
|
5
|
-
2026-03-11T11:05:40.097842+00:00 2 kept 0.2000 1.0000 fix_patch.py applied good proposal
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
<svg width="980" height="260" viewBox="0 0 980 260" fill="none" xmlns="http://www.w3.org/2000/svg">
|
|
2
|
-
<rect width="980" height="260" fill="#F7F5EF"/>
|
|
3
|
-
<text x="40" y="42" font-family="Helvetica, Arial, sans-serif" font-size="28" font-weight="700" fill="#111111">Smoke Keep/Revert</text>
|
|
4
|
-
<text x="40" y="68" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#4B5563">One bounded target. One hard metric. Reject the loser. Keep the winner.</text>
|
|
5
|
-
|
|
6
|
-
<rect x="40" y="110" width="150" height="88" rx="16" fill="#FFF7ED" stroke="#C2410C" stroke-width="2"/>
|
|
7
|
-
<text x="115" y="140" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="18" font-weight="700" fill="#9A3412">Broken Target</text>
|
|
8
|
-
<text x="115" y="166" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#7C2D12">candidate.py</text>
|
|
9
|
-
<text x="115" y="186" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#7C2D12">buggy on purpose</text>
|
|
10
|
-
|
|
11
|
-
<rect x="220" y="110" width="150" height="88" rx="16" fill="#EFF6FF" stroke="#1D4ED8" stroke-width="2"/>
|
|
12
|
-
<text x="295" y="140" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="18" font-weight="700" fill="#1E3A8A">Measure</text>
|
|
13
|
-
<text x="295" y="166" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#1D4ED8">score = 0.2</text>
|
|
14
|
-
<text x="295" y="186" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#1D4ED8">baseline truth</text>
|
|
15
|
-
|
|
16
|
-
<rect x="400" y="38" width="160" height="72" rx="16" fill="#FEF2F2" stroke="#DC2626" stroke-width="2"/>
|
|
17
|
-
<text x="480" y="66" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="18" font-weight="700" fill="#991B1B">Bad Patch</text>
|
|
18
|
-
<text x="480" y="92" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#B91C1C">score falls to 0.0</text>
|
|
19
|
-
|
|
20
|
-
<rect x="400" y="150" width="160" height="72" rx="16" fill="#ECFDF5" stroke="#059669" stroke-width="2"/>
|
|
21
|
-
<text x="480" y="178" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="18" font-weight="700" fill="#065F46">Good Patch</text>
|
|
22
|
-
<text x="480" y="204" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#047857">score rises to 1.0</text>
|
|
23
|
-
|
|
24
|
-
<rect x="610" y="38" width="150" height="72" rx="16" fill="#FEE2E2" stroke="#DC2626" stroke-width="2"/>
|
|
25
|
-
<text x="685" y="66" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="18" font-weight="700" fill="#991B1B">REVERT</text>
|
|
26
|
-
<text x="685" y="92" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#B91C1C">reject loser</text>
|
|
27
|
-
|
|
28
|
-
<rect x="610" y="150" width="150" height="72" rx="16" fill="#DCFCE7" stroke="#16A34A" stroke-width="2"/>
|
|
29
|
-
<text x="685" y="178" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="18" font-weight="700" fill="#166534">KEEP</text>
|
|
30
|
-
<text x="685" y="204" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#15803D">accept winner</text>
|
|
31
|
-
|
|
32
|
-
<rect x="800" y="110" width="140" height="88" rx="16" fill="#F0FDF4" stroke="#16A34A" stroke-width="2"/>
|
|
33
|
-
<text x="870" y="140" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="18" font-weight="700" fill="#166534">Final State</text>
|
|
34
|
-
<text x="870" y="166" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#15803D">fixed target</text>
|
|
35
|
-
<text x="870" y="186" text-anchor="middle" font-family="Helvetica, Arial, sans-serif" font-size="14" fill="#15803D">score = 1.0</text>
|
|
36
|
-
|
|
37
|
-
<path d="M190 154H220" stroke="#6B7280" stroke-width="3"/>
|
|
38
|
-
<path d="M365 154H385" stroke="#6B7280" stroke-width="3"/>
|
|
39
|
-
<path d="M560 74H600" stroke="#DC2626" stroke-width="3"/>
|
|
40
|
-
<path d="M560 186H600" stroke="#16A34A" stroke-width="3"/>
|
|
41
|
-
<path d="M760 154H790" stroke="#6B7280" stroke-width="3"/>
|
|
42
|
-
|
|
43
|
-
<path d="M480 110V138" stroke="#6B7280" stroke-width="3" stroke-dasharray="8 8"/>
|
|
44
|
-
<path d="M295 154C340 154 350 74 400 74" stroke="#DC2626" stroke-width="3" fill="none"/>
|
|
45
|
-
<path d="M295 154C340 154 350 186 400 186" stroke="#16A34A" stroke-width="3" fill="none"/>
|
|
46
|
-
|
|
47
|
-
<polygon points="220,154 210,148 210,160" fill="#6B7280"/>
|
|
48
|
-
<polygon points="385,154 375,148 375,160" fill="#6B7280"/>
|
|
49
|
-
<polygon points="600,74 590,68 590,80" fill="#DC2626"/>
|
|
50
|
-
<polygon points="600,186 590,180 590,192" fill="#16A34A"/>
|
|
51
|
-
<polygon points="790,154 780,148 780,160" fill="#6B7280"/>
|
|
52
|
-
</svg>
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
print("ok")
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
timestamp trial status old_score new_score proposal description
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
print("ok")
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
print("ok")
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
# Program
|
|
2
|
-
|
|
3
|
-
This program is intentionally too long.
|
|
4
|
-
|
|
5
|
-
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
|
6
|
-
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
timestamp trial status old_score new_score proposal description
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
print("ok")
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
print("ok")
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
timestamp trial status old_score new_score proposal description
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
timestamp trial status old_score new_score proposal description
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
"""Runtime benchmark for example experiment packs."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import json
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
import subprocess
|
|
8
|
-
import sys
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
ROOT = Path(__file__).resolve().parent
|
|
12
|
-
EXAMPLES_DIR = ROOT / "_examples"
|
|
13
|
-
|
|
14
|
-
CASES = [
|
|
15
|
-
{
|
|
16
|
-
"name": "smoke-keep-revert",
|
|
17
|
-
"baseline_below": 1.0,
|
|
18
|
-
"expected_final": 1.0,
|
|
19
|
-
"proposals": ["proposals/bad_patch.py", "proposals/fix_patch.py"],
|
|
20
|
-
},
|
|
21
|
-
]
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def run_python(script: Path, *args: str) -> subprocess.CompletedProcess[str]:
|
|
25
|
-
return subprocess.run(
|
|
26
|
-
[sys.executable, str(script), *args],
|
|
27
|
-
cwd=str(script.parent),
|
|
28
|
-
capture_output=True,
|
|
29
|
-
text=True,
|
|
30
|
-
check=True,
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def run_measure(exp_dir: Path) -> dict:
|
|
35
|
-
proc = run_python(exp_dir / "measure.py")
|
|
36
|
-
return json.loads(proc.stdout.strip())
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def main() -> int:
|
|
40
|
-
passed = 0
|
|
41
|
-
failures = []
|
|
42
|
-
|
|
43
|
-
for case in CASES:
|
|
44
|
-
exp_dir = EXAMPLES_DIR / case["name"]
|
|
45
|
-
run_python(exp_dir / "reset.py")
|
|
46
|
-
baseline = run_measure(exp_dir)
|
|
47
|
-
|
|
48
|
-
if float(baseline["score"]) >= case["baseline_below"]:
|
|
49
|
-
failures.append(f"{case['name']}: baseline too high ({baseline['score']})")
|
|
50
|
-
continue
|
|
51
|
-
|
|
52
|
-
proposal_args: list[str] = []
|
|
53
|
-
for proposal in case["proposals"]:
|
|
54
|
-
proposal_args.extend(["--proposal", str(exp_dir / proposal)])
|
|
55
|
-
|
|
56
|
-
run_python(exp_dir / "loop.py", *proposal_args)
|
|
57
|
-
final = run_measure(exp_dir)
|
|
58
|
-
|
|
59
|
-
if float(final["score"]) != case["expected_final"]:
|
|
60
|
-
failures.append(
|
|
61
|
-
f"{case['name']}: final score {final['score']} != {case['expected_final']}"
|
|
62
|
-
)
|
|
63
|
-
continue
|
|
64
|
-
|
|
65
|
-
passed += 1
|
|
66
|
-
|
|
67
|
-
total = len(CASES)
|
|
68
|
-
score = passed / total if total else 0.0
|
|
69
|
-
print(f"SCORE {score:.4f} ({passed}/{total})")
|
|
70
|
-
|
|
71
|
-
if failures:
|
|
72
|
-
for failure in failures:
|
|
73
|
-
print(f"FAIL {failure}")
|
|
74
|
-
return 1
|
|
75
|
-
|
|
76
|
-
print("PASS benchmark_runtime")
|
|
77
|
-
return 0
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
if __name__ == "__main__":
|
|
81
|
-
raise SystemExit(main())
|
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
"""Benchmark the validator against fixed good/bad fixtures."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
import sys
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
ROOT = Path(__file__).resolve().parent
|
|
10
|
-
if str(ROOT) not in sys.path:
|
|
11
|
-
sys.path.insert(0, str(ROOT))
|
|
12
|
-
|
|
13
|
-
from validate import validate_experiment
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
FIXTURES_DIR = ROOT / "_fixtures"
|
|
17
|
-
|
|
18
|
-
CASES = [
|
|
19
|
-
{
|
|
20
|
-
"path": FIXTURES_DIR / "valid" / "good-experiment",
|
|
21
|
-
"expect_ok": True,
|
|
22
|
-
"must_contain": [],
|
|
23
|
-
},
|
|
24
|
-
{
|
|
25
|
-
"path": FIXTURES_DIR / "invalid" / "BadName",
|
|
26
|
-
"expect_ok": False,
|
|
27
|
-
"must_contain": ["invalid folder name", "missing required file measure.py"],
|
|
28
|
-
},
|
|
29
|
-
{
|
|
30
|
-
"path": FIXTURES_DIR / "invalid" / "bloated-context",
|
|
31
|
-
"expect_ok": False,
|
|
32
|
-
"must_contain": ["program.md too long"],
|
|
33
|
-
},
|
|
34
|
-
]
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def main() -> int:
|
|
38
|
-
passed = 0
|
|
39
|
-
failures = []
|
|
40
|
-
|
|
41
|
-
for case in CASES:
|
|
42
|
-
issues = validate_experiment(case["path"])
|
|
43
|
-
is_ok = not issues
|
|
44
|
-
|
|
45
|
-
if case["expect_ok"] != is_ok:
|
|
46
|
-
failures.append(f"{case['path'].name}: expected ok={case['expect_ok']} got ok={is_ok}")
|
|
47
|
-
continue
|
|
48
|
-
|
|
49
|
-
missing = [needle for needle in case["must_contain"] if not any(needle in issue for issue in issues)]
|
|
50
|
-
if missing:
|
|
51
|
-
failures.append(f"{case['path'].name}: missing expected issue(s): {', '.join(missing)}")
|
|
52
|
-
continue
|
|
53
|
-
|
|
54
|
-
passed += 1
|
|
55
|
-
|
|
56
|
-
total = len(CASES)
|
|
57
|
-
score = passed / total if total else 0.0
|
|
58
|
-
print(f"SCORE {score:.4f} ({passed}/{total})")
|
|
59
|
-
|
|
60
|
-
if failures:
|
|
61
|
-
for failure in failures:
|
|
62
|
-
print(f"FAIL {failure}")
|
|
63
|
-
return 1
|
|
64
|
-
|
|
65
|
-
print("PASS benchmark_validate")
|
|
66
|
-
return 0
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
if __name__ == "__main__":
|
|
70
|
-
raise SystemExit(main())
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
"""Validate experiments for structure and context hygiene."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import argparse
|
|
6
|
-
import re
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
REQUIRED_FILES = ("program.md", "measure.py", "loop.py", "results.tsv")
|
|
11
|
-
MAX_PROGRAM_CHARS = 1200
|
|
12
|
-
MAX_RESULTS_BYTES = 64_000
|
|
13
|
-
SLUG_RE = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$")
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def find_experiments(root: Path) -> list[Path]:
|
|
17
|
-
return sorted(
|
|
18
|
-
path
|
|
19
|
-
for path in root.iterdir()
|
|
20
|
-
if path.is_dir() and not path.name.startswith((".", "_"))
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def resolve_experiments(root: Path) -> list[Path]:
|
|
25
|
-
if not root.exists() or not root.is_dir():
|
|
26
|
-
return []
|
|
27
|
-
|
|
28
|
-
# Allow validating a single pack directly, not just a parent directory.
|
|
29
|
-
if any((root / filename).exists() for filename in REQUIRED_FILES):
|
|
30
|
-
return [root]
|
|
31
|
-
|
|
32
|
-
return find_experiments(root)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def validate_experiment(path: Path) -> list[str]:
|
|
36
|
-
issues: list[str] = []
|
|
37
|
-
|
|
38
|
-
if not SLUG_RE.match(path.name):
|
|
39
|
-
issues.append(f"{path.name}: invalid folder name, use lowercase-hyphen slug")
|
|
40
|
-
|
|
41
|
-
for filename in REQUIRED_FILES:
|
|
42
|
-
if not (path / filename).exists():
|
|
43
|
-
issues.append(f"{path.name}: missing required file {filename}")
|
|
44
|
-
|
|
45
|
-
program_path = path / "program.md"
|
|
46
|
-
if program_path.exists():
|
|
47
|
-
size = len(program_path.read_text(encoding="utf-8"))
|
|
48
|
-
if size > MAX_PROGRAM_CHARS:
|
|
49
|
-
issues.append(
|
|
50
|
-
f"{path.name}: program.md too long ({size} chars > {MAX_PROGRAM_CHARS})"
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
results_path = path / "results.tsv"
|
|
54
|
-
if results_path.exists():
|
|
55
|
-
size = results_path.stat().st_size
|
|
56
|
-
if size > MAX_RESULTS_BYTES:
|
|
57
|
-
issues.append(
|
|
58
|
-
f"{path.name}: results.tsv too large ({size} bytes > {MAX_RESULTS_BYTES})"
|
|
59
|
-
)
|
|
60
|
-
|
|
61
|
-
return issues
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def main() -> int:
|
|
65
|
-
parser = argparse.ArgumentParser(description="Validate experiment packs.")
|
|
66
|
-
parser.add_argument("root", nargs="?", default=".", help="Directory containing experiment packs")
|
|
67
|
-
args = parser.parse_args()
|
|
68
|
-
|
|
69
|
-
root = Path(args.root).resolve()
|
|
70
|
-
experiments = resolve_experiments(root)
|
|
71
|
-
if not experiments:
|
|
72
|
-
print("FAIL: no experiments found")
|
|
73
|
-
return 1
|
|
74
|
-
|
|
75
|
-
all_issues: list[str] = []
|
|
76
|
-
for path in experiments:
|
|
77
|
-
all_issues.extend(validate_experiment(path))
|
|
78
|
-
|
|
79
|
-
if all_issues:
|
|
80
|
-
print("FAIL")
|
|
81
|
-
for issue in all_issues:
|
|
82
|
-
print(f"- {issue}")
|
|
83
|
-
return 1
|
|
84
|
-
|
|
85
|
-
print(f"PASS: {len(experiments)} experiment(s) valid")
|
|
86
|
-
for path in experiments:
|
|
87
|
-
print(f"- {path.name}")
|
|
88
|
-
return 0
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if __name__ == "__main__":
|
|
92
|
-
raise SystemExit(main())
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
## Navigator - Feb 23
|
|
2
|
-
|
|
3
|
-
**Task:** Plan the authentication feature
|
|
4
|
-
**Delivered:** build.md with 4 tasks, visualization confirmed
|
|
5
|
-
**User reaction:** Approved plan, asked for more detail on OAuth flow
|
|
6
|
-
**Pattern:** User prefers step-by-step breakdowns over high-level summaries
|