devlyn-cli 1.15.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +104 -0
- package/CLAUDE.md +135 -21
- package/README.md +43 -125
- package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +272 -0
- package/benchmark/auto-resolve/README.md +114 -0
- package/benchmark/auto-resolve/RUBRIC.md +162 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +30 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/expected.json +68 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/spec.md +45 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/task.txt +8 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +54 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json +170 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json +84 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/metadata.json +21 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-fail.json +214 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-pass.json +223 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/setup.sh +5 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md +56 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/task.txt +14 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +28 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected-pair-plan-registry.json +162 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +65 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/metadata.json +19 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +56 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/task.txt +9 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +40 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/expected.json +57 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/setup.sh +6 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/spec.md +49 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/task.txt +9 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +38 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/expected.json +65 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/setup.sh +55 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/spec.md +49 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/task.txt +7 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +38 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/expected.json +77 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/spec.md +49 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/task.txt +10 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +50 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/expected.json +76 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/setup.sh +36 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/spec.md +46 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/task.txt +7 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +50 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/expected.json +63 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +48 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/task.txt +1 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +93 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/expected.json +74 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/setup.sh +28 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +62 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/task.txt +5 -0
- package/benchmark/auto-resolve/fixtures/SCHEMA.md +130 -0
- package/benchmark/auto-resolve/fixtures/test-repo/README.md +27 -0
- package/benchmark/auto-resolve/fixtures/test-repo/bin/cli.js +63 -0
- package/benchmark/auto-resolve/fixtures/test-repo/package-lock.json +823 -0
- package/benchmark/auto-resolve/fixtures/test-repo/package.json +22 -0
- package/benchmark/auto-resolve/fixtures/test-repo/playwright.config.js +17 -0
- package/benchmark/auto-resolve/fixtures/test-repo/server/index.js +37 -0
- package/benchmark/auto-resolve/fixtures/test-repo/tests/cli.test.js +25 -0
- package/benchmark/auto-resolve/fixtures/test-repo/tests/server.test.js +58 -0
- package/benchmark/auto-resolve/fixtures/test-repo/web/index.html +37 -0
- package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +174 -0
- package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +256 -0
- package/benchmark/auto-resolve/scripts/compile-report.py +331 -0
- package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +552 -0
- package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +430 -0
- package/benchmark/auto-resolve/scripts/judge.sh +359 -0
- package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +260 -0
- package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +274 -0
- package/benchmark/auto-resolve/scripts/oracle-test-fidelity.py +328 -0
- package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +401 -0
- package/benchmark/auto-resolve/scripts/pair-plan-lint.py +468 -0
- package/benchmark/auto-resolve/scripts/run-fixture.sh +691 -0
- package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +234 -0
- package/benchmark/auto-resolve/scripts/run-suite.sh +214 -0
- package/benchmark/auto-resolve/scripts/ship-gate.py +222 -0
- package/bin/devlyn.js +175 -17
- package/config/skills/_shared/adapters/README.md +64 -0
- package/config/skills/_shared/adapters/gpt-5-5.md +29 -0
- package/config/skills/_shared/adapters/opus-4-7.md +29 -0
- package/config/skills/{devlyn:auto-resolve/scripts → _shared}/archive_run.py +26 -0
- package/config/skills/_shared/codex-config.md +54 -0
- package/config/skills/_shared/codex-monitored.sh +141 -0
- package/config/skills/_shared/engine-preflight.md +35 -0
- package/config/skills/_shared/expected.schema.json +93 -0
- package/config/skills/_shared/pair-plan-schema.md +298 -0
- package/config/skills/_shared/runtime-principles.md +110 -0
- package/config/skills/_shared/spec-verify-check.py +519 -0
- package/config/skills/devlyn:ideate/SKILL.md +99 -429
- package/config/skills/devlyn:ideate/references/elicitation.md +97 -0
- package/config/skills/devlyn:ideate/references/from-spec-mode.md +54 -0
- package/config/skills/devlyn:ideate/references/project-mode.md +76 -0
- package/config/skills/devlyn:ideate/references/spec-template.md +102 -0
- package/config/skills/devlyn:resolve/SKILL.md +172 -184
- package/config/skills/devlyn:resolve/references/free-form-mode.md +68 -0
- package/config/skills/devlyn:resolve/references/phases/build-gate.md +45 -0
- package/config/skills/devlyn:resolve/references/phases/cleanup.md +39 -0
- package/config/skills/devlyn:resolve/references/phases/implement.md +42 -0
- package/config/skills/devlyn:resolve/references/phases/plan.md +42 -0
- package/config/skills/devlyn:resolve/references/phases/verify.md +69 -0
- package/config/skills/devlyn:resolve/references/state-schema.md +106 -0
- package/{config/skills → optional-skills}/devlyn:design-system/SKILL.md +1 -0
- package/{config/skills → optional-skills}/devlyn:reap/SKILL.md +1 -0
- package/{config/skills → optional-skills}/devlyn:team-design-ui/SKILL.md +5 -0
- package/package.json +12 -2
- package/scripts/lint-skills.sh +431 -0
- package/config/skills/devlyn:auto-resolve/SKILL.md +0 -252
- package/config/skills/devlyn:auto-resolve/evals/evals.json +0 -21
- package/config/skills/devlyn:auto-resolve/evals/task-doctor-subcommand.md +0 -42
- package/config/skills/devlyn:auto-resolve/references/build-gate.md +0 -130
- package/config/skills/devlyn:auto-resolve/references/engine-routing.md +0 -82
- package/config/skills/devlyn:auto-resolve/references/findings-schema.md +0 -103
- package/config/skills/devlyn:auto-resolve/references/phases/phase-1-build.md +0 -54
- package/config/skills/devlyn:auto-resolve/references/phases/phase-2-evaluate.md +0 -45
- package/config/skills/devlyn:auto-resolve/references/phases/phase-3-critic.md +0 -84
- package/config/skills/devlyn:auto-resolve/references/pipeline-routing.md +0 -114
- package/config/skills/devlyn:auto-resolve/references/pipeline-state.md +0 -201
- package/config/skills/devlyn:auto-resolve/scripts/terminal_verdict.py +0 -96
- package/config/skills/devlyn:browser-validate/SKILL.md +0 -164
- package/config/skills/devlyn:browser-validate/references/flow-testing.md +0 -118
- package/config/skills/devlyn:browser-validate/references/tier1-chrome.md +0 -137
- package/config/skills/devlyn:browser-validate/references/tier2-playwright.md +0 -195
- package/config/skills/devlyn:browser-validate/references/tier3-curl.md +0 -57
- package/config/skills/devlyn:clean/SKILL.md +0 -285
- package/config/skills/devlyn:design-ui/SKILL.md +0 -351
- package/config/skills/devlyn:discover-product/SKILL.md +0 -124
- package/config/skills/devlyn:evaluate/SKILL.md +0 -564
- package/config/skills/devlyn:feature-spec/SKILL.md +0 -630
- package/config/skills/devlyn:ideate/references/challenge-rubric.md +0 -122
- package/config/skills/devlyn:ideate/references/codex-critic-template.md +0 -42
- package/config/skills/devlyn:ideate/references/templates/item-spec.md +0 -90
- package/config/skills/devlyn:implement-ui/SKILL.md +0 -466
- package/config/skills/devlyn:preflight/SKILL.md +0 -355
- package/config/skills/devlyn:preflight/references/auditors/browser-auditor.md +0 -32
- package/config/skills/devlyn:preflight/references/auditors/code-auditor.md +0 -86
- package/config/skills/devlyn:preflight/references/auditors/docs-auditor.md +0 -38
- package/config/skills/devlyn:product-spec/SKILL.md +0 -603
- package/config/skills/devlyn:recommend-features/SKILL.md +0 -286
- package/config/skills/devlyn:review/SKILL.md +0 -161
- package/config/skills/devlyn:team-resolve/SKILL.md +0 -631
- package/config/skills/devlyn:team-review/SKILL.md +0 -493
- package/config/skills/devlyn:update-docs/SKILL.md +0 -463
- package/config/skills/workflow-routing/SKILL.md +0 -73
- /package/{config/skills → optional-skills}/devlyn:reap/scripts/reap.sh +0 -0
- /package/{config/skills → optional-skills}/devlyn:reap/scripts/scan.sh +0 -0
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
pair-plan-idgen.py — emit canonical_id_registry.json for a benchmark fixture.
|
|
4
|
+
|
|
5
|
+
Reads `expected.json` + `metadata.json` from the fixture directory and the
|
|
6
|
+
checked-in oracle scripts' `--list-categories` output (filtered through
|
|
7
|
+
metadata.json:pair_plan_oracle_categories). Produces a deterministic,
|
|
8
|
+
sorted-by-id registry that `pair-plan-lint.py` validates plans against.
|
|
9
|
+
|
|
10
|
+
Hard rules (iter-0022 D2 acceptance gates):
|
|
11
|
+
* NEVER reads any path containing `/results/`. A `builtins.open` /
|
|
12
|
+
`os.open` wrapper raises AssertionError if any code path tries.
|
|
13
|
+
Reading archived run artifacts would leak iter-0020 outcome data into
|
|
14
|
+
the registry source-of-truth, contaminating iter-0023 measurement.
|
|
15
|
+
* Same input → byte-identical output (after fixing the volatile
|
|
16
|
+
`generated_at` field via `--generated-at`). Lint Check 13 enforces.
|
|
17
|
+
* Output JSON is sorted by required_invariants[].id, sort_keys=True for
|
|
18
|
+
every dict, indent=2 for human review, trailing newline for POSIX.
|
|
19
|
+
|
|
20
|
+
See `config/skills/_shared/pair-plan-schema.md` for the full registry
|
|
21
|
+
shape and the slug rules implemented here.
|
|
22
|
+
"""
|
|
23
|
+
import argparse
|
|
24
|
+
import builtins
|
|
25
|
+
import datetime
|
|
26
|
+
import hashlib
|
|
27
|
+
import json
|
|
28
|
+
import os
|
|
29
|
+
import pathlib
|
|
30
|
+
import re
|
|
31
|
+
import subprocess
|
|
32
|
+
import sys
|
|
33
|
+
|
|
34
|
+
# Oracle name -> filename of the checked-in oracle script. The filename is
# joined onto the scripts directory wherever an oracle is run or hashed.
ORACLE_SCRIPTS = {
    "test-fidelity": "oracle-test-fidelity.py",
    "scope-tier-a": "oracle-scope-tier-a.py",
    "scope-tier-b": "oracle-scope-tier-b.py",
}

# Value emitted as `schema_version` in the generated registry.
SCHEMA_VERSION = "1"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# Path trap — refuse any read under a `/results/` directory.
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
# Handles to the genuine openers, captured at import time so the trapped
# wrappers can still delegate to them after the module attributes are swapped.
_real_os_open = os.open
_real_open = builtins.open
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _trap_path(path):
    """Raise AssertionError if `path` points under a `results/` directory.

    Read-side only: call this just before opening for read; write-mode opens
    skip the check.

    Args:
        path: str, bytes/bytearray, or os.PathLike. Any other value (for
            example an integer file descriptor) is stringified as-is.

    Raises:
        AssertionError: when the normalized path has a `results` directory
            component, whether the path is absolute or relative.
    """
    # Unwrap PathLike first: os.fspath() may hand back bytes, which then
    # needs the same decoding as a raw bytes path.
    if isinstance(path, os.PathLike):
        path = os.fspath(path)
    if isinstance(path, (bytes, bytearray)):
        path = path.decode("utf-8", "replace")
    s = str(path).replace("\\", "/")
    # Prefix a separator so a relative path that *starts* with `results/`
    # (no leading slash) is caught as well as `.../results/...`.
    if "/results/" in "/" + s:
        raise AssertionError(
            f"pair-plan-idgen.py: forbidden read — {s!r} contains '/results/'. "
            "iter-0022 hard rule: idgen MUST NOT read archived run artifacts. "
            "Registry sources are limited to expected.json + metadata.json + checked-in oracle scripts. "
            "Writes to /results/ are legitimate (e.g. preflight output) and are NOT trapped."
        )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _is_read_mode(args, kwargs):
    """Tell whether an `open()` call can read pre-existing file content.

    Args:
        args: positional arguments that followed `file` in the call; the
            first one, if present, is the mode.
        kwargs: keyword arguments of the call; `mode` is looked up here first.

    Returns:
        True when no mode is given (default 'r'), when the mode has none of
        'w' / 'a' / 'x' (so 'r', 'rb', 'r+', ...), or when it is an append
        mode with '+' ('a+'), which can also read what is already there.
        False for 'w', 'w+', 'x', 'x+', 'a' and for non-string modes.
    """
    mode = kwargs.get("mode")
    if mode is None and args:
        mode = args[0]
    if mode is None:
        return True  # default 'r'
    if not isinstance(mode, str):
        return False
    if not any(flag in mode for flag in "wax"):
        # Plain read, or 'r+': read/write on the existing content.
        return True
    # 'w+' truncates and 'x+' only creates new files, so nothing old can be
    # read back through them; 'a+' keeps the old content readable.
    return "a" in mode and "+" in mode
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _trapped_open(file, *args, **kwargs):
    """Stand-in for the built-in `open` that vets read-mode calls.

    When the requested mode counts as a read, the target path is checked by
    `_trap_path` first (which raises on a forbidden path). Every call is then
    forwarded unchanged to the real `open` captured at import time.
    """
    wants_read = _is_read_mode(args, kwargs)
    if wants_read:
        _trap_path(file)
    return _real_open(file, *args, **kwargs)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _is_read_flags(flags):
    """Tell whether `os.open` flags can read pre-existing file content.

    Args:
        flags: integer bitmask of `os.O_*` constants passed to `os.open`.

    Returns:
        False for write-only access, for opens that truncate (O_TRUNC), and
        for exclusive creates (O_CREAT | O_EXCL); those cannot expose old
        content. True otherwise, which includes O_RDONLY and O_RDWR even when
        combined with O_CREAT or O_APPEND.
    """
    wronly = getattr(os, "O_WRONLY", 1)
    rdwr = getattr(os, "O_RDWR", 2)
    # O_ACCMODE is not defined on every platform; fall back to the union of
    # the two non-zero access modes.
    accmode = getattr(os, "O_ACCMODE", wronly | rdwr)
    if (flags & accmode) == wronly:
        return False  # write-only descriptor: nothing can be read through it
    if flags & getattr(os, "O_TRUNC", 0):
        return False  # old content is discarded at open time
    creat = getattr(os, "O_CREAT", 0)
    excl = getattr(os, "O_EXCL", 0)
    if creat and excl and (flags & creat) and (flags & excl):
        return False  # exclusive create only succeeds for a brand-new file
    return True
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _trapped_os_open(path, flags, mode=0o777, **kwargs):
    """Stand-in for `os.open` that vets read-side calls.

    When `flags` count as a read, `path` is checked by `_trap_path` first
    (which raises on a forbidden path). The call is then forwarded to the
    real `os.open` captured at import time, with any extra keyword arguments
    passed through.
    """
    reading = _is_read_flags(flags)
    if reading:
        _trap_path(path)
    return _real_os_open(path, flags, mode, **kwargs)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def install_path_trap():
    """Swap the process-wide file openers for the trapped wrappers.

    Replaces `builtins.open`, `io.open` and `os.open`. The change is global
    to the interpreter and is not undone by this module. Calling it again
    simply reassigns the same wrappers.
    """
    # Function-scope import: `io` is not among this file's top-level imports.
    import io

    builtins.open = _trapped_open
    # `io.open` is the same callable as the built-in `open`, and CPython's
    # pathlib.Path.open() (and so read_text()/read_bytes()) goes through
    # `io.open`; leaving it unpatched would let pathlib reads skip the trap.
    io.open = _trapped_open
    os.open = _trapped_os_open
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# ---------------------------------------------------------------------------
|
|
107
|
+
# Slug + sha helpers.
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
def sanitize(s, max_len):
    """Turn arbitrary text into a lowercase `[a-z0-9_]` slug fragment.

    Falsy input is treated as the empty string. Each run of characters
    outside `a-z0-9` becomes a single underscore, leading and trailing
    underscores are removed, and the result is cut to at most `max_len`
    characters. The cut happens last, so a truncated fragment may end in an
    underscore.
    """
    lowered = "" if not s else s.lower()
    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered)
    return collapsed.strip("_")[:max_len]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def canonical_compact_json(obj):
    """Serialize `obj` to a compact, key-sorted JSON string.

    Keys are sorted, separators carry no whitespace, non-ASCII characters are
    emitted verbatim, and NaN/Infinity are rejected with ValueError.
    """
    encoder = json.JSONEncoder(
        allow_nan=False,
        ensure_ascii=False,
        separators=(",", ":"),
        sort_keys=True,
    )
    return encoder.encode(obj)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def sha8(s):
    """Return the first 8 hex characters of the SHA-256 of `s` (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()[:8]
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def file_sha256(path):
    """Return the hex SHA-256 digest of the raw bytes of the file at `path`."""
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        digest.update(handle.read())
    return digest.hexdigest()
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def forbidden_pattern_slug(item, index, prior_slugs):
    """Build the registry id for one `forbidden_patterns` item.

    The id is `forbidden_pattern__<description>__<first file>`, with the
    description sanitized to at most 60 characters and the first entry of
    `files` (or the empty string) sanitized to at most 30. If that id is
    already in `prior_slugs`, `__i<index>` is appended to tell them apart.
    `prior_slugs` is only read here, never modified.
    """
    description = item.get("description", "")
    file_list = item.get("files", []) or []
    first_file = file_list[0] if file_list else ""
    slug = "__".join(
        ("forbidden_pattern", sanitize(description, 60), sanitize(first_file, 30))
    )
    if slug not in prior_slugs:
        return slug
    return f"{slug}__i{index}"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def verification_slug(verification_obj):
    """Build the registry id for one `verification_commands` item.

    The id is `verification__` followed by the 8-hex-character hash of the
    item's canonical compact JSON form.
    """
    canonical = canonical_compact_json(verification_obj)
    return "verification__" + sha8(canonical)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# Oracle category enumeration via subprocess.
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
def list_oracle_categories(scripts_dir, oracle_name):
    """Ask an oracle script for its category list.

    Runs `<scripts_dir>/<script for oracle_name> --list-categories` with the
    current interpreter and parses its stdout as JSON.

    Returns:
        The `categories` value of the parsed payload.

    Raises:
        subprocess.CalledProcessError: the script exited non-zero.
        ValueError: the payload's `oracle` field does not equal `oracle_name`.
    """
    command = [
        sys.executable,
        str(scripts_dir / ORACLE_SCRIPTS[oracle_name]),
        "--list-categories",
    ]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    payload = json.loads(completed.stdout)
    reported = payload.get("oracle")
    if reported == oracle_name:
        return payload["categories"]
    raise ValueError(
        f"oracle name mismatch: expected {oracle_name}, got {reported}"
    )
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
# Registry assembly.
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
def _forbidden_pattern_entries(expected):
    """Build one registry entry per `expected.json:forbidden_patterns` item."""
    entries = []
    seen_slugs = set()
    for i, item in enumerate(expected.get("forbidden_patterns", []) or []):
        slug = forbidden_pattern_slug(item, i, seen_slugs)
        seen_slugs.add(slug)
        desc = item.get("description", "")
        files = item.get("files", []) or []
        pattern = item.get("pattern", "")
        entries.append({
            "id": slug,
            "source_field": f"expected.json/forbidden_patterns/{i}",
            "source_ref": f"expected.json:forbidden_patterns[{i}]",
            "operational_check": (
                f"variant arm output MUST NOT contain regex pattern {pattern!r} "
                f"in files {files}; rationale: {desc}"
            ),
            "severity": item.get("severity", "flag"),
            "authority": "expected.json/forbidden_patterns",
        })
    return entries


def _verification_entries(expected):
    """Build one registry entry per `expected.json:verification_commands` item."""
    entries = []
    for i, item in enumerate(expected.get("verification_commands", []) or []):
        cmd = item.get("cmd", "")
        exit_code = item.get("exit_code")
        sc = item.get("stdout_contains", []) or []
        sn = item.get("stdout_not_contains", []) or []
        entries.append({
            "id": verification_slug(item),
            "source_field": f"expected.json/verification_commands/{i}",
            "source_ref": f"expected.json:verification_commands[{i}]",
            "operational_check": (
                f"running `{cmd}` in the post-arm work dir MUST exit with code {exit_code}; "
                f"stdout MUST contain all of {sc}; stdout MUST NOT contain any of {sn}"
            ),
            "severity": "hard",
            "authority": "expected.json/verification_commands",
        })
    return entries


def _file_list_entries(expected):
    """Build entries for `required_files`, `forbidden_files`, `spec_output_files` (in that order)."""
    entries = []
    for path in expected.get("required_files", []) or []:
        entries.append({
            "id": f"required_file__{sanitize(path, 60)}",
            "source_field": "expected.json/required_files",
            "source_ref": f"expected.json:required_files[{path}]",
            "operational_check": (
                f"variant arm output MUST contain file {path!r} "
                "(created or preserved)"
            ),
            "severity": "hard",
            "authority": "expected.json/required_files",
        })
    for path in expected.get("forbidden_files", []) or []:
        entries.append({
            "id": f"forbidden_file__{sanitize(path, 60)}",
            "source_field": "expected.json/forbidden_files",
            "source_ref": f"expected.json:forbidden_files[{path}]",
            "operational_check": (
                f"variant arm output MUST NOT add file {path!r}"
            ),
            "severity": "hard",
            "authority": "expected.json/forbidden_files",
        })
    for path in expected.get("spec_output_files", []) or []:
        entries.append({
            "id": f"spec_output_file__{sanitize(path, 60)}",
            "source_field": "expected.json/spec_output_files",
            "source_ref": f"expected.json:spec_output_files[{path}]",
            "operational_check": (
                "variant-touched files MUST be inside (or reachable via static "
                f"imports from) the spec_output_files set; {path!r} is one Tier C seed"
            ),
            "severity": "warn",
            "authority": "expected.json/spec_output_files",
        })
    return entries


def _max_deps_entries(expected):
    """Build the single `max_deps_added` entry, or nothing when the key is absent."""
    if "max_deps_added" not in expected:
        return []
    v = expected["max_deps_added"]
    return [{
        "id": f"max_deps_added__{v}",
        "source_field": "expected.json/max_deps_added",
        "source_ref": "expected.json:max_deps_added",
        "operational_check": (
            f"variant arm MUST NOT add more than {v} new npm dependencies "
            "(count delta of package.json:dependencies + devDependencies)"
        ),
        "severity": "hard",
        "authority": "expected.json/max_deps_added",
    }]


def _oracle_entries(metadata, scripts_dir):
    """Build entries for `metadata.json:pair_plan_oracle_categories`.

    Returns `(entries, used_oracles)`, where `used_oracles` is the set of
    oracle names that contributed at least one entry.
    """
    entries = []
    used_oracles = set()
    cat_index = {}  # oracle name -> its category list, fetched once per oracle
    for entry_id in metadata.get("pair_plan_oracle_categories", []) or []:
        if ":" not in entry_id:
            raise ValueError(
                f"malformed pair_plan_oracle_categories entry: {entry_id!r} (expected '<oracle>:<category>')"
            )
        oracle_name = entry_id.split(":", 1)[0]
        if oracle_name not in ORACLE_SCRIPTS:
            raise ValueError(
                f"unknown oracle {oracle_name!r} (known: {sorted(ORACLE_SCRIPTS)})"
            )
        if oracle_name not in cat_index:
            cat_index[oracle_name] = list_oracle_categories(scripts_dir, oracle_name)
        category = next(
            (c for c in cat_index[oracle_name] if c["id"] == entry_id),
            None,
        )
        if category is None:
            available = [c["id"] for c in cat_index[oracle_name]]
            raise ValueError(
                f"oracle {oracle_name!r} has no category {entry_id!r}; available: {available}"
            )
        used_oracles.add(oracle_name)
        entries.append({
            "id": category["id"],
            "source_field": f"oracle/{oracle_name}/{category['id']}",
            "source_ref": f"{ORACLE_SCRIPTS[oracle_name]}",
            "operational_check": category["operational_check"],
            "severity": category["severity"],
            "authority": "metadata/oracle-allowlist",
        })
    return entries, used_oracles


def _repo_relative(path, repo_root):
    """Return `path` relative to `repo_root` with forward slashes.

    Falls back to `str(path)` unchanged when the resolved path is not under
    `repo_root`.
    """
    try:
        # as_posix() keeps the emitted string identical on every platform;
        # str() of a Windows path would use backslashes.
        return pathlib.Path(path).resolve().relative_to(repo_root).as_posix()
    except ValueError:
        return str(path)


def build_registry(fixture_dir, scripts_dir, generated_at, repo_root):
    """Assemble the canonical id registry for one fixture.

    Args:
        fixture_dir: directory holding `expected.json` and `metadata.json`.
        scripts_dir: directory holding the oracle scripts; must support the
            `/` path operator (a `pathlib.Path`).
        generated_at: value stored verbatim under `generated_at`.
        repo_root: base that the recorded source paths are made relative to.

    Returns:
        A dict with `schema_version`, `fixture_id`, `generated_at`,
        `generated_from` (source paths and SHA-256 digests) and
        `required_invariants` (entries sorted by `id`).

    Raises:
        ValueError: a `pair_plan_oracle_categories` entry is malformed, names
            an unknown oracle, or names a category the oracle does not list.
    """
    fixture_dir = pathlib.Path(fixture_dir).resolve()
    expected_path = fixture_dir / "expected.json"
    metadata_path = fixture_dir / "metadata.json"

    with open(expected_path, "r", encoding="utf-8") as f:
        expected = json.load(f)
    with open(metadata_path, "r", encoding="utf-8") as f:
        metadata = json.load(f)

    fixture_id = metadata.get("id") or fixture_dir.name

    # Section order matters only for entries that share an id: the sort below
    # is stable, so ties keep this insertion order.
    entries = []
    entries.extend(_forbidden_pattern_entries(expected))
    entries.extend(_verification_entries(expected))
    entries.extend(_file_list_entries(expected))
    entries.extend(_max_deps_entries(expected))
    oracle_entries, used_oracles = _oracle_entries(metadata, scripts_dir)
    entries.extend(oracle_entries)

    # sort entries by id (deterministic)
    entries.sort(key=lambda e: e["id"])

    # file shas (raw bytes); only oracles that contributed entries are hashed
    expected_sha = file_sha256(expected_path)
    metadata_sha = file_sha256(metadata_path)
    oracle_shas = {
        ora: file_sha256(scripts_dir / ORACLE_SCRIPTS[ora])
        for ora in sorted(used_oracles)
    }

    return {
        "schema_version": SCHEMA_VERSION,
        "fixture_id": fixture_id,
        "generated_at": generated_at,
        "generated_from": {
            "expected_path": _repo_relative(expected_path, repo_root),
            "expected_sha256": expected_sha,
            "metadata_path": _repo_relative(metadata_path, repo_root),
            "metadata_sha256": metadata_sha,
            "oracle_script_shas": oracle_shas,
        },
        "required_invariants": entries,
    }
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def main():
    """Command-line entry point: build the registry and write it out.

    Parses the arguments, installs the read trap, builds the registry for
    `--fixture`, and writes it as indented, key-sorted JSON with a trailing
    newline to `--output`, or to stdout when no output path is given.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--fixture",
        required=True,
        help="Path to fixture directory (e.g. benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand)",
    )
    ap.add_argument(
        "--scripts-dir",
        default="benchmark/auto-resolve/scripts",
        help="Directory containing oracle-*.py scripts",
    )
    ap.add_argument(
        "--output",
        help="Write to this path (default: stdout)",
    )
    ap.add_argument(
        "--generated-at",
        default=None,
        help="ISO8601 timestamp to embed (default: UTC now); pin to a fixed value for determinism testing",
    )
    ap.add_argument(
        "--repo-root",
        default=None,
        help="Repo root for resolving relative paths in output (default: cwd)",
    )
    args = ap.parse_args()

    # Install the trap before any fixture file is opened.
    install_path_trap()

    generated_at = (
        args.generated_at
        or datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    )
    scripts_dir = pathlib.Path(args.scripts_dir).resolve()
    repo_root = pathlib.Path(args.repo_root or os.getcwd()).resolve()

    registry = build_registry(args.fixture, scripts_dir, generated_at, repo_root)
    out_text = json.dumps(
        registry,
        indent=2,
        sort_keys=True,
        ensure_ascii=False,
    ) + "\n"

    if args.output:
        # newline="\n" turns off platform newline translation so the file is
        # byte-identical wherever it is generated.
        with open(args.output, "w", encoding="utf-8", newline="\n") as f:
            f.write(out_text)
    else:
        sys.stdout.write(out_text)


if __name__ == "__main__":
    main()
|