devlyn-cli 1.15.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/AGENTS.md +104 -0
  2. package/CLAUDE.md +135 -21
  3. package/README.md +43 -125
  4. package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +272 -0
  5. package/benchmark/auto-resolve/README.md +114 -0
  6. package/benchmark/auto-resolve/RUBRIC.md +162 -0
  7. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +30 -0
  8. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/expected.json +68 -0
  9. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/metadata.json +10 -0
  10. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/setup.sh +4 -0
  11. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/spec.md +45 -0
  12. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/task.txt +8 -0
  13. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +54 -0
  14. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json +170 -0
  15. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json +84 -0
  16. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/metadata.json +21 -0
  17. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-fail.json +214 -0
  18. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-pass.json +223 -0
  19. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/setup.sh +5 -0
  20. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md +56 -0
  21. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/task.txt +14 -0
  22. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +28 -0
  23. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected-pair-plan-registry.json +162 -0
  24. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +65 -0
  25. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/metadata.json +19 -0
  26. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/setup.sh +4 -0
  27. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +56 -0
  28. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/task.txt +9 -0
  29. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +40 -0
  30. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/expected.json +57 -0
  31. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/metadata.json +10 -0
  32. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/setup.sh +6 -0
  33. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/spec.md +49 -0
  34. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/task.txt +9 -0
  35. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +38 -0
  36. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/expected.json +65 -0
  37. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/metadata.json +10 -0
  38. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/setup.sh +55 -0
  39. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/spec.md +49 -0
  40. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/task.txt +7 -0
  41. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +38 -0
  42. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/expected.json +77 -0
  43. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/metadata.json +10 -0
  44. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/setup.sh +4 -0
  45. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/spec.md +49 -0
  46. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/task.txt +10 -0
  47. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +50 -0
  48. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/expected.json +76 -0
  49. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/metadata.json +10 -0
  50. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/setup.sh +36 -0
  51. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/spec.md +46 -0
  52. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/task.txt +7 -0
  53. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +50 -0
  54. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/expected.json +63 -0
  55. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/metadata.json +10 -0
  56. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/setup.sh +4 -0
  57. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +48 -0
  58. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/task.txt +1 -0
  59. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +93 -0
  60. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/expected.json +74 -0
  61. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/metadata.json +10 -0
  62. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/setup.sh +28 -0
  63. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +62 -0
  64. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/task.txt +5 -0
  65. package/benchmark/auto-resolve/fixtures/SCHEMA.md +130 -0
  66. package/benchmark/auto-resolve/fixtures/test-repo/README.md +27 -0
  67. package/benchmark/auto-resolve/fixtures/test-repo/bin/cli.js +63 -0
  68. package/benchmark/auto-resolve/fixtures/test-repo/package-lock.json +823 -0
  69. package/benchmark/auto-resolve/fixtures/test-repo/package.json +22 -0
  70. package/benchmark/auto-resolve/fixtures/test-repo/playwright.config.js +17 -0
  71. package/benchmark/auto-resolve/fixtures/test-repo/server/index.js +37 -0
  72. package/benchmark/auto-resolve/fixtures/test-repo/tests/cli.test.js +25 -0
  73. package/benchmark/auto-resolve/fixtures/test-repo/tests/server.test.js +58 -0
  74. package/benchmark/auto-resolve/fixtures/test-repo/web/index.html +37 -0
  75. package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +174 -0
  76. package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +256 -0
  77. package/benchmark/auto-resolve/scripts/compile-report.py +331 -0
  78. package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +552 -0
  79. package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +430 -0
  80. package/benchmark/auto-resolve/scripts/judge.sh +359 -0
  81. package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +260 -0
  82. package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +274 -0
  83. package/benchmark/auto-resolve/scripts/oracle-test-fidelity.py +328 -0
  84. package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +401 -0
  85. package/benchmark/auto-resolve/scripts/pair-plan-lint.py +468 -0
  86. package/benchmark/auto-resolve/scripts/run-fixture.sh +691 -0
  87. package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +234 -0
  88. package/benchmark/auto-resolve/scripts/run-suite.sh +214 -0
  89. package/benchmark/auto-resolve/scripts/ship-gate.py +222 -0
  90. package/bin/devlyn.js +129 -17
  91. package/config/skills/_shared/adapters/README.md +64 -0
  92. package/config/skills/_shared/adapters/gpt-5-5.md +29 -0
  93. package/config/skills/_shared/adapters/opus-4-7.md +29 -0
  94. package/config/skills/{devlyn:auto-resolve/scripts → _shared}/archive_run.py +26 -0
  95. package/config/skills/_shared/codex-config.md +54 -0
  96. package/config/skills/_shared/codex-monitored.sh +141 -0
  97. package/config/skills/_shared/engine-preflight.md +35 -0
  98. package/config/skills/_shared/expected.schema.json +93 -0
  99. package/config/skills/_shared/pair-plan-schema.md +298 -0
  100. package/config/skills/_shared/runtime-principles.md +110 -0
  101. package/config/skills/_shared/spec-verify-check.py +519 -0
  102. package/config/skills/devlyn:ideate/SKILL.md +99 -429
  103. package/config/skills/devlyn:ideate/references/elicitation.md +97 -0
  104. package/config/skills/devlyn:ideate/references/from-spec-mode.md +54 -0
  105. package/config/skills/devlyn:ideate/references/project-mode.md +76 -0
  106. package/config/skills/devlyn:ideate/references/spec-template.md +102 -0
  107. package/config/skills/devlyn:resolve/SKILL.md +172 -184
  108. package/config/skills/devlyn:resolve/references/free-form-mode.md +68 -0
  109. package/config/skills/devlyn:resolve/references/phases/build-gate.md +45 -0
  110. package/config/skills/devlyn:resolve/references/phases/cleanup.md +39 -0
  111. package/config/skills/devlyn:resolve/references/phases/implement.md +42 -0
  112. package/config/skills/devlyn:resolve/references/phases/plan.md +42 -0
  113. package/config/skills/devlyn:resolve/references/phases/verify.md +69 -0
  114. package/config/skills/devlyn:resolve/references/state-schema.md +106 -0
  115. package/{config/skills → optional-skills}/devlyn:design-system/SKILL.md +1 -0
  116. package/{config/skills → optional-skills}/devlyn:reap/SKILL.md +1 -0
  117. package/{config/skills → optional-skills}/devlyn:team-design-ui/SKILL.md +5 -0
  118. package/package.json +12 -2
  119. package/scripts/lint-skills.sh +431 -0
  120. package/config/skills/devlyn:auto-resolve/SKILL.md +0 -252
  121. package/config/skills/devlyn:auto-resolve/evals/evals.json +0 -21
  122. package/config/skills/devlyn:auto-resolve/evals/task-doctor-subcommand.md +0 -42
  123. package/config/skills/devlyn:auto-resolve/references/build-gate.md +0 -130
  124. package/config/skills/devlyn:auto-resolve/references/engine-routing.md +0 -82
  125. package/config/skills/devlyn:auto-resolve/references/findings-schema.md +0 -103
  126. package/config/skills/devlyn:auto-resolve/references/phases/phase-1-build.md +0 -54
  127. package/config/skills/devlyn:auto-resolve/references/phases/phase-2-evaluate.md +0 -45
  128. package/config/skills/devlyn:auto-resolve/references/phases/phase-3-critic.md +0 -84
  129. package/config/skills/devlyn:auto-resolve/references/pipeline-routing.md +0 -114
  130. package/config/skills/devlyn:auto-resolve/references/pipeline-state.md +0 -201
  131. package/config/skills/devlyn:auto-resolve/scripts/terminal_verdict.py +0 -96
  132. package/config/skills/devlyn:browser-validate/SKILL.md +0 -164
  133. package/config/skills/devlyn:browser-validate/references/flow-testing.md +0 -118
  134. package/config/skills/devlyn:browser-validate/references/tier1-chrome.md +0 -137
  135. package/config/skills/devlyn:browser-validate/references/tier2-playwright.md +0 -195
  136. package/config/skills/devlyn:browser-validate/references/tier3-curl.md +0 -57
  137. package/config/skills/devlyn:clean/SKILL.md +0 -285
  138. package/config/skills/devlyn:design-ui/SKILL.md +0 -351
  139. package/config/skills/devlyn:discover-product/SKILL.md +0 -124
  140. package/config/skills/devlyn:evaluate/SKILL.md +0 -564
  141. package/config/skills/devlyn:feature-spec/SKILL.md +0 -630
  142. package/config/skills/devlyn:ideate/references/challenge-rubric.md +0 -122
  143. package/config/skills/devlyn:ideate/references/codex-critic-template.md +0 -42
  144. package/config/skills/devlyn:ideate/references/templates/item-spec.md +0 -90
  145. package/config/skills/devlyn:implement-ui/SKILL.md +0 -466
  146. package/config/skills/devlyn:preflight/SKILL.md +0 -355
  147. package/config/skills/devlyn:preflight/references/auditors/browser-auditor.md +0 -32
  148. package/config/skills/devlyn:preflight/references/auditors/code-auditor.md +0 -86
  149. package/config/skills/devlyn:preflight/references/auditors/docs-auditor.md +0 -38
  150. package/config/skills/devlyn:product-spec/SKILL.md +0 -603
  151. package/config/skills/devlyn:recommend-features/SKILL.md +0 -286
  152. package/config/skills/devlyn:review/SKILL.md +0 -161
  153. package/config/skills/devlyn:team-resolve/SKILL.md +0 -631
  154. package/config/skills/devlyn:team-review/SKILL.md +0 -493
  155. package/config/skills/devlyn:update-docs/SKILL.md +0 -463
  156. package/config/skills/workflow-routing/SKILL.md +0 -73
  157. /package/{config/skills → optional-skills}/devlyn:reap/scripts/reap.sh +0 -0
  158. /package/{config/skills → optional-skills}/devlyn:reap/scripts/scan.sh +0 -0
@@ -0,0 +1,401 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ pair-plan-idgen.py — emit canonical_id_registry.json for a benchmark fixture.
4
+
5
+ Reads `expected.json` + `metadata.json` from the fixture directory and the
6
+ checked-in oracle scripts' `--list-categories` output (filtered through
7
+ metadata.json:pair_plan_oracle_categories). Produces a deterministic,
8
+ sorted-by-id registry that `pair-plan-lint.py` validates plans against.
9
+
10
+ Hard rules (iter-0022 D2 acceptance gates):
11
+ * NEVER reads any path containing `/results/`. A `builtins.open` /
12
+ `os.open` wrapper raises AssertionError if any code path tries.
13
+ Reading archived run artifacts would leak iter-0020 outcome data into
14
+ the registry source-of-truth, contaminating iter-0023 measurement.
15
+ * Same input → byte-identical output (after fixing the volatile
16
+ `generated_at` field via `--generated-at`). Lint Check 13 enforces.
17
+ * Output JSON is sorted by required_invariants[].id, sort_keys=True for
18
+ every dict, indent=2 for human review, trailing newline for POSIX.
19
+
20
+ See `config/skills/_shared/pair-plan-schema.md` for the full registry
21
+ shape and the slug rules implemented here.
22
+ """
23
+ import argparse
24
+ import builtins
25
+ import datetime
26
+ import hashlib
27
+ import json
28
+ import os
29
+ import pathlib
30
+ import re
31
+ import subprocess
32
+ import sys
33
+
34
# Oracle name -> file name of the oracle script that answers
# `--list-categories` for it. The file name is joined onto the scripts
# directory by the callers further down in this file.
ORACLE_SCRIPTS = dict(
    (
        ("test-fidelity", "oracle-test-fidelity.py"),
        ("scope-tier-a", "oracle-scope-tier-a.py"),
        ("scope-tier-b", "oracle-scope-tier-b.py"),
    )
)

# Value written to the registry's `schema_version` field.
SCHEMA_VERSION = "1"
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Path trap — refuse any read under a `/results/` directory.
45
+ # ---------------------------------------------------------------------------
46
+ _real_open = builtins.open
47
+ _real_os_open = os.open
48
+
49
+
50
+ def _trap_path(path):
51
+ """Raise if `path` contains a `/results/` segment. Read-side only — call
52
+ this just before opening for read; write-mode opens skip the check."""
53
+ if isinstance(path, (bytes, bytearray)):
54
+ path = path.decode("utf-8", "replace")
55
+ elif isinstance(path, os.PathLike):
56
+ path = os.fspath(path)
57
+ s = str(path).replace("\\", "/")
58
+ if "/results/" in s:
59
+ raise AssertionError(
60
+ f"pair-plan-idgen.py: forbidden read — {s!r} contains '/results/'. "
61
+ "iter-0022 hard rule: idgen MUST NOT read archived run artifacts. "
62
+ "Registry sources are limited to expected.json + metadata.json + checked-in oracle scripts. "
63
+ "Writes to /results/ are legitimate (e.g. preflight output) and are NOT trapped."
64
+ )
65
+
66
+
67
+ def _is_read_mode(args, kwargs):
68
+ """`open()` 1st positional or `mode` kwarg. Default 'r' is read."""
69
+ mode = kwargs.get("mode")
70
+ if mode is None and args:
71
+ mode = args[0]
72
+ if mode is None:
73
+ return True # default 'r'
74
+ return isinstance(mode, str) and ("w" not in mode and "a" not in mode and "x" not in mode and "+" not in mode)
75
+
76
+
77
def _trapped_open(file, *args, **kwargs):
    """Replacement for ``builtins.open`` that vets read-mode opens.

    When `_is_read_mode` classifies the call as a read, `file` is passed to
    `_trap_path` first (which raises on a forbidden path). The call is then
    forwarded unchanged to the original ``open`` captured in `_real_open`.
    """
    wants_read = _is_read_mode(args, kwargs)
    if wants_read:
        _trap_path(file)
    return _real_open(file, *args, **kwargs)
81
+
82
+
83
+ def _is_read_flags(flags):
84
+ """`os.open` flags: O_WRONLY | O_RDWR | O_CREAT | O_APPEND | O_TRUNC are write-side."""
85
+ write_bits = (
86
+ getattr(os, "O_WRONLY", 0)
87
+ | getattr(os, "O_RDWR", 0)
88
+ | getattr(os, "O_CREAT", 0)
89
+ | getattr(os, "O_APPEND", 0)
90
+ | getattr(os, "O_TRUNC", 0)
91
+ )
92
+ return (flags & write_bits) == 0
93
+
94
+
95
def _trapped_os_open(path, flags, mode=0o777, **kwargs):
    """Replacement for ``os.open`` that vets read-side opens.

    If `_is_read_flags` classifies `flags` as a read, `path` is checked by
    `_trap_path` (which raises on a forbidden path). The call is then
    forwarded to the original ``os.open`` captured in `_real_os_open`, with
    `mode` passed positionally and any extra keyword arguments passed through.
    """
    needs_check = _is_read_flags(flags)
    if needs_check:
        _trap_path(path)
    return _real_os_open(path, flags, mode, **kwargs)
99
+
100
+
101
def install_path_trap():
    """Rebind ``builtins.open`` and ``os.open`` to the trapped wrappers.

    Process-wide and not undone by this function.
    NOTE(review): only these two names are rebound; any other route to the
    filesystem (e.g. ``io.open``) is presumably left untouched — confirm
    whether that matters for the callers of this script.
    """
    replacements = (
        (builtins, _trapped_open),
        (os, _trapped_os_open),
    )
    for module, wrapper in replacements:
        setattr(module, "open", wrapper)
104
+
105
+
106
+ # ---------------------------------------------------------------------------
107
+ # Slug + sha helpers.
108
+ # ---------------------------------------------------------------------------
109
def sanitize(s, max_len):
    """Turn arbitrary text into a lowercase ``[a-z0-9_]`` slug fragment.

    A falsy `s` (``None`` or ``""``) is treated as the empty string. The text
    is lower-cased, every run of characters outside ``a-z0-9`` becomes a
    single ``_``, leading/trailing underscores are stripped, and the result is
    cut to at most `max_len` characters. Truncation happens last, so the
    returned slug can still end in ``_``.
    """
    lowered = (s or "").lower()
    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered).strip("_")
    return collapsed[:max_len]
114
+
115
+
116
def canonical_compact_json(obj):
    """Serialise `obj` to a compact JSON string with a stable key order.

    Keys are sorted, separators carry no whitespace, non-ASCII characters are
    emitted as-is, and NaN/Infinity are rejected (``json.dumps`` raises
    ``ValueError`` because ``allow_nan`` is off).
    """
    dump_options = {
        "sort_keys": True,
        "separators": (",", ":"),
        "ensure_ascii": False,
        "allow_nan": False,
    }
    return json.dumps(obj, **dump_options)
124
+
125
+
126
def sha8(s):
    """Return the first 8 hex characters of the SHA-256 of `s` (UTF-8 encoded)."""
    hasher = hashlib.sha256()
    hasher.update(s.encode("utf-8"))
    full_hex = hasher.hexdigest()
    return full_hex[:8]
128
+
129
+
130
def file_sha256(path):
    """Return the hex SHA-256 digest of the raw bytes of the file at `path`.

    The file is opened through the module-level ``open`` name in binary mode
    and hashed in fixed-size chunks; the digest equals hashing the whole
    content at once.
    """
    hasher = hashlib.sha256()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(65536), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
133
+
134
+
135
def forbidden_pattern_slug(item, index, prior_slugs):
    """Build the registry id for one ``forbidden_patterns`` entry.

    The id is ``forbidden_pattern__<description>__<first file>``, with the
    description sanitised to at most 60 characters and the first entry of
    ``files`` (or ``""`` when there is none) to at most 30. If that id is
    already in `prior_slugs`, ``__i<index>`` is appended to disambiguate.
    `prior_slugs` is only read here; the caller records the result.
    """
    file_list = item.get("files", []) or []
    first_file = file_list[0] if file_list else ""
    desc_part = sanitize(item.get("description", ""), 60)
    file_part = sanitize(first_file, 30)
    candidate = f"forbidden_pattern__{desc_part}__{file_part}"
    if candidate not in prior_slugs:
        return candidate
    return f"{candidate}__i{index}"
143
+
144
+
145
def verification_slug(verification_obj):
    """Build the registry id for one ``verification_commands`` entry.

    The id is ``verification__`` followed by `sha8` of the object's canonical
    compact JSON, so equal objects always map to the same id.
    """
    fingerprint = sha8(canonical_compact_json(verification_obj))
    return "verification__" + fingerprint
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # Oracle category enumeration via subprocess.
151
+ # ---------------------------------------------------------------------------
152
def list_oracle_categories(scripts_dir, oracle_name):
    """Run an oracle script with ``--list-categories`` and return its categories.

    Args:
        scripts_dir: path object for the directory holding the oracle scripts
            (it is joined with ``/``).
        oracle_name: key into ``ORACLE_SCRIPTS``.

    Returns:
        The ``categories`` value of the JSON document the script prints.

    Raises:
        subprocess.CalledProcessError: the script exited non-zero.
        ValueError: the script's ``oracle`` field differs from `oracle_name`
            (``json.loads`` also raises a ``ValueError`` subclass on bad JSON).
        KeyError: unknown `oracle_name`, or no ``categories`` key in the output.
    """
    command = [
        sys.executable,
        str(scripts_dir / ORACLE_SCRIPTS[oracle_name]),
        "--list-categories",
    ]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    payload = json.loads(completed.stdout)
    reported = payload.get("oracle")
    if reported == oracle_name:
        return payload["categories"]
    raise ValueError(
        f"oracle name mismatch: expected {oracle_name}, got {reported}"
    )
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # Registry assembly.
170
+ # ---------------------------------------------------------------------------
171
def build_registry(fixture_dir, scripts_dir, generated_at, repo_root):
    """Assemble the canonical id registry dict for one benchmark fixture.

    Reads ``expected.json`` and ``metadata.json`` from `fixture_dir`, turns
    each supported ``expected.json`` field into registry entries, adds the
    oracle categories allow-listed in
    ``metadata.json:pair_plan_oracle_categories``, and sorts all entries by id.

    Args:
        fixture_dir: fixture directory (``str`` or path object).
        scripts_dir: directory holding the oracle scripts (``str`` or path).
        generated_at: value stored verbatim in the ``generated_at`` field.
        repo_root: root used to express the source paths relatively
            (``str`` or path object).

    Returns:
        A dict with ``schema_version``, ``fixture_id``, ``generated_at``,
        ``generated_from`` (source paths and SHA-256 digests) and
        ``required_invariants`` (the sorted entry list).

    Raises:
        ValueError: an allow-list entry lacks ``:``, names an unknown oracle,
            or names a category that the oracle does not report.
        Plus whatever opening/parsing the two JSON files or running an oracle
        script raises.
    """
    fixture_dir = pathlib.Path(fixture_dir).resolve()
    # Coerce and resolve the other two paths as well: `scripts_dir` is joined
    # with `/` below (which fails for a plain str), and `relative_to` only
    # matches when both sides are resolved the same way.
    scripts_dir = pathlib.Path(scripts_dir).resolve()
    repo_root = pathlib.Path(repo_root).resolve()
    expected_path = fixture_dir / "expected.json"
    metadata_path = fixture_dir / "metadata.json"

    with open(expected_path, "r", encoding="utf-8") as f:
        expected = json.load(f)
    with open(metadata_path, "r", encoding="utf-8") as f:
        metadata = json.load(f)

    fixture_id = metadata.get("id") or fixture_dir.name

    entries = []

    # forbidden_patterns ----------------------------------------------------
    seen_slugs = set()
    for i, item in enumerate(expected.get("forbidden_patterns", []) or []):
        slug = forbidden_pattern_slug(item, i, seen_slugs)
        seen_slugs.add(slug)
        desc = item.get("description", "")
        sev = item.get("severity", "flag")
        files = item.get("files", []) or []
        pattern = item.get("pattern", "")
        entries.append({
            "id": slug,
            "source_field": f"expected.json/forbidden_patterns/{i}",
            "source_ref": f"expected.json:forbidden_patterns[{i}]",
            "operational_check": (
                f"variant arm output MUST NOT contain regex pattern {pattern!r} "
                f"in files {files}; rationale: {desc}"
            ),
            "severity": sev,
            "authority": "expected.json/forbidden_patterns",
        })

    # verification_commands -------------------------------------------------
    for i, item in enumerate(expected.get("verification_commands", []) or []):
        slug = verification_slug(item)
        cmd = item.get("cmd", "")
        exit_code = item.get("exit_code")
        sc = item.get("stdout_contains", []) or []
        sn = item.get("stdout_not_contains", []) or []
        entries.append({
            "id": slug,
            "source_field": f"expected.json/verification_commands/{i}",
            "source_ref": f"expected.json:verification_commands[{i}]",
            "operational_check": (
                f"running `{cmd}` in the post-arm work dir MUST exit with code {exit_code}; "
                f"stdout MUST contain all of {sc}; stdout MUST NOT contain any of {sn}"
            ),
            "severity": "hard",
            "authority": "expected.json/verification_commands",
        })

    # required_files --------------------------------------------------------
    for path in expected.get("required_files", []) or []:
        entries.append({
            "id": f"required_file__{sanitize(path, 60)}",
            "source_field": "expected.json/required_files",
            "source_ref": f"expected.json:required_files[{path}]",
            "operational_check": (
                f"variant arm output MUST contain file {path!r} "
                "(created or preserved)"
            ),
            "severity": "hard",
            "authority": "expected.json/required_files",
        })

    # forbidden_files -------------------------------------------------------
    for path in expected.get("forbidden_files", []) or []:
        entries.append({
            "id": f"forbidden_file__{sanitize(path, 60)}",
            "source_field": "expected.json/forbidden_files",
            "source_ref": f"expected.json:forbidden_files[{path}]",
            "operational_check": (
                f"variant arm output MUST NOT add file {path!r}"
            ),
            "severity": "hard",
            "authority": "expected.json/forbidden_files",
        })

    # spec_output_files -----------------------------------------------------
    for path in expected.get("spec_output_files", []) or []:
        entries.append({
            "id": f"spec_output_file__{sanitize(path, 60)}",
            "source_field": "expected.json/spec_output_files",
            "source_ref": f"expected.json:spec_output_files[{path}]",
            "operational_check": (
                "variant-touched files MUST be inside (or reachable via static "
                f"imports from) the spec_output_files set; {path!r} is one Tier C seed"
            ),
            "severity": "warn",
            "authority": "expected.json/spec_output_files",
        })

    # max_deps_added --------------------------------------------------------
    if "max_deps_added" in expected:
        v = expected["max_deps_added"]
        entries.append({
            "id": f"max_deps_added__{v}",
            "source_field": "expected.json/max_deps_added",
            "source_ref": "expected.json:max_deps_added",
            "operational_check": (
                f"variant arm MUST NOT add more than {v} new npm dependencies "
                "(count delta of package.json:dependencies + devDependencies)"
            ),
            "severity": "hard",
            "authority": "expected.json/max_deps_added",
        })

    # oracle categories per metadata allowlist -----------------------------
    allowlist = metadata.get("pair_plan_oracle_categories", []) or []
    used_oracles = set()
    cat_index = {}  # oracle name -> categories, so each script runs at most once
    for entry_id in allowlist:
        if ":" not in entry_id:
            raise ValueError(
                f"malformed pair_plan_oracle_categories entry: {entry_id!r} (expected '<oracle>:<category>')"
            )
        oracle_name = entry_id.split(":", 1)[0]
        if oracle_name not in ORACLE_SCRIPTS:
            raise ValueError(
                f"unknown oracle {oracle_name!r} (known: {sorted(ORACLE_SCRIPTS)})"
            )
        if oracle_name not in cat_index:
            cat_index[oracle_name] = list_oracle_categories(scripts_dir, oracle_name)
        # Categories are matched on the full '<oracle>:<category>' id.
        match = next(
            (c for c in cat_index[oracle_name] if c["id"] == entry_id),
            None,
        )
        if match is None:
            available = [c["id"] for c in cat_index[oracle_name]]
            raise ValueError(
                f"oracle {oracle_name!r} has no category {entry_id!r}; available: {available}"
            )
        used_oracles.add(oracle_name)
        entries.append({
            "id": match["id"],
            "source_field": f"oracle/{oracle_name}/{match['id']}",
            "source_ref": f"{ORACLE_SCRIPTS[oracle_name]}",
            "operational_check": match["operational_check"],
            "severity": match["severity"],
            "authority": "metadata/oracle-allowlist",
        })

    # sort entries by id (deterministic) ------------------------------------
    entries.sort(key=lambda e: e["id"])

    # file shas (raw bytes) -------------------------------------------------
    expected_sha = file_sha256(expected_path)
    metadata_sha = file_sha256(metadata_path)
    oracle_shas = {}
    for ora in sorted(used_oracles):
        oracle_shas[ora] = file_sha256(scripts_dir / ORACLE_SCRIPTS[ora])

    # repo-root-relative paths for portability ------------------------------
    def rel(p):
        # as_posix() keeps '/' separators on every platform, so the registry
        # text does not depend on the OS that generated it.
        candidate = pathlib.Path(p)
        try:
            return candidate.resolve().relative_to(repo_root).as_posix()
        except ValueError:
            # Outside repo_root: fall back to the path as given.
            return candidate.as_posix()

    return {
        "schema_version": SCHEMA_VERSION,
        "fixture_id": fixture_id,
        "generated_at": generated_at,
        "generated_from": {
            "expected_path": rel(expected_path),
            "expected_sha256": expected_sha,
            "metadata_path": rel(metadata_path),
            "metadata_sha256": metadata_sha,
            "oracle_script_shas": oracle_shas,
        },
        "required_invariants": entries,
    }
346
+
347
+
348
def main():
    """Command-line entry point: build one fixture's registry and emit it.

    Parses the arguments, installs the `/results/` read trap, builds the
    registry, and serialises it as indented, key-sorted JSON with a trailing
    newline. The text goes to ``--output`` when given, otherwise to stdout.
    In both cases the bytes are UTF-8 with ``\\n`` line endings, so the output
    does not vary with the platform or locale.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--fixture",
        required=True,
        help="Path to fixture directory (e.g. benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand)",
    )
    ap.add_argument(
        "--scripts-dir",
        default="benchmark/auto-resolve/scripts",
        help="Directory containing oracle-*.py scripts",
    )
    ap.add_argument(
        "--output",
        help="Write to this path (default: stdout)",
    )
    ap.add_argument(
        "--generated-at",
        default=None,
        help="ISO8601 timestamp to embed (default: UTC now); pin to a fixed value for determinism testing",
    )
    ap.add_argument(
        "--repo-root",
        default=None,
        help="Repo root for resolving relative paths in output (default: cwd)",
    )
    args = ap.parse_args()

    # Install before any fixture file is opened so every read is vetted.
    install_path_trap()

    generated_at = (
        args.generated_at
        or datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    )
    scripts_dir = pathlib.Path(args.scripts_dir).resolve()
    repo_root = pathlib.Path(args.repo_root or os.getcwd()).resolve()

    registry = build_registry(args.fixture, scripts_dir, generated_at, repo_root)
    out_text = json.dumps(
        registry,
        indent=2,
        sort_keys=True,
        ensure_ascii=False,
    ) + "\n"

    if args.output:
        # newline="\n" turns off newline translation; without it Windows
        # would write CRLF and the file would differ byte-for-byte.
        with open(args.output, "w", encoding="utf-8", newline="\n") as f:
            f.write(out_text)
    else:
        # ensure_ascii=False can produce non-ASCII text; write UTF-8 bytes
        # directly so the locale's stdout encoding and newline handling do
        # not alter (or reject) the output.
        binary_stdout = getattr(sys.stdout, "buffer", None)
        if binary_stdout is None:
            # stdout was replaced by a text-only stream; use it as-is.
            sys.stdout.write(out_text)
        else:
            sys.stdout.flush()
            binary_stdout.write(out_text.encode("utf-8"))
            binary_stdout.flush()


if __name__ == "__main__":
    main()