devlyn-cli 1.15.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +104 -0
- package/CLAUDE.md +135 -21
- package/README.md +43 -125
- package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +272 -0
- package/benchmark/auto-resolve/README.md +114 -0
- package/benchmark/auto-resolve/RUBRIC.md +162 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +30 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/expected.json +68 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/spec.md +45 -0
- package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/task.txt +8 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +54 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json +170 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json +84 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/metadata.json +21 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-fail.json +214 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-pass.json +223 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/setup.sh +5 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md +56 -0
- package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/task.txt +14 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +28 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected-pair-plan-registry.json +162 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +65 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/metadata.json +19 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +56 -0
- package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/task.txt +9 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +40 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/expected.json +57 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/setup.sh +6 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/spec.md +49 -0
- package/benchmark/auto-resolve/fixtures/F4-web-browser-design/task.txt +9 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +38 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/expected.json +65 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/setup.sh +55 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/spec.md +49 -0
- package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/task.txt +7 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +38 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/expected.json +77 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/spec.md +49 -0
- package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/task.txt +10 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +50 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/expected.json +76 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/setup.sh +36 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/spec.md +46 -0
- package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/task.txt +7 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +50 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/expected.json +63 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/setup.sh +4 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +48 -0
- package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/task.txt +1 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +93 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/expected.json +74 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/metadata.json +10 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/setup.sh +28 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +62 -0
- package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/task.txt +5 -0
- package/benchmark/auto-resolve/fixtures/SCHEMA.md +130 -0
- package/benchmark/auto-resolve/fixtures/test-repo/README.md +27 -0
- package/benchmark/auto-resolve/fixtures/test-repo/bin/cli.js +63 -0
- package/benchmark/auto-resolve/fixtures/test-repo/package-lock.json +823 -0
- package/benchmark/auto-resolve/fixtures/test-repo/package.json +22 -0
- package/benchmark/auto-resolve/fixtures/test-repo/playwright.config.js +17 -0
- package/benchmark/auto-resolve/fixtures/test-repo/server/index.js +37 -0
- package/benchmark/auto-resolve/fixtures/test-repo/tests/cli.test.js +25 -0
- package/benchmark/auto-resolve/fixtures/test-repo/tests/server.test.js +58 -0
- package/benchmark/auto-resolve/fixtures/test-repo/web/index.html +37 -0
- package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +174 -0
- package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +256 -0
- package/benchmark/auto-resolve/scripts/compile-report.py +331 -0
- package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +552 -0
- package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +430 -0
- package/benchmark/auto-resolve/scripts/judge.sh +359 -0
- package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +260 -0
- package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +274 -0
- package/benchmark/auto-resolve/scripts/oracle-test-fidelity.py +328 -0
- package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +401 -0
- package/benchmark/auto-resolve/scripts/pair-plan-lint.py +468 -0
- package/benchmark/auto-resolve/scripts/run-fixture.sh +691 -0
- package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +234 -0
- package/benchmark/auto-resolve/scripts/run-suite.sh +214 -0
- package/benchmark/auto-resolve/scripts/ship-gate.py +222 -0
- package/bin/devlyn.js +175 -17
- package/config/skills/_shared/adapters/README.md +64 -0
- package/config/skills/_shared/adapters/gpt-5-5.md +29 -0
- package/config/skills/_shared/adapters/opus-4-7.md +29 -0
- package/config/skills/{devlyn:auto-resolve/scripts → _shared}/archive_run.py +26 -0
- package/config/skills/_shared/codex-config.md +54 -0
- package/config/skills/_shared/codex-monitored.sh +141 -0
- package/config/skills/_shared/engine-preflight.md +35 -0
- package/config/skills/_shared/expected.schema.json +93 -0
- package/config/skills/_shared/pair-plan-schema.md +298 -0
- package/config/skills/_shared/runtime-principles.md +110 -0
- package/config/skills/_shared/spec-verify-check.py +519 -0
- package/config/skills/devlyn:ideate/SKILL.md +99 -429
- package/config/skills/devlyn:ideate/references/elicitation.md +97 -0
- package/config/skills/devlyn:ideate/references/from-spec-mode.md +54 -0
- package/config/skills/devlyn:ideate/references/project-mode.md +76 -0
- package/config/skills/devlyn:ideate/references/spec-template.md +102 -0
- package/config/skills/devlyn:resolve/SKILL.md +172 -184
- package/config/skills/devlyn:resolve/references/free-form-mode.md +68 -0
- package/config/skills/devlyn:resolve/references/phases/build-gate.md +45 -0
- package/config/skills/devlyn:resolve/references/phases/cleanup.md +39 -0
- package/config/skills/devlyn:resolve/references/phases/implement.md +42 -0
- package/config/skills/devlyn:resolve/references/phases/plan.md +42 -0
- package/config/skills/devlyn:resolve/references/phases/verify.md +69 -0
- package/config/skills/devlyn:resolve/references/state-schema.md +106 -0
- package/{config/skills → optional-skills}/devlyn:design-system/SKILL.md +1 -0
- package/{config/skills → optional-skills}/devlyn:reap/SKILL.md +1 -0
- package/{config/skills → optional-skills}/devlyn:team-design-ui/SKILL.md +5 -0
- package/package.json +12 -2
- package/scripts/lint-skills.sh +431 -0
- package/config/skills/devlyn:auto-resolve/SKILL.md +0 -252
- package/config/skills/devlyn:auto-resolve/evals/evals.json +0 -21
- package/config/skills/devlyn:auto-resolve/evals/task-doctor-subcommand.md +0 -42
- package/config/skills/devlyn:auto-resolve/references/build-gate.md +0 -130
- package/config/skills/devlyn:auto-resolve/references/engine-routing.md +0 -82
- package/config/skills/devlyn:auto-resolve/references/findings-schema.md +0 -103
- package/config/skills/devlyn:auto-resolve/references/phases/phase-1-build.md +0 -54
- package/config/skills/devlyn:auto-resolve/references/phases/phase-2-evaluate.md +0 -45
- package/config/skills/devlyn:auto-resolve/references/phases/phase-3-critic.md +0 -84
- package/config/skills/devlyn:auto-resolve/references/pipeline-routing.md +0 -114
- package/config/skills/devlyn:auto-resolve/references/pipeline-state.md +0 -201
- package/config/skills/devlyn:auto-resolve/scripts/terminal_verdict.py +0 -96
- package/config/skills/devlyn:browser-validate/SKILL.md +0 -164
- package/config/skills/devlyn:browser-validate/references/flow-testing.md +0 -118
- package/config/skills/devlyn:browser-validate/references/tier1-chrome.md +0 -137
- package/config/skills/devlyn:browser-validate/references/tier2-playwright.md +0 -195
- package/config/skills/devlyn:browser-validate/references/tier3-curl.md +0 -57
- package/config/skills/devlyn:clean/SKILL.md +0 -285
- package/config/skills/devlyn:design-ui/SKILL.md +0 -351
- package/config/skills/devlyn:discover-product/SKILL.md +0 -124
- package/config/skills/devlyn:evaluate/SKILL.md +0 -564
- package/config/skills/devlyn:feature-spec/SKILL.md +0 -630
- package/config/skills/devlyn:ideate/references/challenge-rubric.md +0 -122
- package/config/skills/devlyn:ideate/references/codex-critic-template.md +0 -42
- package/config/skills/devlyn:ideate/references/templates/item-spec.md +0 -90
- package/config/skills/devlyn:implement-ui/SKILL.md +0 -466
- package/config/skills/devlyn:preflight/SKILL.md +0 -355
- package/config/skills/devlyn:preflight/references/auditors/browser-auditor.md +0 -32
- package/config/skills/devlyn:preflight/references/auditors/code-auditor.md +0 -86
- package/config/skills/devlyn:preflight/references/auditors/docs-auditor.md +0 -38
- package/config/skills/devlyn:product-spec/SKILL.md +0 -603
- package/config/skills/devlyn:recommend-features/SKILL.md +0 -286
- package/config/skills/devlyn:review/SKILL.md +0 -161
- package/config/skills/devlyn:team-resolve/SKILL.md +0 -631
- package/config/skills/devlyn:team-review/SKILL.md +0 -493
- package/config/skills/devlyn:update-docs/SKILL.md +0 -463
- package/config/skills/workflow-routing/SKILL.md +0 -73
- /package/{config/skills → optional-skills}/devlyn:reap/scripts/reap.sh +0 -0
- /package/{config/skills → optional-skills}/devlyn:reap/scripts/scan.sh +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: "F1-cli-trivial-flag"
|
|
3
|
+
title: "Add --loud flag to hello subcommand"
|
|
4
|
+
status: planned
|
|
5
|
+
complexity: trivial
|
|
6
|
+
depends-on: []
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# F1 Add `--loud` flag to `hello`
|
|
10
|
+
|
|
11
|
+
## Context
|
|
12
|
+
|
|
13
|
+
The `hello` subcommand in `bin/cli.js` currently prints `Hello, <name>!`. A
|
|
14
|
+
`--loud` flag gives users an emphatic variant without breaking the default.
|
|
15
|
+
This is a low-risk edit used to calibrate trivial-tier fixture difficulty.
|
|
16
|
+
|
|
17
|
+
## Requirements
|
|
18
|
+
|
|
19
|
+
- [ ] `node bin/cli.js hello --loud` prints `HELLO, WORLD!!` (everything uppercased, two trailing exclamation marks).
|
|
20
|
+
- [ ] `node bin/cli.js hello --loud --name alice` prints `HELLO, ALICE!!`.
|
|
21
|
+
- [ ] `node bin/cli.js hello` (no flag) still prints `Hello, world!` (unchanged).
|
|
22
|
+
- [ ] `node bin/cli.js hello --name bob` still prints `Hello, bob!` (unchanged).
|
|
23
|
+
- [ ] Existing tests continue to pass. Add at least one test covering the `--loud` path.
|
|
24
|
+
|
|
25
|
+
## Constraints
|
|
26
|
+
|
|
27
|
+
- **No new npm dependencies.** Built-ins only.
|
|
28
|
+
- **No silent catches.** If an unknown flag is passed, exit 1 with an informative message (same pattern as the existing `--name` handler).
|
|
29
|
+
- **Surgical diff.** Only touch `bin/cli.js` and `tests/cli.test.js`. Do not reformat unrelated code.
|
|
30
|
+
|
|
31
|
+
- **Lifecycle note.** The harness's DOCS phase flips this spec's frontmatter `status` after implementation completes — that is benchmark lifecycle bookkeeping, not a scope violation.
|
|
32
|
+
|
|
33
|
+
## Out of Scope
|
|
34
|
+
|
|
35
|
+
- Adding unrelated flags (`--quiet`, `--locale`, etc.).
|
|
36
|
+
- Refactoring the existing argument parser.
|
|
37
|
+
- Touching `server/`, `web/`, or `tests/server.test.js`.
|
|
38
|
+
|
|
39
|
+
## Verification
|
|
40
|
+
|
|
41
|
+
- `node bin/cli.js hello` prints `Hello, world!` (exit 0).
|
|
42
|
+
- `node bin/cli.js hello --loud` prints `HELLO, WORLD!!` (exit 0).
|
|
43
|
+
- `node bin/cli.js hello --loud --name alice` prints `HELLO, ALICE!!` (exit 0).
|
|
44
|
+
- `node --test tests/` passes all tests including the new `--loud` case.
|
|
45
|
+
- `git diff --stat` shows only `bin/cli.js` and `tests/cli.test.js` touched.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Add a --loud flag to the `hello` subcommand in bench-test-repo's CLI (bin/cli.js). When --loud is passed, the greeting is uppercased and ends with two exclamation marks.
|
|
2
|
+
|
|
3
|
+
For example:
|
|
4
|
+
- `node bin/cli.js hello --loud` → `HELLO, WORLD!!`
|
|
5
|
+
- `node bin/cli.js hello --loud --name alice` → `HELLO, ALICE!!`
|
|
6
|
+
- `node bin/cli.js hello` → `Hello, world!` (unchanged default)
|
|
7
|
+
|
|
8
|
+
Make sure existing tests still pass and add at least one test for the --loud path. Don't touch unrelated files — only `bin/cli.js` and `tests/cli.test.js`. No new npm dependencies.
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# F2 — Notes
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
|
|
5
|
+
Canonical **medium-complexity single-file CLI task** in the suite. Tests the
|
|
6
|
+
middle ground: a task big enough that first-draft implementations often miss
|
|
7
|
+
an edge case (EACCES vs missing-dir distinction, TTY gating, HOME guard),
|
|
8
|
+
small enough that every arm can plausibly finish in < 10 minutes.
|
|
9
|
+
|
|
10
|
+
## What failure mode does it detect?
|
|
11
|
+
|
|
12
|
+
- **Silent catches.** The pattern `try { readdirSync(...) } catch { return [] }`
|
|
13
|
+
is a natural shortcut here. Bare prompt arms tend to take it. The pipeline's
|
|
14
|
+
EVAL phase catches it as a `correctness.silent-error` or
|
|
15
|
+
`hygiene.silent-catch` finding.
|
|
16
|
+
- **Edge-case distinction.** ENOENT vs EACCES must be reported differently.
|
|
17
|
+
Arms that collapse both into a generic FAIL miss a spec Requirement.
|
|
18
|
+
- **Over-engineering.** Since v3.6's CRITIC calibration, hand-rolled
|
|
19
|
+
mode-bit writable checks are blocked in favor of `fs.accessSync(...,
|
|
20
|
+
fs.constants.W_OK)`.
|
|
21
|
+
|
|
22
|
+
## Which pipeline phases does it exercise?
|
|
23
|
+
|
|
24
|
+
- Phase 0: routing — `permission`, `env` risk keywords in the task body
|
|
25
|
+
escalate to `strict`.
|
|
26
|
+
- Phase 1 BUILD: main implementation pass.
|
|
27
|
+
- Phase 1.4 BUILD GATE: `node --check` syntax gate.
|
|
28
|
+
- Phase 2 EVAL: catches silent-catch trap if present.
|
|
29
|
+
- Phase 3 CRITIC design: applies stdlib-vs-hand-rolled calibration.
|
|
30
|
+
- Phase 3 CRITIC security (native): minimal — no deps changed.
|
|
31
|
+
- Phase 4 DOCS: spec frontmatter `status: done`.
|
|
32
|
+
|
|
33
|
+
## Why can't another fixture cover this?
|
|
34
|
+
|
|
35
|
+
- F1 is trivial (single-line edit, no edge cases).
|
|
36
|
+
- F3 is backend (different idioms, tests run differently).
|
|
37
|
+
- F5 is designed to force a fix-loop (not applicable here).
|
|
38
|
+
- F7 is scope-creep (orthogonal concern).
|
|
39
|
+
|
|
40
|
+
## When should this fixture be retired or replaced?
|
|
41
|
+
|
|
42
|
+
When both arms score > 95 for two consecutive shipped versions — i.e., the
|
|
43
|
+
fixture saturates and no longer differentiates. Candidate replacement: a
|
|
44
|
+
similar-size CLI task with multiple interacting flags or a subcommand that
|
|
45
|
+
spawns a child process.
|
|
46
|
+
|
|
47
|
+
## Calibration history
|
|
48
|
+
|
|
49
|
+
- v3.4 skill 57 / bare 45 / margin +12 (gpt-5.3-codex judge)
|
|
50
|
+
- v3.4.1 skill 59 / bare 43 / margin +16 (gpt-5.3-codex judge)
|
|
51
|
+
- v3.5 skill 92 / bare 81 / margin +11 (gpt-5.4 xhigh judge) — huge absolute jump; bare silent-catch caught
|
|
52
|
+
|
|
53
|
+
Absolute scores jumped with the stronger judge. Margin stays solid (+11
|
|
54
|
+
now; after stdlib calibration it is expected to open a few points more).
|
package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fixture_id": "F2-cli-medium-subcommand",
|
|
3
|
+
"generated_at": "2026-04-29T09:57:53Z",
|
|
4
|
+
"generated_from": {
|
|
5
|
+
"expected_path": "benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json",
|
|
6
|
+
"expected_sha256": "ddef8feba49f20b6957e37840bc6a03e78e554776e380d81ad6390944c72fcab",
|
|
7
|
+
"metadata_path": "benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/metadata.json",
|
|
8
|
+
"metadata_sha256": "1b8066a7c649eb6baad7a3e056edbdb16cc3b796e154cedee0cf2258c5543b18",
|
|
9
|
+
"oracle_script_shas": {
|
|
10
|
+
"scope-tier-a": "baaf21ed4a67f35d2a8af825e72869ef9737b5dfe08d65dd1a11c26fafe297ae",
|
|
11
|
+
"scope-tier-b": "9349d00a5c7456a4df9142923334e7004407d53f2443f2e210945bb771971e25",
|
|
12
|
+
"test-fidelity": "401184da51ae500cecfc75a6c5819b0d28acb63a397f788fb628c2913562f903"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"required_invariants": [
|
|
16
|
+
{
|
|
17
|
+
"authority": "expected.json/forbidden_patterns",
|
|
18
|
+
"id": "forbidden_pattern__silent_catch_returning_a_fallback_value_violates_no_silent_c__bin_cli_js",
|
|
19
|
+
"operational_check": "variant arm output MUST NOT contain regex pattern \"catch\\\\s*\\\\([^)]*\\\\)\\\\s*\\\\{[^}]*return\\\\s+(\\\\[\\\\]|null|undefined|\\\\{|false|'')\" in files ['bin/cli.js']; rationale: silent catch returning a fallback value — violates no-silent-catches policy",
|
|
20
|
+
"severity": "disqualifier",
|
|
21
|
+
"source_field": "expected.json/forbidden_patterns/0",
|
|
22
|
+
"source_ref": "expected.json:forbidden_patterns[0]"
|
|
23
|
+
},
|
|
24
|
+
{
|
|
25
|
+
"authority": "expected.json/forbidden_patterns",
|
|
26
|
+
"id": "forbidden_pattern__ts_ignore_escape_hatch__bin_cli_js",
|
|
27
|
+
"operational_check": "variant arm output MUST NOT contain regex pattern '@ts-ignore' in files ['bin/cli.js']; rationale: @ts-ignore escape hatch",
|
|
28
|
+
"severity": "disqualifier",
|
|
29
|
+
"source_field": "expected.json/forbidden_patterns/1",
|
|
30
|
+
"source_ref": "expected.json:forbidden_patterns[1]"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"authority": "expected.json/max_deps_added",
|
|
34
|
+
"id": "max_deps_added__0",
|
|
35
|
+
"operational_check": "variant arm MUST NOT add more than 0 new npm dependencies (count delta of package.json:dependencies + devDependencies)",
|
|
36
|
+
"severity": "hard",
|
|
37
|
+
"source_field": "expected.json/max_deps_added",
|
|
38
|
+
"source_ref": "expected.json:max_deps_added"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"authority": "expected.json/required_files",
|
|
42
|
+
"id": "required_file__bin_cli_js",
|
|
43
|
+
"operational_check": "variant arm output MUST contain file 'bin/cli.js' (created or preserved)",
|
|
44
|
+
"severity": "hard",
|
|
45
|
+
"source_field": "expected.json/required_files",
|
|
46
|
+
"source_ref": "expected.json:required_files[bin/cli.js]"
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"authority": "metadata/oracle-allowlist",
|
|
50
|
+
"id": "scope-tier-a:lockfile-deletion",
|
|
51
|
+
"operational_check": "variant arm MUST NOT delete a scaffold-present lockfile",
|
|
52
|
+
"severity": "hard",
|
|
53
|
+
"source_field": "oracle/scope-tier-a/scope-tier-a:lockfile-deletion",
|
|
54
|
+
"source_ref": "oracle-scope-tier-a.py"
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"authority": "metadata/oracle-allowlist",
|
|
58
|
+
"id": "scope-tier-a:tier-a-violation",
|
|
59
|
+
"operational_check": "variant arm MUST NOT add or modify paths matching: docs/roadmap/** | docs/VISION.md | docs/ROADMAP.md | .github/** | node_modules/** | **/node_modules/** | test-results/** | coverage/** | .nyc_output/** | basename suffix .log | basename prefix .env or secrets.",
|
|
60
|
+
"severity": "hard",
|
|
61
|
+
"source_field": "oracle/scope-tier-a/scope-tier-a:tier-a-violation",
|
|
62
|
+
"source_ref": "oracle-scope-tier-a.py"
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"authority": "metadata/oracle-allowlist",
|
|
66
|
+
"id": "scope-tier-b:scope-unmatched",
|
|
67
|
+
"operational_check": "every variant-touched file MUST be either inside spec_output_files (Tier C) OR reachable from a Tier C seed via static JS/TS imports OR matched by expected.json:tier_a_waivers",
|
|
68
|
+
"severity": "warn",
|
|
69
|
+
"source_field": "oracle/scope-tier-b/scope-tier-b:scope-unmatched",
|
|
70
|
+
"source_ref": "oracle-scope-tier-b.py"
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
"authority": "expected.json/spec_output_files",
|
|
74
|
+
"id": "spec_output_file__bin_cli_js",
|
|
75
|
+
"operational_check": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'bin/cli.js' is one Tier C seed",
|
|
76
|
+
"severity": "warn",
|
|
77
|
+
"source_field": "expected.json/spec_output_files",
|
|
78
|
+
"source_ref": "expected.json:spec_output_files[bin/cli.js]"
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"authority": "expected.json/spec_output_files",
|
|
82
|
+
"id": "spec_output_file__tests_cli_test_js",
|
|
83
|
+
"operational_check": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'tests/cli.test.js' is one Tier C seed",
|
|
84
|
+
"severity": "warn",
|
|
85
|
+
"source_field": "expected.json/spec_output_files",
|
|
86
|
+
"source_ref": "expected.json:spec_output_files[tests/cli.test.js]"
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"authority": "metadata/oracle-allowlist",
|
|
90
|
+
"id": "test-fidelity:assertion-regression",
|
|
91
|
+
"operational_check": "effective assertion count MUST NOT drop and skipped-test count MUST NOT rise; vacuous expect.assertions(0) is treated as a real regression",
|
|
92
|
+
"severity": "warn",
|
|
93
|
+
"source_field": "oracle/test-fidelity/test-fidelity:assertion-regression",
|
|
94
|
+
"source_ref": "oracle-test-fidelity.py"
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"authority": "metadata/oracle-allowlist",
|
|
98
|
+
"id": "test-fidelity:mock-swap",
|
|
99
|
+
"operational_check": "post-arm test file MUST NOT swap REAL_PATTERNS hits for MOCK_PATTERNS hits (jest/vi/sinon, nock/msw, app.handle/inject/callback, hand-rolled IncomingMessage/ServerResponse, etc.); a drop in real_calls combined with a rise in mock_calls is a mock-swap flag",
|
|
100
|
+
"severity": "flag",
|
|
101
|
+
"source_field": "oracle/test-fidelity/test-fidelity:mock-swap",
|
|
102
|
+
"source_ref": "oracle-test-fidelity.py"
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
"authority": "metadata/oracle-allowlist",
|
|
106
|
+
"id": "test-fidelity:test-file-deleted",
|
|
107
|
+
"operational_check": "no scaffold-present test file may be deleted by the variant arm; deletion of an existing tests/*.test.* / *.spec.* / *.e2e.* file is a flag-severity finding",
|
|
108
|
+
"severity": "flag",
|
|
109
|
+
"source_field": "oracle/test-fidelity/test-fidelity:test-file-deleted",
|
|
110
|
+
"source_ref": "oracle-test-fidelity.py"
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"authority": "metadata/oracle-allowlist",
|
|
114
|
+
"id": "test-fidelity:test-file-renamed",
|
|
115
|
+
"operational_check": "rename of a scaffold-present test file is warn-severity (content fidelity not verified across renames in step 1)",
|
|
116
|
+
"severity": "warn",
|
|
117
|
+
"source_field": "oracle/test-fidelity/test-fidelity:test-file-renamed",
|
|
118
|
+
"source_ref": "oracle-test-fidelity.py"
|
|
119
|
+
},
|
|
120
|
+
{
|
|
121
|
+
"authority": "expected.json/verification_commands",
|
|
122
|
+
"id": "verification__3f35982a",
|
|
123
|
+
"operational_check": "running `node bin/cli.js doctor` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor:']; stdout MUST NOT contain any of ['undefined', 'Error:']",
|
|
124
|
+
"severity": "hard",
|
|
125
|
+
"source_field": "expected.json/verification_commands/0",
|
|
126
|
+
"source_ref": "expected.json:verification_commands[0]"
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
"authority": "expected.json/verification_commands",
|
|
130
|
+
"id": "verification__460fce04",
|
|
131
|
+
"operational_check": "running `HOME=/nonexistent node bin/cli.js doctor` in the post-arm work dir MUST exit with code 1; stdout MUST contain all of ['/nonexistent']; stdout MUST NOT contain any of []",
|
|
132
|
+
"severity": "hard",
|
|
133
|
+
"source_field": "expected.json/verification_commands/1",
|
|
134
|
+
"source_ref": "expected.json:verification_commands[1]"
|
|
135
|
+
},
|
|
136
|
+
{
|
|
137
|
+
"authority": "expected.json/verification_commands",
|
|
138
|
+
"id": "verification__973e287e",
|
|
139
|
+
"operational_check": "running `python3 -c \"import subprocess; r = subprocess.run(['node', 'bin/cli.js', 'doctor'], capture_output=True); n = r.stdout.count(b'\\x1b['); print(n); exit(0 if n == 0 else 1)\"` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['0']; stdout MUST NOT contain any of []",
|
|
140
|
+
"severity": "hard",
|
|
141
|
+
"source_field": "expected.json/verification_commands/2",
|
|
142
|
+
"source_ref": "expected.json:verification_commands[2]"
|
|
143
|
+
},
|
|
144
|
+
{
|
|
145
|
+
"authority": "expected.json/verification_commands",
|
|
146
|
+
"id": "verification__d6253a97",
|
|
147
|
+
"operational_check": "running `node bin/cli.js doctor --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor']; stdout MUST NOT contain any of []",
|
|
148
|
+
"severity": "hard",
|
|
149
|
+
"source_field": "expected.json/verification_commands/3",
|
|
150
|
+
"source_ref": "expected.json:verification_commands[3]"
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
"authority": "expected.json/verification_commands",
|
|
154
|
+
"id": "verification__e0f149e4",
|
|
155
|
+
"operational_check": "running `node bin/cli.js --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor']; stdout MUST NOT contain any of []",
|
|
156
|
+
"severity": "hard",
|
|
157
|
+
"source_field": "expected.json/verification_commands/4",
|
|
158
|
+
"source_ref": "expected.json:verification_commands[4]"
|
|
159
|
+
},
|
|
160
|
+
{
|
|
161
|
+
"authority": "expected.json/verification_commands",
|
|
162
|
+
"id": "verification__fdbcd321",
|
|
163
|
+
"operational_check": "running `node bin/cli.js doctor --verbose` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor:']; stdout MUST NOT contain any of ['Error:']",
|
|
164
|
+
"severity": "hard",
|
|
165
|
+
"source_field": "expected.json/verification_commands/5",
|
|
166
|
+
"source_ref": "expected.json:verification_commands[5]"
|
|
167
|
+
}
|
|
168
|
+
],
|
|
169
|
+
"schema_version": "1"
|
|
170
|
+
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
{
|
|
2
|
+
"verification_commands": [
|
|
3
|
+
{
|
|
4
|
+
"cmd": "node bin/cli.js doctor",
|
|
5
|
+
"exit_code": 0,
|
|
6
|
+
"stdout_contains": [
|
|
7
|
+
"doctor:"
|
|
8
|
+
],
|
|
9
|
+
"stdout_not_contains": [
|
|
10
|
+
"undefined",
|
|
11
|
+
"Error:"
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"cmd": "HOME=/nonexistent node bin/cli.js doctor",
|
|
16
|
+
"exit_code": 1,
|
|
17
|
+
"stdout_contains": [
|
|
18
|
+
"/nonexistent"
|
|
19
|
+
],
|
|
20
|
+
"stdout_not_contains": []
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
"cmd": "python3 -c \"import subprocess; r = subprocess.run(['node', 'bin/cli.js', 'doctor'], capture_output=True); n = r.stdout.count(b'\\x1b['); print(n); exit(0 if n == 0 else 1)\"",
|
|
24
|
+
"exit_code": 0,
|
|
25
|
+
"stdout_contains": [
|
|
26
|
+
"0"
|
|
27
|
+
],
|
|
28
|
+
"stdout_not_contains": []
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"cmd": "node bin/cli.js doctor --help",
|
|
32
|
+
"exit_code": 0,
|
|
33
|
+
"stdout_contains": [
|
|
34
|
+
"doctor"
|
|
35
|
+
],
|
|
36
|
+
"stdout_not_contains": []
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"cmd": "node bin/cli.js --help",
|
|
40
|
+
"exit_code": 0,
|
|
41
|
+
"stdout_contains": [
|
|
42
|
+
"doctor"
|
|
43
|
+
],
|
|
44
|
+
"stdout_not_contains": []
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"cmd": "node bin/cli.js doctor --verbose",
|
|
48
|
+
"exit_code": 0,
|
|
49
|
+
"stdout_contains": [
|
|
50
|
+
"doctor:"
|
|
51
|
+
],
|
|
52
|
+
"stdout_not_contains": [
|
|
53
|
+
"Error:"
|
|
54
|
+
]
|
|
55
|
+
}
|
|
56
|
+
],
|
|
57
|
+
"forbidden_patterns": [
|
|
58
|
+
{
|
|
59
|
+
"pattern": "catch\\s*\\([^)]*\\)\\s*\\{[^}]*return\\s+(?:\\[\\]|null|undefined|false|''|\\{\\s*\\})",
|
|
60
|
+
"description": "silent catch returning a fallback value (null / undefined / [] / false / '' / empty {}) \u2014 violates no-silent-catches policy. Structured error returns like `return { level: 'fail', message }` are NOT silent (they surface a user-visible failure object) and must not match.",
|
|
61
|
+
"files": [
|
|
62
|
+
"bin/cli.js"
|
|
63
|
+
],
|
|
64
|
+
"severity": "disqualifier"
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"pattern": "@ts-ignore",
|
|
68
|
+
"description": "@ts-ignore escape hatch",
|
|
69
|
+
"files": [
|
|
70
|
+
"bin/cli.js"
|
|
71
|
+
],
|
|
72
|
+
"severity": "disqualifier"
|
|
73
|
+
}
|
|
74
|
+
],
|
|
75
|
+
"required_files": [
|
|
76
|
+
"bin/cli.js"
|
|
77
|
+
],
|
|
78
|
+
"forbidden_files": [],
|
|
79
|
+
"max_deps_added": 0,
|
|
80
|
+
"spec_output_files": [
|
|
81
|
+
"bin/cli.js",
|
|
82
|
+
"tests/cli.test.js"
|
|
83
|
+
]
|
|
84
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "F2-cli-medium-subcommand",
|
|
3
|
+
"category": "medium",
|
|
4
|
+
"difficulty": "medium",
|
|
5
|
+
"timeout_seconds": 1500,
|
|
6
|
+
"required_tools": [
|
|
7
|
+
"node"
|
|
8
|
+
],
|
|
9
|
+
"browser": false,
|
|
10
|
+
"deps_change_expected": false,
|
|
11
|
+
"intent": "Add a `doctor` subcommand to bin/cli.js that diagnoses the local environment: node version check, $HOME/.claude directory check, installed plugins count, installed skills count, TTY-gated ANSI color, summary line, exit code, --verbose flag, help integration. Zero new npm dependencies. No silent error catches.",
|
|
12
|
+
"pair_plan_oracle_categories": [
|
|
13
|
+
"scope-tier-a:lockfile-deletion",
|
|
14
|
+
"scope-tier-a:tier-a-violation",
|
|
15
|
+
"scope-tier-b:scope-unmatched",
|
|
16
|
+
"test-fidelity:assertion-regression",
|
|
17
|
+
"test-fidelity:mock-swap",
|
|
18
|
+
"test-fidelity:test-file-deleted",
|
|
19
|
+
"test-fidelity:test-file-renamed"
|
|
20
|
+
]
|
|
21
|
+
}
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
{
|
|
2
|
+
"accepted_invariants": [
|
|
3
|
+
{
|
|
4
|
+
"authority": "expected.json/forbidden_patterns",
|
|
5
|
+
"id": "forbidden_pattern__ts_ignore_escape_hatch__bin_cli_js",
|
|
6
|
+
"operational_check": "variant arm output MUST NOT contain regex pattern '@ts-ignore' in files ['bin/cli.js']; rationale: @ts-ignore escape hatch",
|
|
7
|
+
"paraphrase": "variant arm output MUST NOT contain regex pattern '@ts-ignore' in files ['bin/cli.js']; rationale: @ts-ignore escape hat",
|
|
8
|
+
"source_refs": [
|
|
9
|
+
"expected.json:forbidden_patterns[1]"
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"authority": "expected.json/max_deps_added",
|
|
14
|
+
"id": "max_deps_added__0",
|
|
15
|
+
"operational_check": "variant arm MUST NOT add more than 0 new npm dependencies (count delta of package.json:dependencies + devDependencies)",
|
|
16
|
+
"paraphrase": "variant arm MUST NOT add more than 0 new npm dependencies (count delta of package.json:dependencies + devDependencies)",
|
|
17
|
+
"source_refs": [
|
|
18
|
+
"expected.json:max_deps_added"
|
|
19
|
+
]
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"authority": "expected.json/required_files",
|
|
23
|
+
"id": "required_file__bin_cli_js",
|
|
24
|
+
"operational_check": "variant arm output MUST contain file 'bin/cli.js' (created or preserved)",
|
|
25
|
+
"paraphrase": "variant arm output MUST contain file 'bin/cli.js' (created or preserved)",
|
|
26
|
+
"source_refs": [
|
|
27
|
+
"expected.json:required_files[bin/cli.js]"
|
|
28
|
+
]
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"authority": "metadata/oracle-allowlist",
|
|
32
|
+
"id": "scope-tier-a:lockfile-deletion",
|
|
33
|
+
"operational_check": "variant arm MUST NOT delete a scaffold-present lockfile",
|
|
34
|
+
"paraphrase": "variant arm MUST NOT delete a scaffold-present lockfile",
|
|
35
|
+
"source_refs": [
|
|
36
|
+
"oracle-scope-tier-a.py"
|
|
37
|
+
]
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"authority": "metadata/oracle-allowlist",
|
|
41
|
+
"id": "scope-tier-a:tier-a-violation",
|
|
42
|
+
"operational_check": "variant arm MUST NOT add or modify paths matching: docs/roadmap/** | docs/VISION.md | docs/ROADMAP.md | .github/** | node_modules/** | **/node_modules/** | test-results/** | coverage/** | .nyc_output/** | basename suffix .log | basename prefix .env or secrets.",
|
|
43
|
+
"paraphrase": "variant arm MUST NOT add or modify paths matching: docs/roadmap/** | docs/VISION.md | docs/ROADMAP.md | .github/** | nod",
|
|
44
|
+
"source_refs": [
|
|
45
|
+
"oracle-scope-tier-a.py"
|
|
46
|
+
]
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"authority": "metadata/oracle-allowlist",
|
|
50
|
+
"id": "scope-tier-b:scope-unmatched",
|
|
51
|
+
"operational_check": "every variant-touched file MUST be either inside spec_output_files (Tier C) OR reachable from a Tier C seed via static JS/TS imports OR matched by expected.json:tier_a_waivers",
|
|
52
|
+
"paraphrase": "every variant-touched file MUST be either inside spec_output_files (Tier C) OR reachable from a Tier C seed via static J",
|
|
53
|
+
"source_refs": [
|
|
54
|
+
"oracle-scope-tier-b.py"
|
|
55
|
+
]
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"authority": "expected.json/spec_output_files",
|
|
59
|
+
"id": "spec_output_file__bin_cli_js",
|
|
60
|
+
"operational_check": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'bin/cli.js' is one Tier C seed",
|
|
61
|
+
"paraphrase": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'bin/cli.js' is o",
|
|
62
|
+
"source_refs": [
|
|
63
|
+
"expected.json:spec_output_files[bin/cli.js]"
|
|
64
|
+
]
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"authority": "expected.json/spec_output_files",
|
|
68
|
+
"id": "spec_output_file__tests_cli_test_js",
|
|
69
|
+
"operational_check": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'tests/cli.test.js' is one Tier C seed",
|
|
70
|
+
"paraphrase": "variant-touched files MUST be inside (or reachable via static imports from) the spec_output_files set; 'tests/cli.test.j",
|
|
71
|
+
"source_refs": [
|
|
72
|
+
"expected.json:spec_output_files[tests/cli.test.js]"
|
|
73
|
+
]
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"authority": "metadata/oracle-allowlist",
|
|
77
|
+
"id": "test-fidelity:assertion-regression",
|
|
78
|
+
"operational_check": "effective assertion count MUST NOT drop and skipped-test count MUST NOT rise; vacuous expect.assertions(0) is treated as a real regression",
|
|
79
|
+
"paraphrase": "effective assertion count MUST NOT drop and skipped-test count MUST NOT rise; vacuous expect.assertions(0) is treated as",
|
|
80
|
+
"source_refs": [
|
|
81
|
+
"oracle-test-fidelity.py"
|
|
82
|
+
]
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
"authority": "metadata/oracle-allowlist",
|
|
86
|
+
"id": "test-fidelity:mock-swap",
|
|
87
|
+
"operational_check": "post-arm test file MUST NOT swap REAL_PATTERNS hits for MOCK_PATTERNS hits (jest/vi/sinon, nock/msw, app.handle/inject/callback, hand-rolled IncomingMessage/ServerResponse, etc.); a drop in real_calls combined with a rise in mock_calls is a mock-swap flag",
|
|
88
|
+
"paraphrase": "post-arm test file MUST NOT swap REAL_PATTERNS hits for MOCK_PATTERNS hits (jest/vi/sinon, nock/msw, app.handle/inject/c",
|
|
89
|
+
"source_refs": [
|
|
90
|
+
"oracle-test-fidelity.py"
|
|
91
|
+
]
|
|
92
|
+
},
|
|
93
|
+
{
|
|
94
|
+
"authority": "metadata/oracle-allowlist",
|
|
95
|
+
"id": "test-fidelity:test-file-deleted",
|
|
96
|
+
"operational_check": "no scaffold-present test file may be deleted by the variant arm; deletion of an existing tests/*.test.* / *.spec.* / *.e2e.* file is a flag-severity finding",
|
|
97
|
+
"paraphrase": "no scaffold-present test file may be deleted by the variant arm; deletion of an existing tests/*.test.* / *.spec.* / *.e",
|
|
98
|
+
"source_refs": [
|
|
99
|
+
"oracle-test-fidelity.py"
|
|
100
|
+
]
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
"authority": "metadata/oracle-allowlist",
|
|
104
|
+
"id": "test-fidelity:test-file-renamed",
|
|
105
|
+
"operational_check": "rename of a scaffold-present test file is warn-severity (content fidelity not verified across renames in step 1)",
|
|
106
|
+
"paraphrase": "rename of a scaffold-present test file is warn-severity (content fidelity not verified across renames in step 1)",
|
|
107
|
+
"source_refs": [
|
|
108
|
+
"oracle-test-fidelity.py"
|
|
109
|
+
]
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"authority": "expected.json/verification_commands",
|
|
113
|
+
"id": "verification__3f35982a",
|
|
114
|
+
"operational_check": "running `node bin/cli.js doctor` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor:']; stdout MUST NOT contain any of ['undefined', 'Error:']",
|
|
115
|
+
"paraphrase": "running `node bin/cli.js doctor` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor:'];",
|
|
116
|
+
"source_refs": [
|
|
117
|
+
"expected.json:verification_commands[0]"
|
|
118
|
+
]
|
|
119
|
+
},
|
|
120
|
+
{
|
|
121
|
+
"authority": "expected.json/verification_commands",
|
|
122
|
+
"id": "verification__460fce04",
|
|
123
|
+
"operational_check": "running `HOME=/nonexistent node bin/cli.js doctor` in the post-arm work dir MUST exit with code 1; stdout MUST contain all of ['/nonexistent']; stdout MUST NOT contain any of []",
|
|
124
|
+
"paraphrase": "running `HOME=/nonexistent node bin/cli.js doctor` in the post-arm work dir MUST exit with code 1; stdout MUST contain a",
|
|
125
|
+
"source_refs": [
|
|
126
|
+
"expected.json:verification_commands[1]"
|
|
127
|
+
]
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
"authority": "expected.json/verification_commands",
|
|
131
|
+
"id": "verification__973e287e",
|
|
132
|
+
"operational_check": "running `python3 -c \"import subprocess; r = subprocess.run(['node', 'bin/cli.js', 'doctor'], capture_output=True); n = r.stdout.count(b'\\x1b['); print(n); exit(0 if n == 0 else 1)\"` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['0']; stdout MUST NOT contain any of []",
|
|
133
|
+
"paraphrase": "running `python3 -c \"import subprocess; r = subprocess.run(['node', 'bin/cli.js', 'doctor'], capture_output=True); n = r",
|
|
134
|
+
"source_refs": [
|
|
135
|
+
"expected.json:verification_commands[2]"
|
|
136
|
+
]
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
"authority": "expected.json/verification_commands",
|
|
140
|
+
"id": "verification__d6253a97",
|
|
141
|
+
"operational_check": "running `node bin/cli.js doctor --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor']; stdout MUST NOT contain any of []",
|
|
142
|
+
"paraphrase": "running `node bin/cli.js doctor --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doc",
|
|
143
|
+
"source_refs": [
|
|
144
|
+
"expected.json:verification_commands[3]"
|
|
145
|
+
]
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
"authority": "expected.json/verification_commands",
|
|
149
|
+
"id": "verification__e0f149e4",
|
|
150
|
+
"operational_check": "running `node bin/cli.js --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor']; stdout MUST NOT contain any of []",
|
|
151
|
+
"paraphrase": "running `node bin/cli.js --help` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor']; ",
|
|
152
|
+
"source_refs": [
|
|
153
|
+
"expected.json:verification_commands[4]"
|
|
154
|
+
]
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"authority": "expected.json/verification_commands",
|
|
158
|
+
"id": "verification__fdbcd321",
|
|
159
|
+
"operational_check": "running `node bin/cli.js doctor --verbose` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['doctor:']; stdout MUST NOT contain any of ['Error:']",
|
|
160
|
+
"paraphrase": "running `node bin/cli.js doctor --verbose` in the post-arm work dir MUST exit with code 0; stdout MUST contain all of ['",
|
|
161
|
+
"source_refs": [
|
|
162
|
+
"expected.json:verification_commands[5]"
|
|
163
|
+
]
|
|
164
|
+
}
|
|
165
|
+
],
|
|
166
|
+
"authority_order": [
|
|
167
|
+
"spec.md",
|
|
168
|
+
"expected.json/rubric",
|
|
169
|
+
"phase prompt",
|
|
170
|
+
"model preference"
|
|
171
|
+
],
|
|
172
|
+
"escalated_to_user": [],
|
|
173
|
+
"fixture_id": "F2-cli-medium-subcommand",
|
|
174
|
+
"model_stamps": {
|
|
175
|
+
"claude": {
|
|
176
|
+
"blocked_ids": [],
|
|
177
|
+
"model": "claude-opus-4-7",
|
|
178
|
+
"signed_plan_sha256": "05d19dc09a1c8820f58afcd091c2cd20888f7bf1141af4ec451a69723af0588a",
|
|
179
|
+
"status": "sign",
|
|
180
|
+
"timestamp": "2026-04-29T18:30:00Z"
|
|
181
|
+
},
|
|
182
|
+
"codex": {
|
|
183
|
+
"blocked_ids": [],
|
|
184
|
+
"model": "gpt-5.5",
|
|
185
|
+
"signed_plan_sha256": "05d19dc09a1c8820f58afcd091c2cd20888f7bf1141af4ec451a69723af0588a",
|
|
186
|
+
"status": "sign",
|
|
187
|
+
"timestamp": "2026-04-29T18:31:00Z"
|
|
188
|
+
}
|
|
189
|
+
},
|
|
190
|
+
"plan_status": "final",
|
|
191
|
+
"planning_mode": "pair",
|
|
192
|
+
"rejected_alternatives": [],
|
|
193
|
+
"rounds": [
|
|
194
|
+
{
|
|
195
|
+
"claude_draft_sha256": "0000000000000000000000000000000000000000000000000000000000000000",
|
|
196
|
+
"codex_draft_sha256": "1111111111111111111111111111111111111111111111111111111111111111",
|
|
197
|
+
"merged_sha256": "2222222222222222222222222222222222222222222222222222222222222222",
|
|
198
|
+
"note": "sample-pass synthetic round (test fixture)",
|
|
199
|
+
"round": 1
|
|
200
|
+
}
|
|
201
|
+
],
|
|
202
|
+
"schema_version": "1",
|
|
203
|
+
"source": {
|
|
204
|
+
"canonical_id_registry_path": "benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json",
|
|
205
|
+
"canonical_id_registry_sha256": "98ac16e4536ea3ef2e51d3c728982c014211c193a742cea74f1331e4fbba76be",
|
|
206
|
+
"expected_path": "benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json",
|
|
207
|
+
"expected_sha256": "ddef8feba49f20b6957e37840bc6a03e78e554776e380d81ad6390944c72fcab",
|
|
208
|
+
"rubric_path": "benchmark/auto-resolve/RUBRIC.md",
|
|
209
|
+
"rubric_sha256": "5b5b709a0b57f7e6f4fbc072af91e1edbc8d7910ae16b9b7be7170616aeaa9af",
|
|
210
|
+
"spec_path": "benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md",
|
|
211
|
+
"spec_sha256": "9b0949c2afd4a522de2bdbbf267d93907fd908bf0f1d0dc5e111ee30ba875bb7"
|
|
212
|
+
},
|
|
213
|
+
"unresolved": []
|
|
214
|
+
}
|