devlyn-cli 1.15.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/AGENTS.md +104 -0
  2. package/CLAUDE.md +135 -21
  3. package/README.md +43 -125
  4. package/benchmark/auto-resolve/BENCHMARK-DESIGN.md +272 -0
  5. package/benchmark/auto-resolve/README.md +114 -0
  6. package/benchmark/auto-resolve/RUBRIC.md +162 -0
  7. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/NOTES.md +30 -0
  8. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/expected.json +68 -0
  9. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/metadata.json +10 -0
  10. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/setup.sh +4 -0
  11. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/spec.md +45 -0
  12. package/benchmark/auto-resolve/fixtures/F1-cli-trivial-flag/task.txt +8 -0
  13. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/NOTES.md +54 -0
  14. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected-pair-plan-registry.json +170 -0
  15. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/expected.json +84 -0
  16. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/metadata.json +21 -0
  17. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-fail.json +214 -0
  18. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/pair-plan.sample-pass.json +223 -0
  19. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/setup.sh +5 -0
  20. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/spec.md +56 -0
  21. package/benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand/task.txt +14 -0
  22. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/NOTES.md +28 -0
  23. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected-pair-plan-registry.json +162 -0
  24. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/expected.json +65 -0
  25. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/metadata.json +19 -0
  26. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/setup.sh +4 -0
  27. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/spec.md +56 -0
  28. package/benchmark/auto-resolve/fixtures/F3-backend-contract-risk/task.txt +9 -0
  29. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/NOTES.md +40 -0
  30. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/expected.json +57 -0
  31. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/metadata.json +10 -0
  32. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/setup.sh +6 -0
  33. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/spec.md +49 -0
  34. package/benchmark/auto-resolve/fixtures/F4-web-browser-design/task.txt +9 -0
  35. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/NOTES.md +38 -0
  36. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/expected.json +65 -0
  37. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/metadata.json +10 -0
  38. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/setup.sh +55 -0
  39. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/spec.md +49 -0
  40. package/benchmark/auto-resolve/fixtures/F5-fix-loop-red-green/task.txt +7 -0
  41. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/NOTES.md +38 -0
  42. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/expected.json +77 -0
  43. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/metadata.json +10 -0
  44. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/setup.sh +4 -0
  45. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/spec.md +49 -0
  46. package/benchmark/auto-resolve/fixtures/F6-dep-audit-native-module/task.txt +10 -0
  47. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/NOTES.md +50 -0
  48. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/expected.json +76 -0
  49. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/metadata.json +10 -0
  50. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/setup.sh +36 -0
  51. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/spec.md +46 -0
  52. package/benchmark/auto-resolve/fixtures/F7-out-of-scope-trap/task.txt +7 -0
  53. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/NOTES.md +50 -0
  54. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/expected.json +63 -0
  55. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/metadata.json +10 -0
  56. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/setup.sh +4 -0
  57. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/spec.md +48 -0
  58. package/benchmark/auto-resolve/fixtures/F8-known-limit-ambiguous/task.txt +1 -0
  59. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/NOTES.md +93 -0
  60. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/expected.json +74 -0
  61. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/metadata.json +10 -0
  62. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/setup.sh +28 -0
  63. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/spec.md +62 -0
  64. package/benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve/task.txt +5 -0
  65. package/benchmark/auto-resolve/fixtures/SCHEMA.md +130 -0
  66. package/benchmark/auto-resolve/fixtures/test-repo/README.md +27 -0
  67. package/benchmark/auto-resolve/fixtures/test-repo/bin/cli.js +63 -0
  68. package/benchmark/auto-resolve/fixtures/test-repo/package-lock.json +823 -0
  69. package/benchmark/auto-resolve/fixtures/test-repo/package.json +22 -0
  70. package/benchmark/auto-resolve/fixtures/test-repo/playwright.config.js +17 -0
  71. package/benchmark/auto-resolve/fixtures/test-repo/server/index.js +37 -0
  72. package/benchmark/auto-resolve/fixtures/test-repo/tests/cli.test.js +25 -0
  73. package/benchmark/auto-resolve/fixtures/test-repo/tests/server.test.js +58 -0
  74. package/benchmark/auto-resolve/fixtures/test-repo/web/index.html +37 -0
  75. package/benchmark/auto-resolve/scripts/build-pair-eligible-manifest.py +174 -0
  76. package/benchmark/auto-resolve/scripts/check-f9-artifacts.py +256 -0
  77. package/benchmark/auto-resolve/scripts/compile-report.py +331 -0
  78. package/benchmark/auto-resolve/scripts/iter-0033c-compare.py +552 -0
  79. package/benchmark/auto-resolve/scripts/judge-opus-pass.sh +430 -0
  80. package/benchmark/auto-resolve/scripts/judge.sh +359 -0
  81. package/benchmark/auto-resolve/scripts/oracle-scope-tier-a.py +260 -0
  82. package/benchmark/auto-resolve/scripts/oracle-scope-tier-b.py +274 -0
  83. package/benchmark/auto-resolve/scripts/oracle-test-fidelity.py +328 -0
  84. package/benchmark/auto-resolve/scripts/pair-plan-idgen.py +401 -0
  85. package/benchmark/auto-resolve/scripts/pair-plan-lint.py +468 -0
  86. package/benchmark/auto-resolve/scripts/run-fixture.sh +691 -0
  87. package/benchmark/auto-resolve/scripts/run-iter-0033c.sh +234 -0
  88. package/benchmark/auto-resolve/scripts/run-suite.sh +214 -0
  89. package/benchmark/auto-resolve/scripts/ship-gate.py +222 -0
  90. package/bin/devlyn.js +175 -17
  91. package/config/skills/_shared/adapters/README.md +64 -0
  92. package/config/skills/_shared/adapters/gpt-5-5.md +29 -0
  93. package/config/skills/_shared/adapters/opus-4-7.md +29 -0
  94. package/config/skills/{devlyn:auto-resolve/scripts → _shared}/archive_run.py +26 -0
  95. package/config/skills/_shared/codex-config.md +54 -0
  96. package/config/skills/_shared/codex-monitored.sh +141 -0
  97. package/config/skills/_shared/engine-preflight.md +35 -0
  98. package/config/skills/_shared/expected.schema.json +93 -0
  99. package/config/skills/_shared/pair-plan-schema.md +298 -0
  100. package/config/skills/_shared/runtime-principles.md +110 -0
  101. package/config/skills/_shared/spec-verify-check.py +519 -0
  102. package/config/skills/devlyn:ideate/SKILL.md +99 -429
  103. package/config/skills/devlyn:ideate/references/elicitation.md +97 -0
  104. package/config/skills/devlyn:ideate/references/from-spec-mode.md +54 -0
  105. package/config/skills/devlyn:ideate/references/project-mode.md +76 -0
  106. package/config/skills/devlyn:ideate/references/spec-template.md +102 -0
  107. package/config/skills/devlyn:resolve/SKILL.md +172 -184
  108. package/config/skills/devlyn:resolve/references/free-form-mode.md +68 -0
  109. package/config/skills/devlyn:resolve/references/phases/build-gate.md +45 -0
  110. package/config/skills/devlyn:resolve/references/phases/cleanup.md +39 -0
  111. package/config/skills/devlyn:resolve/references/phases/implement.md +42 -0
  112. package/config/skills/devlyn:resolve/references/phases/plan.md +42 -0
  113. package/config/skills/devlyn:resolve/references/phases/verify.md +69 -0
  114. package/config/skills/devlyn:resolve/references/state-schema.md +106 -0
  115. package/{config/skills → optional-skills}/devlyn:design-system/SKILL.md +1 -0
  116. package/{config/skills → optional-skills}/devlyn:reap/SKILL.md +1 -0
  117. package/{config/skills → optional-skills}/devlyn:team-design-ui/SKILL.md +5 -0
  118. package/package.json +12 -2
  119. package/scripts/lint-skills.sh +431 -0
  120. package/config/skills/devlyn:auto-resolve/SKILL.md +0 -252
  121. package/config/skills/devlyn:auto-resolve/evals/evals.json +0 -21
  122. package/config/skills/devlyn:auto-resolve/evals/task-doctor-subcommand.md +0 -42
  123. package/config/skills/devlyn:auto-resolve/references/build-gate.md +0 -130
  124. package/config/skills/devlyn:auto-resolve/references/engine-routing.md +0 -82
  125. package/config/skills/devlyn:auto-resolve/references/findings-schema.md +0 -103
  126. package/config/skills/devlyn:auto-resolve/references/phases/phase-1-build.md +0 -54
  127. package/config/skills/devlyn:auto-resolve/references/phases/phase-2-evaluate.md +0 -45
  128. package/config/skills/devlyn:auto-resolve/references/phases/phase-3-critic.md +0 -84
  129. package/config/skills/devlyn:auto-resolve/references/pipeline-routing.md +0 -114
  130. package/config/skills/devlyn:auto-resolve/references/pipeline-state.md +0 -201
  131. package/config/skills/devlyn:auto-resolve/scripts/terminal_verdict.py +0 -96
  132. package/config/skills/devlyn:browser-validate/SKILL.md +0 -164
  133. package/config/skills/devlyn:browser-validate/references/flow-testing.md +0 -118
  134. package/config/skills/devlyn:browser-validate/references/tier1-chrome.md +0 -137
  135. package/config/skills/devlyn:browser-validate/references/tier2-playwright.md +0 -195
  136. package/config/skills/devlyn:browser-validate/references/tier3-curl.md +0 -57
  137. package/config/skills/devlyn:clean/SKILL.md +0 -285
  138. package/config/skills/devlyn:design-ui/SKILL.md +0 -351
  139. package/config/skills/devlyn:discover-product/SKILL.md +0 -124
  140. package/config/skills/devlyn:evaluate/SKILL.md +0 -564
  141. package/config/skills/devlyn:feature-spec/SKILL.md +0 -630
  142. package/config/skills/devlyn:ideate/references/challenge-rubric.md +0 -122
  143. package/config/skills/devlyn:ideate/references/codex-critic-template.md +0 -42
  144. package/config/skills/devlyn:ideate/references/templates/item-spec.md +0 -90
  145. package/config/skills/devlyn:implement-ui/SKILL.md +0 -466
  146. package/config/skills/devlyn:preflight/SKILL.md +0 -355
  147. package/config/skills/devlyn:preflight/references/auditors/browser-auditor.md +0 -32
  148. package/config/skills/devlyn:preflight/references/auditors/code-auditor.md +0 -86
  149. package/config/skills/devlyn:preflight/references/auditors/docs-auditor.md +0 -38
  150. package/config/skills/devlyn:product-spec/SKILL.md +0 -603
  151. package/config/skills/devlyn:recommend-features/SKILL.md +0 -286
  152. package/config/skills/devlyn:review/SKILL.md +0 -161
  153. package/config/skills/devlyn:team-resolve/SKILL.md +0 -631
  154. package/config/skills/devlyn:team-review/SKILL.md +0 -493
  155. package/config/skills/devlyn:update-docs/SKILL.md +0 -463
  156. package/config/skills/workflow-routing/SKILL.md +0 -73
  157. /package/{config/skills → optional-skills}/devlyn:reap/scripts/reap.sh +0 -0
  158. /package/{config/skills → optional-skills}/devlyn:reap/scripts/scan.sh +0 -0
@@ -0,0 +1,431 @@
1
+ #!/usr/bin/env bash
2
+ # lint-skills.sh — structural quality checks for the devlyn harness.
3
+ #
4
+ # Gates things that have drifted in the past. The original three (further numbered checks follow below):
5
+ # 1. Forbidden MCP / stale-model references in skills, README, installer.
6
+ # 2. Missing `name:` in skill frontmatter (Anthropic spec violation).
7
+ # 3. Source ↔ installed mirror drift on the harness critical path.
8
+ #
9
+ # Exit 0 = clean. Non-zero = fails; prints offending file:line per check.
10
+
11
+ set -u
12
+ cd "$(dirname "$0")/.." || { printf 'lint-skills: cannot cd to repo root\n' >&2; exit 1; } # every path below is relative to the parent of this script's directory
13
+
14
+ red=$(printf '\033[31m'); green=$(printf '\033[32m'); dim=$(printf '\033[2m'); reset=$(printf '\033[0m')
15
+ fail=0 # set to 1 by bad(); decides the exit status in the summary
16
+
17
+ section() { printf '\n%s=== %s ===%s\n' "$dim" "$1" "$reset"; } # print a dimmed "=== title ===" header
18
+ ok() { printf ' %s✓%s %s\n' "$green" "$reset" "$1"; } # print a green check-mark line
19
+ bad() { printf ' %s✗%s %s\n' "$red" "$reset" "$1"; fail=1; } # print a red cross line and mark the run as failed
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # 1. No MCP references in managed source or user-facing docs.
23
+ # ---------------------------------------------------------------------------
24
+ section "Check 1: No mcp__codex-cli__ outside _shared / archive"
25
+ # Legal places: config/skills/_shared/codex-config.md (explicitly says "MCP is not used"),
26
+ # archival snapshots, and tests.
27
+ offenders=$(grep -RIln 'mcp__codex-cli__' \
28
+ config/skills \
29
+ benchmark \
30
+ README.md \
31
+ CLAUDE.md \
32
+ bin/ 2>/dev/null \
33
+ | grep -v 'config/skills/_shared/codex-config.md' \
34
+ | grep -v 'config/skills/roadmap-archival-workspace/' \
35
+ | grep -v 'config/skills/devlyn:auto-resolve-workspace/' \
36
+ | grep -v 'config/skills/devlyn:ideate-workspace/' \
37
+ | grep -v 'config/skills/preflight-workspace/' \
38
+ | grep -v 'benchmark/auto-resolve/PILOT-RESULTS' \
39
+ || true)
40
+ if [ -z "$offenders" ]; then
41
+ ok "no MCP references in managed files"
42
+ else
43
+ while IFS= read -r f; do bad "$f"; done <<< "$offenders"
44
+ fi
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # 2. No "Requires Codex MCP" prose.
48
+ # ---------------------------------------------------------------------------
49
+ section "Check 2: No 'Requires Codex MCP' prose"
50
+ offenders=$(grep -RIln 'Requires Codex MCP\|Codex MCP server\|Codex MCP available\|Codex MCP disconnected' \
51
+ config/skills benchmark README.md CLAUDE.md bin/ 2>/dev/null \
52
+ | grep -v 'config/skills/roadmap-archival-workspace/' \
53
+ | grep -v 'config/skills/devlyn:auto-resolve-workspace/' \
54
+ | grep -v 'config/skills/devlyn:ideate-workspace/' \
55
+ | grep -v 'config/skills/preflight-workspace/' \
56
+ | grep -v 'benchmark/auto-resolve/PILOT-RESULTS' \
57
+ || true)
58
+ if [ -z "$offenders" ]; then
59
+ ok "no Codex MCP prose"
60
+ else
61
+ while IFS= read -r f; do bad "$f"; done <<< "$offenders"
62
+ fi
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # 3. No stale model strings (gpt-5.0..5.4 hardcoded outside config), including at end of line.
66
+ # ---------------------------------------------------------------------------
67
+ section "Check 3: No hardcoded pre-5.5 model strings"
68
+ offenders=$(grep -RInE 'gpt-5\.[0-4]([^.]|$)' \
69
+ config/skills CLAUDE.md README.md 2>/dev/null \
70
+ | grep -v 'config/skills/_shared/codex-config.md' \
71
+ | grep -v 'config/skills/roadmap-archival-workspace/' \
72
+ | grep -v 'config/skills/devlyn:auto-resolve-workspace/' \
73
+ | grep -v 'config/skills/devlyn:ideate-workspace/' \
74
+ | grep -v 'config/skills/preflight-workspace/' \
75
+ | grep -v 'evals\.json' \
76
+ || true)
77
+ if [ -z "$offenders" ]; then
78
+ ok "no hardcoded pre-5.5 strings"
79
+ else
80
+ while IFS= read -r f; do bad "$f"; done <<< "$offenders"
81
+ fi
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # 4. No stale Opus 4.6 benchmark references (should be 4.7 after P1).
85
+ # ---------------------------------------------------------------------------
86
+ section "Check 4: No stale 'Claude Opus 4.6' in routing table"
87
+ offenders=$(grep -RIln 'Claude Opus 4\.6' \
88
+ config/skills 2>/dev/null \
89
+ | grep -v 'config/skills/roadmap-archival-workspace/' \
90
+ | grep -v 'config/skills/devlyn:auto-resolve-workspace/' \
91
+ | grep -v 'config/skills/devlyn:ideate-workspace/' \
92
+ | grep -v 'config/skills/preflight-workspace/' \
93
+ || true)
94
+ if [ -z "$offenders" ]; then
95
+ ok "routing table on Opus 4.7"
96
+ else
97
+ while IFS= read -r f; do bad "$f"; done <<< "$offenders"
98
+ fi
99
+
100
+ # ---------------------------------------------------------------------------
101
+ # 5. Every devlyn:* skill has `name:` in frontmatter.
102
+ # ---------------------------------------------------------------------------
103
+ section "Check 5: devlyn:* SKILL.md has name: field"
104
+ missing=0
105
+ for skill in config/skills/devlyn:*/SKILL.md; do
106
+ [ -f "$skill" ] || continue
107
+ if ! head -20 "$skill" | grep -q '^name:'; then
108
+ bad "$skill — missing 'name:' in frontmatter"
109
+ missing=1
110
+ fi
111
+ done
112
+ if [ $missing -eq 0 ]; then
113
+ ok "all devlyn:* skills have name: field"
114
+ fi
115
+
116
+ # ---------------------------------------------------------------------------
117
+ # 6. Source ↔ installed mirror parity on critical path.
118
+ # Only runs if .claude/skills exists (i.e. installer has been run).
119
+ # ---------------------------------------------------------------------------
120
+ section "Check 6: Source ↔ installed mirror parity (critical path)"
121
+ if [ ! -d .claude/skills ]; then
122
+ ok "no .claude/skills (fresh checkout) — skipping parity check"
123
+ else
124
+ drift=0
125
+ # iter-0034 Phase 4 cutover (2026-05-03): legacy skill paths dropped.
126
+ # Surface is the 2-skill product (`/devlyn:resolve` + `/devlyn:ideate`)
127
+ # plus the `_shared/` kernel.
128
+ for rel in \
129
+ _shared/spec-verify-check.py \
130
+ devlyn:ideate/SKILL.md \
131
+ devlyn:ideate/references/spec-template.md \
132
+ devlyn:ideate/references/elicitation.md \
133
+ devlyn:ideate/references/project-mode.md \
134
+ devlyn:ideate/references/from-spec-mode.md \
135
+ devlyn:resolve/SKILL.md \
136
+ devlyn:resolve/references/state-schema.md \
137
+ devlyn:resolve/references/free-form-mode.md \
138
+ devlyn:resolve/references/phases/plan.md \
139
+ devlyn:resolve/references/phases/implement.md \
140
+ devlyn:resolve/references/phases/build-gate.md \
141
+ devlyn:resolve/references/phases/cleanup.md \
142
+ devlyn:resolve/references/phases/verify.md \
143
+ _shared/expected.schema.json \
144
+ _shared/adapters/README.md \
145
+ _shared/adapters/opus-4-7.md \
146
+ _shared/adapters/gpt-5-5.md \
147
+ _shared/codex-config.md \
148
+ _shared/codex-monitored.sh \
149
+ _shared/pair-plan-schema.md \
150
+ _shared/runtime-principles.md; do
151
+ src="config/skills/$rel"
152
+ dst=".claude/skills/$rel"
153
+ if [ ! -f "$src" ] || [ ! -f "$dst" ]; then
154
+ bad "missing file on critical path: $rel"; drift=1; continue
155
+ fi
156
+ if ! diff -q "$src" "$dst" >/dev/null 2>&1; then
157
+ bad "$rel — source and installed differ"
158
+ drift=1
159
+ fi
160
+ done
161
+ # iter-0009: codex-monitored.sh must be executable in the installed mirror
162
+ # (skills tree gets cp -R'd into $WORK_DIR for the variant arm; bash will
163
+ # refuse to run a non-executable wrapper).
164
+ if [ -f ".claude/skills/_shared/codex-monitored.sh" ] \
165
+ && [ ! -x ".claude/skills/_shared/codex-monitored.sh" ]; then
166
+ bad "_shared/codex-monitored.sh — not executable in installed mirror"
167
+ drift=1
168
+ fi
169
+ if [ $drift -eq 0 ]; then
170
+ ok "critical path parity clean"
171
+ fi
172
+ fi
173
+
174
+ # ---------------------------------------------------------------------------
175
+ # 8. CRITIC security sub-pass must be native, not Dual.
176
+ # Catches the specific drift where a section updates but a cross-reference doesn't.
177
+ # ---------------------------------------------------------------------------
178
+ section "Check 8: CRITIC security is native (no stale Dual references)"
179
+ # Match only the concrete bad patterns used when CRITIC security was routed to
180
+ # Dual (v3.4 and earlier):
181
+ # 1. Markdown table cell `| **Dual** |`
182
+ # 2. Prose `Dual (Claude + Codex parallel, merged)`
183
+ # Retrospective mentions like "drops the Dual-model token cost" are fine.
184
+ offenders=$(grep -RInE '\|\s*\*\*Dual\*\*\s*\||Dual\s*\(Claude\s*\+\s*Codex' \
185
+ config/skills 2>/dev/null \
186
+ | grep -v 'roadmap-archival-workspace/' \
187
+ | grep -v 'devlyn:auto-resolve-workspace/' \
188
+ | grep -v 'devlyn:ideate-workspace/' \
189
+ | grep -v 'preflight-workspace/' \
190
+ || true)
191
+ if [ -z "$offenders" ]; then
192
+ ok "CRITIC security uses native (no Dual stragglers)"
193
+ else
194
+ while IFS= read -r f; do bad "$f"; done <<< "$offenders"
195
+ fi
196
+
197
+ # ---------------------------------------------------------------------------
198
+ # 10. No raw `codex exec` invocation in skill prompts (iter-0010).
199
+ # iter-0009 wrapper + iter-0010 production rollout require every Codex
200
+ # invocation in skill SKILL.md / references to use codex-monitored.sh.
201
+ # Raw `codex exec ...` in a prompt re-introduces the iter-0008 byte-watchdog
202
+ # starvation: orchestrator pattern-primes from the doc and emits the raw
203
+ # shape, which can collapse into `... | tail -200` and starve the outer API
204
+ # stream. Descriptive phrases like "passes args through to `codex exec`
205
+ # verbatim" are allowed — only invocation-shaped uses are forbidden.
206
+ #
207
+ # Pattern: `codex exec[[:space:]]+\S` — catches any invocation shape
208
+ # (whitespace then a non-space character after `exec`). Passes backtick-
209
+ # closed descriptive prose like `` `codex exec` `` because the closing
210
+ # backtick is non-whitespace adjacent to `exec`, not whitespace.
211
+ # Concrete shapes caught:
212
+ # - single-line flag: `codex exec -C ...`
213
+ # - resume form: `codex exec resume --last`
214
+ # - multi-line cont.: `codex exec \` (space + `\` at EOL)
215
+ # - quoted prompt: `codex exec "prompt"` ← iter-0011
216
+ # - variable expansion: `codex exec $PROMPT` ← iter-0011
217
+ # - literal token: `codex exec prompt` ← iter-0011
218
+ # Excludes: _shared/codex-config.md (canonical doc may discuss the rule
219
+ # itself), workspace/, archive snapshots.
220
+ # ---------------------------------------------------------------------------
221
+ section "Check 10: No raw codex exec invocation in skill prompts"
222
+ offenders=$(grep -RInE 'codex exec[[:space:]]+[^[:space:]]' \
223
+ config/skills 2>/dev/null \
224
+ | grep -v 'config/skills/_shared/codex-config.md' \
225
+ | grep -v 'config/skills/_shared/codex-monitored.sh' \
226
+ | grep -v 'roadmap-archival-workspace/' \
227
+ | grep -v 'devlyn:auto-resolve-workspace/' \
228
+ | grep -v 'devlyn:ideate-workspace/' \
229
+ | grep -v 'preflight-workspace/' \
230
+ || true)
231
+ if [ -z "$offenders" ]; then
232
+ ok "no raw codex exec invocations in skill prompts (wrapper-form everywhere)"
233
+ else
234
+ while IFS= read -r f; do bad "$f"; done <<< "$offenders"
235
+ fi
236
+
237
+ # ---------------------------------------------------------------------------
238
+ # 9. Engine-downgrade string is canonical (codex-unavailable, not codex-ping failed).
239
+ # ---------------------------------------------------------------------------
240
+ section "Check 9: Downgrade string uses 'codex-unavailable'"
241
+ offenders=$(grep -RIln 'codex-ping failed\|codex-ping fail' \
242
+ config/skills CLAUDE.md README.md bin/ 2>/dev/null \
243
+ | grep -v 'roadmap-archival-workspace/' \
244
+ | grep -v 'devlyn:auto-resolve-workspace/' \
245
+ | grep -v 'devlyn:ideate-workspace/' \
246
+ | grep -v 'preflight-workspace/' \
247
+ || true)
248
+ if [ -z "$offenders" ]; then
249
+ ok "all downgrade strings canonical"
250
+ else
251
+ while IFS= read -r f; do bad "$f"; done <<< "$offenders"
252
+ fi
253
+
254
+ # ---------------------------------------------------------------------------
255
+ # (Check 7 retired iter-0034 Phase 4 cutover: the 4 findings-producing
256
+ # standalones — evaluate / review / clean / team-review — were deleted; the
257
+ # JSONL sidecar contract no longer has a surface to enforce.)
258
+ # ---------------------------------------------------------------------------
259
+ # 12. CLAUDE.md ↔ _shared/runtime-principles.md per-section excerpt parity (iter-0019.A).
260
+ # Sub-agent prompts inline the runtime contract from runtime-principles.md; that file
261
+ # must mirror the corresponding CLAUDE.md sections. Drift in one source-of-truth without
262
+ # the other produces silent behavioral divergence between session-level and sub-agent
263
+ # enforcement. Per-section markers `<!-- runtime-principles:section=NAME:begin/end -->`
264
+ # wrap each of the 4 sections (subtractive-first, goal-locked, no-workaround, evidence)
265
+ # in BOTH files. Check 12 extracts each named block from both files and diffs.
266
+ # ---------------------------------------------------------------------------
267
+ section "Check 12: CLAUDE.md ↔ runtime-principles.md per-section excerpt parity"
268
+ rp_src="config/skills/_shared/runtime-principles.md"
269
+ claude_src="CLAUDE.md"
270
+ rp_drift=0
271
+ expected_sections="subtractive-first goal-locked no-workaround evidence"
272
+
273
+ if [ ! -f "$rp_src" ]; then
274
+ bad "$rp_src — missing"
275
+ rp_drift=1
276
+ elif [ ! -f "$claude_src" ]; then
277
+ bad "$claude_src — missing"
278
+ rp_drift=1
279
+ else
280
+ # Topology: each marker appears exactly once per file. grep -c already prints 0 on no match (while exiting 1), so only the exit status is neutralised.
281
+ for name in $expected_sections; do
282
+ for kind in begin end; do
283
+ marker="<!-- runtime-principles:section=${name}:${kind} -->"
284
+ for f in "$rp_src" "$claude_src"; do
285
+ count=$(grep -Fxc "$marker" "$f" 2>/dev/null || true)
286
+ if [ "${count:-0}" -ne 1 ]; then
287
+ bad "${f}: marker '${marker}' appears ${count:-0} times (expected 1)"
288
+ rp_drift=1
289
+ fi
290
+ done
291
+ done
292
+ done
293
+
294
+ # Topology: in runtime-principles.md, all 4 sections must sit INSIDE the
295
+ # outer `:contract:` block AND appear in the canonical order. CLAUDE.md
296
+ # placement is free (sections may live in any order, anywhere in the file).
297
+ contract_begin_line=$(grep -Fxn '<!-- runtime-principles:contract:begin -->' "$rp_src" | head -1 | cut -d: -f1)
298
+ contract_end_line=$(grep -Fxn '<!-- runtime-principles:contract:end -->' "$rp_src" | head -1 | cut -d: -f1)
299
+ if [ -z "$contract_begin_line" ] || [ -z "$contract_end_line" ]; then
300
+ bad "${rp_src}: outer ':contract:begin/end' markers missing"
301
+ rp_drift=1
302
+ else
303
+ prev_line=0
304
+ for name in $expected_sections; do
305
+ sec_begin_line=$(grep -Fxn "<!-- runtime-principles:section=${name}:begin -->" "$rp_src" | head -1 | cut -d: -f1)
306
+ sec_end_line=$(grep -Fxn "<!-- runtime-principles:section=${name}:end -->" "$rp_src" | head -1 | cut -d: -f1)
307
+ if [ -n "$sec_begin_line" ] && [ -n "$sec_end_line" ]; then
308
+ if [ "$sec_begin_line" -le "$contract_begin_line" ] || [ "$sec_end_line" -ge "$contract_end_line" ]; then
309
+ bad "${rp_src}: section '${name}' is outside the ':contract:' block"
310
+ rp_drift=1
311
+ fi
312
+ if [ "$sec_begin_line" -lt "$prev_line" ]; then
313
+ bad "${rp_src}: section '${name}' is out of canonical order (expected: ${expected_sections})"
314
+ rp_drift=1
315
+ fi
316
+ prev_line=$sec_end_line
317
+ fi
318
+ done
319
+ fi
320
+
321
+ # Content: byte-compare each section block via diff over temp files.
322
+ # awk-into-tmpfile preserves trailing newlines (command substitution strips them).
323
+ tmp_rp=$(mktemp)
324
+ tmp_claude=$(mktemp)
325
+ for name in $expected_sections; do
326
+ begin="<!-- runtime-principles:section=${name}:begin -->"
327
+ end="<!-- runtime-principles:section=${name}:end -->"
328
+ awk -v b="$begin" -v e="$end" '$0==b{f=1;next}$0==e{f=0}f' "$rp_src" > "$tmp_rp"
329
+ awk -v b="$begin" -v e="$end" '$0==b{f=1;next}$0==e{f=0}f' "$claude_src" > "$tmp_claude"
330
+ if [ ! -s "$tmp_rp" ]; then
331
+ bad "${name}: empty/missing block in $rp_src"
332
+ rp_drift=1
333
+ continue
334
+ fi
335
+ if [ ! -s "$tmp_claude" ]; then
336
+ bad "${name}: empty/missing block in $claude_src"
337
+ rp_drift=1
338
+ continue
339
+ fi
340
+ if ! diff -q "$tmp_rp" "$tmp_claude" >/dev/null 2>&1; then
341
+ bad "${name}: CLAUDE.md and runtime-principles.md content differ"
342
+ rp_drift=1
343
+ fi
344
+ done
345
+ rm -f "$tmp_rp" "$tmp_claude"
346
+
347
+ if [ $rp_drift -eq 0 ]; then
348
+ ok "all 4 contract sections in parity (subtractive-first / goal-locked / no-workaround / evidence) — markers, topology, content"
349
+ fi
350
+ fi
351
+
352
+ # ---------------------------------------------------------------------------
353
+ # 13. pair-plan idgen output is deterministic across consecutive runs (iter-0022).
354
+ # Same input → byte-identical canonical_id_registry.json. Catches accidental
355
+ # dict-order, float-printing, or timestamp-leak regressions in idgen.
356
+ # Runs twice on F2 with --generated-at pinned and compares sha256; an empty hash (hashing tool unavailable) counts as a failure, never a pass.
357
+ # ---------------------------------------------------------------------------
358
+ section "Check 13: pair-plan-idgen.py output deterministic across runs (F2)"
359
+ idgen="benchmark/auto-resolve/scripts/pair-plan-idgen.py"
360
+ fixture="benchmark/auto-resolve/fixtures/F2-cli-medium-subcommand"
361
+ if [ ! -f "$idgen" ]; then
362
+ bad "Check 13 prerequisite missing: $idgen"
363
+ elif [ ! -d "$fixture" ]; then
364
+ bad "Check 13 prerequisite missing: $fixture"
365
+ else
366
+ tmp1=$(mktemp); tmp2=$(mktemp)
367
+ if python3 "$idgen" --fixture "$fixture" --generated-at 2026-04-29T18:30:00Z --output "$tmp1" >/dev/null 2>&1 \
368
+ && python3 "$idgen" --fixture "$fixture" --generated-at 2026-04-29T18:30:00Z --output "$tmp2" >/dev/null 2>&1; then
369
+ sha1=$(shasum -a 256 "$tmp1" | awk '{print $1}')
370
+ sha2=$(shasum -a 256 "$tmp2" | awk '{print $1}')
371
+ if [ -n "$sha1" ] && [ "$sha1" = "$sha2" ]; then
372
+ ok "F2 registry sha256 stable across two idgen runs ($sha1)"
373
+ else
374
+ bad "F2 registry sha256 drift (or hash unavailable): run1=${sha1:-none} run2=${sha2:-none}"
375
+ fi
376
+ else
377
+ bad "idgen invocation failed; cannot verify determinism"
378
+ fi
379
+ rm -f "$tmp1" "$tmp2"
380
+ fi
381
+
382
+ # ---------------------------------------------------------------------------
383
+ # 14. F9 fixture id matches the shipped 2-skill contract (iter-0033a, 2026-04-30).
384
+ # `/devlyn:preflight` was folded into `/devlyn:resolve`'s VERIFY phase; the
385
+ # legacy F9 dir name (`F9-e2e-ideate-to-preflight`) is misleading once
386
+ # preflight is gone. The retired copy lives under `fixtures/retired/` for
387
+ # replay; the live fixture must be `F9-e2e-ideate-to-resolve`. Any other
388
+ # non-retired reference to the old id is a stale rename.
389
+ # ---------------------------------------------------------------------------
390
+ section "Check 14: F9 fixture id matches 2-skill contract"
391
+ f9_drift=0
392
+ if [ ! -d "benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve" ]; then
393
+ bad "live F9 fixture missing at benchmark/auto-resolve/fixtures/F9-e2e-ideate-to-resolve"
394
+ f9_drift=1
395
+ fi
396
+ # Stale references outside fixtures/retired/ are bugs. Examine line content
397
+ # (not just filename) so files that legitimately mention the retired *path*
398
+ # (e.g. fixtures/F9-e2e-ideate-to-resolve/NOTES.md explaining where the OLD
399
+ # version lives) pass while genuine stale references fail. Excluded scopes:
400
+ # benchmark/auto-resolve/results/ (historical run artifacts, frozen) and
401
+ # scripts/lint-skills.sh itself (carries the pattern in this check).
402
+ stale=$(grep -RIn 'F9-e2e-ideate-to-preflight' \
403
+ config/skills \
404
+ benchmark \
405
+ scripts \
406
+ CLAUDE.md \
407
+ README.md 2>/dev/null \
408
+ | grep -v '^benchmark/auto-resolve/fixtures/retired/F9-e2e-ideate-to-preflight/' \
409
+ | grep -v '^benchmark/auto-resolve/results/' \
410
+ | grep -v '^scripts/lint-skills\.sh:' \
411
+ | grep -v 'fixtures/retired/F9-e2e-ideate-to-preflight' \
412
+ || true)
413
+ if [ -n "$stale" ]; then
414
+ while IFS= read -r f; do bad "stale F9-e2e-ideate-to-preflight reference: $f"; done <<< "$stale"
415
+ f9_drift=1
416
+ fi
417
+ if [ $f9_drift -eq 0 ]; then
418
+ ok "F9 fixture id is canonical (F9-e2e-ideate-to-resolve); no stale refs outside retired/"
419
+ fi
420
+
421
+ # ---------------------------------------------------------------------------
422
+ # Summary.
423
+ # ---------------------------------------------------------------------------
424
+ echo
425
+ if [ $fail -eq 0 ]; then
426
+ printf '%sAll checks passed.%s\n' "$green" "$reset"
427
+ exit 0
428
+ else
429
+ printf '%sLint failed.%s Fix the offenders above.\n' "$red" "$reset"
430
+ exit 1
431
+ fi