opencastle 0.31.7 → 0.32.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. package/README.md +4 -1
  2. package/bin/cli.mjs +15 -0
  3. package/dist/cli/agents.d.ts.map +1 -1
  4. package/dist/cli/agents.js +19 -5
  5. package/dist/cli/agents.js.map +1 -1
  6. package/dist/cli/artifacts-cli.d.ts +3 -0
  7. package/dist/cli/artifacts-cli.d.ts.map +1 -0
  8. package/dist/cli/artifacts-cli.js +36 -0
  9. package/dist/cli/artifacts-cli.js.map +1 -0
  10. package/dist/cli/baselines.d.ts.map +1 -1
  11. package/dist/cli/baselines.js +11 -0
  12. package/dist/cli/baselines.js.map +1 -1
  13. package/dist/cli/convoy/artifacts.d.ts +25 -0
  14. package/dist/cli/convoy/artifacts.d.ts.map +1 -0
  15. package/dist/cli/convoy/artifacts.js +129 -0
  16. package/dist/cli/convoy/artifacts.js.map +1 -0
  17. package/dist/cli/convoy/artifacts.test.d.ts +2 -0
  18. package/dist/cli/convoy/artifacts.test.d.ts.map +1 -0
  19. package/dist/cli/convoy/artifacts.test.js +169 -0
  20. package/dist/cli/convoy/artifacts.test.js.map +1 -0
  21. package/dist/cli/convoy/compaction.d.ts +23 -0
  22. package/dist/cli/convoy/compaction.d.ts.map +1 -0
  23. package/dist/cli/convoy/compaction.js +117 -0
  24. package/dist/cli/convoy/compaction.js.map +1 -0
  25. package/dist/cli/convoy/compaction.test.d.ts +2 -0
  26. package/dist/cli/convoy/compaction.test.d.ts.map +1 -0
  27. package/dist/cli/convoy/compaction.test.js +205 -0
  28. package/dist/cli/convoy/compaction.test.js.map +1 -0
  29. package/dist/cli/convoy/contracts.d.ts +22 -0
  30. package/dist/cli/convoy/contracts.d.ts.map +1 -0
  31. package/dist/cli/convoy/contracts.js +254 -0
  32. package/dist/cli/convoy/contracts.js.map +1 -0
  33. package/dist/cli/convoy/contracts.test.d.ts +2 -0
  34. package/dist/cli/convoy/contracts.test.d.ts.map +1 -0
  35. package/dist/cli/convoy/contracts.test.js +239 -0
  36. package/dist/cli/convoy/contracts.test.js.map +1 -0
  37. package/dist/cli/convoy/dag-analysis.d.ts +40 -0
  38. package/dist/cli/convoy/dag-analysis.d.ts.map +1 -0
  39. package/dist/cli/convoy/dag-analysis.js +282 -0
  40. package/dist/cli/convoy/dag-analysis.js.map +1 -0
  41. package/dist/cli/convoy/dag-analysis.test.d.ts +2 -0
  42. package/dist/cli/convoy/dag-analysis.test.d.ts.map +1 -0
  43. package/dist/cli/convoy/dag-analysis.test.js +289 -0
  44. package/dist/cli/convoy/dag-analysis.test.js.map +1 -0
  45. package/dist/cli/convoy/effort-scaling.d.ts +20 -0
  46. package/dist/cli/convoy/effort-scaling.d.ts.map +1 -0
  47. package/dist/cli/convoy/effort-scaling.js +82 -0
  48. package/dist/cli/convoy/effort-scaling.js.map +1 -0
  49. package/dist/cli/convoy/effort-scaling.test.d.ts +2 -0
  50. package/dist/cli/convoy/effort-scaling.test.d.ts.map +1 -0
  51. package/dist/cli/convoy/effort-scaling.test.js +120 -0
  52. package/dist/cli/convoy/effort-scaling.test.js.map +1 -0
  53. package/dist/cli/convoy/engine.d.ts.map +1 -1
  54. package/dist/cli/convoy/engine.js +280 -6
  55. package/dist/cli/convoy/engine.js.map +1 -1
  56. package/dist/cli/convoy/engine.test.js +155 -18
  57. package/dist/cli/convoy/engine.test.js.map +1 -1
  58. package/dist/cli/convoy/event-schemas.d.ts.map +1 -1
  59. package/dist/cli/convoy/event-schemas.js +55 -0
  60. package/dist/cli/convoy/event-schemas.js.map +1 -1
  61. package/dist/cli/convoy/isolation.d.ts +27 -0
  62. package/dist/cli/convoy/isolation.d.ts.map +1 -0
  63. package/dist/cli/convoy/isolation.js +120 -0
  64. package/dist/cli/convoy/isolation.js.map +1 -0
  65. package/dist/cli/convoy/isolation.test.d.ts +2 -0
  66. package/dist/cli/convoy/isolation.test.d.ts.map +1 -0
  67. package/dist/cli/convoy/isolation.test.js +105 -0
  68. package/dist/cli/convoy/isolation.test.js.map +1 -0
  69. package/dist/cli/convoy/review-stages.d.ts +9 -0
  70. package/dist/cli/convoy/review-stages.d.ts.map +1 -0
  71. package/dist/cli/convoy/review-stages.js +134 -0
  72. package/dist/cli/convoy/review-stages.js.map +1 -0
  73. package/dist/cli/convoy/review-stages.test.d.ts +2 -0
  74. package/dist/cli/convoy/review-stages.test.d.ts.map +1 -0
  75. package/dist/cli/convoy/review-stages.test.js +197 -0
  76. package/dist/cli/convoy/review-stages.test.js.map +1 -0
  77. package/dist/cli/convoy/skill-refinement.d.ts +39 -0
  78. package/dist/cli/convoy/skill-refinement.d.ts.map +1 -0
  79. package/dist/cli/convoy/skill-refinement.js +239 -0
  80. package/dist/cli/convoy/skill-refinement.js.map +1 -0
  81. package/dist/cli/convoy/skill-refinement.test.d.ts +2 -0
  82. package/dist/cli/convoy/skill-refinement.test.d.ts.map +1 -0
  83. package/dist/cli/convoy/skill-refinement.test.js +230 -0
  84. package/dist/cli/convoy/skill-refinement.test.js.map +1 -0
  85. package/dist/cli/convoy/spec-builder.d.ts +1 -0
  86. package/dist/cli/convoy/spec-builder.d.ts.map +1 -1
  87. package/dist/cli/convoy/spec-builder.js +11 -0
  88. package/dist/cli/convoy/spec-builder.js.map +1 -1
  89. package/dist/cli/convoy/spec-builder.test.js +54 -0
  90. package/dist/cli/convoy/spec-builder.test.js.map +1 -1
  91. package/dist/cli/convoy/store.d.ts +3 -2
  92. package/dist/cli/convoy/store.d.ts.map +1 -1
  93. package/dist/cli/convoy/store.js +20 -2
  94. package/dist/cli/convoy/store.js.map +1 -1
  95. package/dist/cli/convoy/store.test.js +15 -15
  96. package/dist/cli/convoy/store.test.js.map +1 -1
  97. package/dist/cli/convoy/tdd-gate.d.ts +15 -0
  98. package/dist/cli/convoy/tdd-gate.d.ts.map +1 -0
  99. package/dist/cli/convoy/tdd-gate.js +119 -0
  100. package/dist/cli/convoy/tdd-gate.js.map +1 -0
  101. package/dist/cli/convoy/tdd-gate.test.d.ts +2 -0
  102. package/dist/cli/convoy/tdd-gate.test.d.ts.map +1 -0
  103. package/dist/cli/convoy/tdd-gate.test.js +227 -0
  104. package/dist/cli/convoy/tdd-gate.test.js.map +1 -0
  105. package/dist/cli/convoy/types.d.ts +91 -0
  106. package/dist/cli/convoy/types.d.ts.map +1 -1
  107. package/dist/cli/convoy/types.js +8 -0
  108. package/dist/cli/convoy/types.js.map +1 -1
  109. package/dist/cli/insights.d.ts +3 -0
  110. package/dist/cli/insights.d.ts.map +1 -0
  111. package/dist/cli/insights.js +94 -0
  112. package/dist/cli/insights.js.map +1 -0
  113. package/dist/cli/lesson.d.ts.map +1 -1
  114. package/dist/cli/lesson.js +7 -0
  115. package/dist/cli/lesson.js.map +1 -1
  116. package/dist/cli/log.d.ts.map +1 -1
  117. package/dist/cli/log.js +7 -0
  118. package/dist/cli/log.js.map +1 -1
  119. package/dist/cli/package-config.d.ts +12 -0
  120. package/dist/cli/package-config.d.ts.map +1 -0
  121. package/dist/cli/package-config.js +37 -0
  122. package/dist/cli/package-config.js.map +1 -0
  123. package/dist/cli/package.d.ts +23 -0
  124. package/dist/cli/package.d.ts.map +1 -0
  125. package/dist/cli/package.js +285 -0
  126. package/dist/cli/package.js.map +1 -0
  127. package/dist/cli/package.test.d.ts +2 -0
  128. package/dist/cli/package.test.d.ts.map +1 -0
  129. package/dist/cli/package.test.js +236 -0
  130. package/dist/cli/package.test.js.map +1 -0
  131. package/dist/cli/pipeline.d.ts +6 -0
  132. package/dist/cli/pipeline.d.ts.map +1 -1
  133. package/dist/cli/pipeline.js +15 -2
  134. package/dist/cli/pipeline.js.map +1 -1
  135. package/dist/cli/run/schema.d.ts.map +1 -1
  136. package/dist/cli/run/schema.js +32 -0
  137. package/dist/cli/run/schema.js.map +1 -1
  138. package/dist/cli/run/schema.test.js +51 -0
  139. package/dist/cli/run/schema.test.js.map +1 -1
  140. package/dist/cli/skills.d.ts +3 -0
  141. package/dist/cli/skills.d.ts.map +1 -0
  142. package/dist/cli/skills.js +107 -0
  143. package/dist/cli/skills.js.map +1 -0
  144. package/dist/cli/types.d.ts +4 -1
  145. package/dist/cli/types.d.ts.map +1 -1
  146. package/dist/dashboard/scripts/etl.d.ts.map +1 -1
  147. package/dist/dashboard/scripts/etl.js +44 -11
  148. package/dist/dashboard/scripts/etl.js.map +1 -1
  149. package/package.json +2 -1
  150. package/src/cli/agents.ts +20 -5
  151. package/src/cli/artifacts-cli.ts +41 -0
  152. package/src/cli/baselines.ts +12 -0
  153. package/src/cli/convoy/artifacts.test.ts +201 -0
  154. package/src/cli/convoy/artifacts.ts +186 -0
  155. package/src/cli/convoy/compaction.test.ts +245 -0
  156. package/src/cli/convoy/compaction.ts +164 -0
  157. package/src/cli/convoy/contracts.test.ts +279 -0
  158. package/src/cli/convoy/contracts.ts +280 -0
  159. package/src/cli/convoy/dag-analysis.test.ts +349 -0
  160. package/src/cli/convoy/dag-analysis.ts +371 -0
  161. package/src/cli/convoy/effort-scaling.test.ts +140 -0
  162. package/src/cli/convoy/effort-scaling.ts +90 -0
  163. package/src/cli/convoy/engine.test.ts +175 -18
  164. package/src/cli/convoy/engine.ts +301 -7
  165. package/src/cli/convoy/event-schemas.ts +55 -0
  166. package/src/cli/convoy/isolation.test.ts +137 -0
  167. package/src/cli/convoy/isolation.ts +165 -0
  168. package/src/cli/convoy/review-stages.test.ts +235 -0
  169. package/src/cli/convoy/review-stages.ts +166 -0
  170. package/src/cli/convoy/skill-refinement.test.ts +277 -0
  171. package/src/cli/convoy/skill-refinement.ts +306 -0
  172. package/src/cli/convoy/spec-builder.test.ts +61 -0
  173. package/src/cli/convoy/spec-builder.ts +9 -0
  174. package/src/cli/convoy/store.test.ts +15 -15
  175. package/src/cli/convoy/store.ts +26 -4
  176. package/src/cli/convoy/tdd-gate.test.ts +281 -0
  177. package/src/cli/convoy/tdd-gate.ts +154 -0
  178. package/src/cli/convoy/types.ts +51 -0
  179. package/src/cli/insights.ts +99 -0
  180. package/src/cli/lesson.ts +8 -0
  181. package/src/cli/log.ts +8 -0
  182. package/src/cli/package-config.ts +48 -0
  183. package/src/cli/package.test.ts +276 -0
  184. package/src/cli/package.ts +329 -0
  185. package/src/cli/pipeline.ts +21 -2
  186. package/src/cli/run/schema.test.ts +58 -0
  187. package/src/cli/run/schema.ts +33 -0
  188. package/src/cli/skills.ts +121 -0
  189. package/src/cli/types.ts +4 -1
  190. package/src/dashboard/dist/_astro/index.D6quLrA6.css +1 -0
  191. package/src/dashboard/dist/data/convoy-list.json +21 -7
  192. package/src/dashboard/dist/data/convoys/demo-api-v2.json +3 -3
  193. package/src/dashboard/dist/data/convoys/demo-auth-revamp.json +5 -5
  194. package/src/dashboard/dist/data/convoys/demo-convoy-1.json +2 -2
  195. package/src/dashboard/dist/data/convoys/demo-convoy-2.json +1 -1
  196. package/src/dashboard/dist/data/convoys/demo-dashboard-ui.json +7 -7
  197. package/src/dashboard/dist/data/convoys/demo-data-pipeline.json +3 -3
  198. package/src/dashboard/dist/data/convoys/demo-deploy-ci.json +2 -2
  199. package/src/dashboard/dist/data/convoys/demo-docs-update.json +2 -2
  200. package/src/dashboard/dist/data/convoys/demo-perf-opt.json +4 -4
  201. package/src/dashboard/dist/index.html +306 -33
  202. package/src/dashboard/node_modules/.vite/deps/_metadata.json +6 -6
  203. package/src/dashboard/public/data/convoy-list.json +21 -7
  204. package/src/dashboard/public/data/convoys/demo-api-v2.json +3 -3
  205. package/src/dashboard/public/data/convoys/demo-auth-revamp.json +5 -5
  206. package/src/dashboard/public/data/convoys/demo-convoy-1.json +2 -2
  207. package/src/dashboard/public/data/convoys/demo-convoy-2.json +1 -1
  208. package/src/dashboard/public/data/convoys/demo-dashboard-ui.json +7 -7
  209. package/src/dashboard/public/data/convoys/demo-data-pipeline.json +3 -3
  210. package/src/dashboard/public/data/convoys/demo-deploy-ci.json +2 -2
  211. package/src/dashboard/public/data/convoys/demo-docs-update.json +2 -2
  212. package/src/dashboard/public/data/convoys/demo-perf-opt.json +4 -4
  213. package/src/dashboard/scripts/etl.test.ts +14 -0
  214. package/src/dashboard/scripts/etl.ts +48 -16
  215. package/src/dashboard/scripts/generate-demo-db.ts +18 -10
  216. package/src/dashboard/src/pages/index.astro +348 -45
  217. package/src/dashboard/src/styles/dashboard.css +56 -0
  218. package/src/orchestrator/prompts/assess-complexity.prompt.md +13 -0
  219. package/src/orchestrator/prompts/generate-convoy.prompt.md +19 -0
  220. package/src/dashboard/dist/_astro/index.BRDFmNzR.css +0 -1
@@ -0,0 +1,165 @@
1
+ import { normalizePath, pathsOverlap } from './partition.js'
2
+ import type { ConvoyStore } from './store.js'
3
+ import { listArtifacts, type ArtifactRef } from './artifacts.js'
4
+
5
+ // ── Interfaces ────────────────────────────────────────────────────────────────
6
+
7
/**
 * Outcome of one upstream task, in the shape consumed by
 * `formatDependencyResults` and produced by `resolveDependencyResults`.
 */
export interface DependencyResult {
  /** Identifier of the upstream task. */
  taskId: string
  /** Agent recorded on the upstream task. */
  agent: string
  /** Status recorded on the upstream task. */
  status: string
  /** Free-text summary, or `null` when none is available. */
  summary: string | null
  /** Paths the upstream task reported as changed (may be empty). */
  filesChanged: string[]
  /** Filesystem artifact references; omitted when there are none. */
  artifactRefs?: ArtifactRef[]
}
15
+
16
/**
 * Report returned by `detectPartitionViolations` when a task touched files
 * that none of its allowed paths cover.
 */
export interface PartitionViolation {
  /** Identifier of the task that was checked. */
  taskId: string
  /** The allowed paths the task was checked against. */
  allowedFiles: string[]
  /** Every file the task actually touched. */
  actualFiles: string[]
  /** The subset of `actualFiles` not covered by any allowed path. */
  violations: string[]
}
22
+
23
+ // ── Formatting ────────────────────────────────────────────────────────────────
24
+
25
/**
 * Render dependency results as markdown, one `####` section per dependency,
 * with sections separated by a blank line.
 *
 * Each section has a heading line (task id, agent, status), the summary (or a
 * placeholder when it is null/undefined), and the changed-file list (or
 * `none`). When artifact references are present, a bullet list of them and a
 * short reading hint are appended.
 *
 * @param deps Dependency results to render.
 * @returns The rendered markdown; an empty string for an empty input.
 */
export function formatDependencyResults(deps: DependencyResult[]): string {
  const sections: string[] = []

  for (const dep of deps) {
    const changed = dep.filesChanged.length > 0 ? dep.filesChanged.join(', ') : 'none'
    const lines: string[] = [
      `#### ${dep.taskId} (${dep.agent}) \u2014 ${dep.status}`,
      dep.summary ?? 'No summary available.',
      `Files changed: ${changed}`,
    ]

    const refs = dep.artifactRefs
    if (refs && refs.length > 0) {
      lines.push('Artifacts available:')
      for (const ref of refs) {
        lines.push(`- ${ref.path} \u2014 "${ref.summary}"`)
      }
      // The empty entry yields the blank line that precedes the hint.
      lines.push('', 'To read an artifact, open the file at the path above.')
    }

    sections.push(lines.join('\n'))
  }

  return sections.join('\n\n')
}
42
+
43
/**
 * Build the "Context Isolation Notice" text for a task.
 *
 * The notice states the task's id, agent and objective, lists its file
 * partition, embeds the formatted dependency results, and points at the
 * project conventions file.
 *
 * @param task The task being described. The objective is `description`, or
 *   the first 200 characters of `prompt` when `description` is empty.
 * @param dependencyResults Results of upstream tasks; when empty, a
 *   "first phase" placeholder line is used instead.
 * @returns The preamble as a single newline-joined string.
 */
export function buildIsolationPreamble(
  task: { id: string; description: string; prompt: string; files: string[]; agent: string },
  dependencyResults: DependencyResult[],
): string {
  const objective = task.description || task.prompt.slice(0, 200)

  let partitionLines = '- (none specified)'
  if (task.files.length > 0) {
    partitionLines = task.files.map(file => `- ${file}`).join('\n')
  }

  let dependencyText = 'No dependencies \u2014 you are in the first phase.'
  if (dependencyResults.length > 0) {
    dependencyText = formatDependencyResults(dependencyResults)
  }

  // Each inner array is one paragraph; paragraphs are separated by a blank line.
  const paragraphs: string[][] = [
    [
      '## Context Isolation Notice',
      'You are a fresh agent with NO prior context. You have no knowledge of other tasks',
      'in this convoy. Your only context is what follows.',
    ],
    [
      '### Your Task',
      `- **ID:** ${task.id}`,
      `- **Agent:** ${task.agent}`,
      `- **Objective:** ${objective}`,
    ],
    [
      '### Your File Partition',
      'You may ONLY read and modify files within this partition:',
      partitionLines,
    ],
    [
      'Do NOT modify files outside this partition. If you discover a need to change files',
      'outside your partition, note it in your output but do not make the change.',
    ],
    ['### Dependency Results', dependencyText],
    [
      '### Project Conventions',
      'Read `.github/instructions/general.instructions.md` for coding standards.',
    ],
  ]

  return paragraphs.map(lines => lines.join('\n')).join('\n\n')
}
77
+
78
+ // ── Partition violation detection ─────────────────────────────────────────────
79
+
80
/**
 * Check the files a task touched against its allowed paths.
 *
 * A file is accepted as soon as one allowed path covers it: both paths are
 * normalized with `normalizePath` and compared with `pathsOverlap`. If either
 * helper throws for a pair, that pair falls back to a plain string check
 * (exact match, or the file starting with the allowed path as a directory
 * prefix).
 *
 * @param taskId Identifier copied into the returned report.
 * @param allowedFiles Paths the task may touch.
 * @param actualFiles Paths the task actually touched.
 * @returns A report listing the uncovered files, or `null` when every file
 *   is covered.
 */
export function detectPartitionViolations(
  taskId: string,
  allowedFiles: string[],
  actualFiles: string[],
): PartitionViolation | null {
  const covers = (allowed: string, actual: string): boolean => {
    try {
      return pathsOverlap(normalizePath(allowed), normalizePath(actual))
    } catch {
      // Fallback for unusual paths: exact match or directory prefix
      const dirPrefix = allowed.endsWith('/') ? allowed : allowed + '/'
      return actual === allowed || actual.startsWith(dirPrefix)
    }
  }

  const violations = actualFiles.filter(
    actual => !allowedFiles.some(allowed => covers(allowed, actual)),
  )

  return violations.length > 0
    ? { taskId, allowedFiles, actualFiles, violations }
    : null
}
115
+
116
+ // ── Dependency result resolution ──────────────────────────────────────────────
117
+
118
/**
 * Load the results of a task's dependencies from the store.
 *
 * For each id in `dependsOn`, the task record is fetched with
 * `store.getTask`; ids with no record are skipped. `summary` and
 * `files_changed` are read from the `data` object of the record's JSON
 * `contract_result` when present and well-formed. Artifact references come
 * from `listArtifacts`. Failures while parsing the contract or listing
 * artifacts are ignored (best effort), leaving the defaults in place.
 *
 * @param store Convoy store used to look up task records.
 * @param convoyId Convoy the dependencies belong to.
 * @param dependsOn Ids of the upstream tasks, in the order to report them.
 * @returns One result per dependency that has a record, in input order.
 */
export function resolveDependencyResults(
  store: ConvoyStore,
  convoyId: string,
  dependsOn: string[],
): DependencyResult[] {
  const results: DependencyResult[] = []

  for (const depId of dependsOn) {
    const record = store.getTask(depId, convoyId)
    if (!record) continue

    let summary: string | null = null
    let filesChanged: string[] = []

    if (record.contract_result) {
      try {
        const contract = JSON.parse(record.contract_result) as {
          valid: boolean
          missing: string[]
          warnings: string[]
          data?: Record<string, unknown>
        }
        const data = contract.data
        if (data) {
          const rawSummary = data['summary']
          summary = typeof rawSummary === 'string' ? rawSummary : null

          const rawFiles = data['files_changed']
          if (Array.isArray(rawFiles)) {
            // Keep only string entries; anything else is dropped.
            filesChanged = rawFiles.filter((entry): entry is string => typeof entry === 'string')
          }
        }
      } catch { /* non-critical */ }
    }

    let artifactRefs: ArtifactRef[] | undefined
    try {
      const found = listArtifacts(convoyId, depId)
      if (found.length > 0) artifactRefs = found
    } catch { /* non-critical */ }

    results.push({
      taskId: record.id,
      agent: record.agent,
      status: record.status,
      summary,
      filesChanged,
      artifactRefs,
    } as DependencyResult)
  }

  return results
}
@@ -0,0 +1,235 @@
1
+ import { describe, it, expect, vi } from 'vitest'
2
+ import {
3
+ buildSpecCompliancePrompt,
4
+ buildCodeQualityPrompt,
5
+ parseStageVerdict,
6
+ runTwoStageReview,
7
+ } from './review-stages.js'
8
+ import type { ReviewRunnerFn } from './review-stages.js'
9
+ import type { TaskRecord } from './types.js'
10
+
11
+ // ── Fixtures ──────────────────────────────────────────────────────────────────
12
+
13
/**
 * Build a task record fixture for the review-stage tests.
 *
 * @param overrides Fields that replace the fixture defaults.
 * @returns The defaults merged with `overrides` (overrides win).
 */
function makeTask(overrides: Partial<TaskRecord> = {}): TaskRecord {
  const defaults = {
    id: 'task-1',
    convoy_id: 'convoy-1',
    phase: 1,
    prompt: 'Implement the feature.\n\n## Acceptance Criteria\n- Feature works\n- Tests pass',
    agent: 'developer',
    adapter: null,
    model: null,
    timeout_ms: 30000,
    status: 'done',
    worker_id: null,
    worktree: null,
    output: 'Done.',
    exit_code: 0,
    started_at: null,
    finished_at: null,
    retries: 0,
    max_retries: 3,
    files: 'src/feature.ts',
    depends_on: null,
    prompt_tokens: null,
    completion_tokens: null,
    total_tokens: null,
    cost_usd: null,
    gates: null,
    on_exhausted: 'dlq',
    injected: 0,
    provenance: null,
    idempotency_key: null,
    current_step: null,
    total_steps: null,
    review_level: null,
    review_verdict: null,
    review_tokens: null,
    review_model: null,
    panel_attempts: 0,
    dispute_id: null,
    drift_score: null,
    drift_retried: 0,
  }

  return { ...defaults, ...overrides } as TaskRecord
}
56
+
57
/**
 * Build reviewer output that ends in a passing `REVIEW_VERDICT` comment
 * (empty issue list) for the given stage.
 */
function makePassVerdict(stage: 'spec-compliance' | 'code-quality'): string {
  const comment = '<!-- REVIEW_VERDICT { "stage": "' + stage + '", "verdict": "pass", "issues": [] } -->'
  return 'Analysis done.\n' + comment
}
60
+
61
/**
 * Build reviewer output that ends in a blocking `REVIEW_VERDICT` comment for
 * the given stage, with `issues` serialized as a JSON array.
 */
function makeBlockVerdict(stage: 'spec-compliance' | 'code-quality', issues: string[]): string {
  const issuesJson = JSON.stringify(issues)
  const comment = '<!-- REVIEW_VERDICT { "stage": "' + stage + '", "verdict": "block", "issues": ' + issuesJson + ' } -->'
  return 'Analysis done.\n' + comment
}
64
+
65
+ // ── parseStageVerdict ─────────────────────────────────────────────────────────
66
+
67
// Covers the verdict parser: well-formed pass/block comments, and the
// fail-closed fallback for missing or malformed comments.
describe('parseStageVerdict', () => {
  const parseFailureIssues = ['Failed to parse reviewer output']

  it('parses a valid pass verdict', () => {
    const { stage, verdict, issues } = parseStageVerdict(
      makePassVerdict('spec-compliance'),
      'spec-compliance',
    )
    expect(stage).toBe('spec-compliance')
    expect(verdict).toBe('pass')
    expect(issues).toEqual([])
  })

  it('parses a valid block verdict with issues', () => {
    const reported = ['Missing types', 'Unsafe cast']
    const { stage, verdict, issues } = parseStageVerdict(
      makeBlockVerdict('code-quality', reported),
      'code-quality',
    )
    expect(stage).toBe('code-quality')
    expect(verdict).toBe('block')
    expect(issues).toEqual(['Missing types', 'Unsafe cast'])
  })

  it('falls back to block verdict on invalid/missing comment', () => {
    const { stage, verdict, issues } = parseStageVerdict('No verdict here.', 'spec-compliance')
    expect(stage).toBe('spec-compliance')
    expect(verdict).toBe('block')
    expect(issues).toEqual(parseFailureIssues)
  })

  it('falls back to block verdict on malformed JSON', () => {
    const { verdict, issues } = parseStageVerdict(
      '<!-- REVIEW_VERDICT { invalid json } -->',
      'code-quality',
    )
    expect(verdict).toBe('block')
    expect(issues).toEqual(parseFailureIssues)
  })
})
98
+
99
+ // ── buildSpecCompliancePrompt ─────────────────────────────────────────────────
100
+
101
// Covers the stage-1 prompt: acceptance criteria and verdict marker are
// present, and the optional diff / file-partition sections appear on demand.
describe('buildSpecCompliancePrompt', () => {
  it('includes acceptance criteria context from task prompt', () => {
    const prompt = buildSpecCompliancePrompt(makeTask())
    for (const fragment of ['Acceptance Criteria', 'spec-compliance', 'REVIEW_VERDICT']) {
      expect(prompt).toContain(fragment)
    }
  })

  it('includes diff section when diff is provided', () => {
    const prompt = buildSpecCompliancePrompt(makeTask(), '+ added line')
    for (const fragment of ['## Diff', '+ added line']) {
      expect(prompt).toContain(fragment)
    }
  })

  it('includes file partition section when task has files', () => {
    const taskWithFiles = makeTask({ files: 'src/feature.ts\nsrc/feature.test.ts' })
    const prompt = buildSpecCompliancePrompt(taskWithFiles)
    for (const fragment of ['File Partition', 'src/feature.ts']) {
      expect(prompt).toContain(fragment)
    }
  })
})
124
+
125
+ // ── buildCodeQualityPrompt ────────────────────────────────────────────────────
126
+
127
// Covers the stage-2 prompt: quality focus areas and verdict marker are
// present, and the diff section appears when a diff is supplied.
describe('buildCodeQualityPrompt', () => {
  it('includes code quality focus areas', () => {
    const prompt = buildCodeQualityPrompt(makeTask())
    for (const fragment of ['code-quality', 'TypeScript', 'as any', 'REVIEW_VERDICT']) {
      expect(prompt).toContain(fragment)
    }
  })

  it('includes diff section when diff is provided', () => {
    const prompt = buildCodeQualityPrompt(makeTask(), '- old line\n+ new line')
    for (const fragment of ['## Diff', '- old line']) {
      expect(prompt).toContain(fragment)
    }
  })
})
144
+
145
+ // ── runTwoStageReview ─────────────────────────────────────────────────────────
146
+
147
// Covers the two-stage runner: stage 2 runs only after a stage-1 pass, the
// overall verdict follows the last stage that ran, and tokens are summed.
describe('runTwoStageReview', () => {
  type Reply = { verdict: 'pass' | 'block'; feedback: string; tokens: number }

  const passReply = (stage: 'spec-compliance' | 'code-quality', tokens: number): Reply => ({
    verdict: 'pass',
    feedback: makePassVerdict(stage),
    tokens,
  })

  const blockReply = (
    stage: 'spec-compliance' | 'code-quality',
    issues: string[],
    tokens: number,
  ): Reply => ({
    verdict: 'block',
    feedback: makeBlockVerdict(stage, issues),
    tokens,
  })

  // Mock runner: the first call answers with `first`, every later call with
  // `later`; the model argument it receives is echoed back in the result.
  function replayRunner(first: Reply, later: Reply): ReviewRunnerFn {
    let calls = 0
    return vi.fn().mockImplementation((_task, _level, model) => {
      calls++
      const reply = calls === 1 ? first : later
      return Promise.resolve({ ...reply, model })
    })
  }

  it('Stage 1 PASS → Stage 2 runs, both stages in result', async () => {
    const runner = replayRunner(passReply('spec-compliance', 50), passReply('code-quality', 50))

    const result = await runTwoStageReview(makeTask(), runner, 'test-model')

    expect(runner).toHaveBeenCalledTimes(2)
    expect(result.stages).toHaveLength(2)
    const [first, second] = result.stages
    expect(first.stage).toBe('spec-compliance')
    expect(first.verdict).toBe('pass')
    expect(second.stage).toBe('code-quality')
    expect(result.overall_verdict).toBe('pass')
  })

  it('Stage 1 BLOCK → Stage 2 skipped, overall is block', async () => {
    const blocked = {
      verdict: 'block' as const,
      feedback: makeBlockVerdict('spec-compliance', ['Missing tests']),
      tokens: 75,
      model: 'test-model',
    }
    const runner: ReviewRunnerFn = vi.fn().mockResolvedValue(blocked)

    const result = await runTwoStageReview(makeTask(), runner, 'test-model')

    expect(runner).toHaveBeenCalledTimes(1)
    expect(result.stages).toHaveLength(1)
    const [only] = result.stages
    expect(only.stage).toBe('spec-compliance')
    expect(only.verdict).toBe('block')
    expect(result.overall_verdict).toBe('block')
  })

  it('Stage 1 PASS + Stage 2 BLOCK → overall BLOCK with 2 stages', async () => {
    const runner = replayRunner(
      passReply('spec-compliance', 60),
      blockReply('code-quality', ['Uses as any'], 80),
    )

    const result = await runTwoStageReview(makeTask(), runner, 'test-model')

    expect(result.stages).toHaveLength(2)
    expect(result.stages[1].verdict).toBe('block')
    expect(result.overall_verdict).toBe('block')
  })

  it('Stage 1 PASS + Stage 2 PASS → overall PASS (happy path)', async () => {
    const runner = replayRunner(passReply('spec-compliance', 40), passReply('code-quality', 40))

    const result = await runTwoStageReview(makeTask(), runner, 'test-model')

    expect(result.overall_verdict).toBe('pass')
    expect(result.stages).toHaveLength(2)
    expect(result.stages.every(stage => stage.verdict === 'pass')).toBe(true)
  })

  it('total_tokens is sum of both stage tokens', async () => {
    const runner = replayRunner(passReply('spec-compliance', 100), passReply('code-quality', 150))

    const result = await runTwoStageReview(makeTask(), runner, 'test-model')

    expect(result.total_tokens).toBe(250)
    expect(result.stages[0].tokens_used).toBe(100)
    expect(result.stages[1].tokens_used).toBe(150)
  })
})
@@ -0,0 +1,166 @@
1
+ import type { TaskRecord } from './types.js'
2
+ import type { ReviewStage, StageVerdict, TwoStageReviewResult } from './types.js'
3
+ import type { ReviewLevel, ReviewResult } from './engine.js'
4
+
5
/**
 * Callback that performs one review pass over a task.
 *
 * @param task The task record under review.
 * @param level Review level to run at.
 * @param model Name of the reviewer model to use.
 * @returns A promise for the review result.
 */
export type ReviewRunnerFn = (
  task: TaskRecord,
  level: ReviewLevel,
  model: string,
) => Promise<ReviewResult>
10
+
11
+ // ── Stage prompt builders ─────────────────────────────────────────────────────
12
+
13
/**
 * Build the stage-1 (spec compliance) reviewer prompt.
 *
 * The prompt embeds the task prompt, then an optional "File Partition"
 * section (when `task.files` is truthy) and an optional fenced "Diff"
 * section (when `diff` is truthy), followed by the review criteria and the
 * `REVIEW_VERDICT` comment format the reviewer must answer with.
 *
 * @param task Task under review; `prompt` and `files` are read.
 * @param diff Optional diff text to include.
 * @returns The complete prompt text.
 */
export function buildSpecCompliancePrompt(task: TaskRecord, diff?: string): string {
  const filesSection = task.files ? '\n\n## File Partition\n' + task.files : ''
  const diffSection = diff ? '\n\n## Diff\n```\n' + diff + '\n```' : ''

  const lines: string[] = [
    'You are a spec-compliance reviewer. Your job is to verify the implementation matches the original specification.',
    '',
    '## Task Prompt',
    `${task.prompt}`,
    // Both optional sections start with their own blank line, so they share one entry.
    filesSection + diffSection,
    '',
    '## Review Criteria (Stage 1 — Spec Compliance)',
    '',
    '1. Does the implementation match ALL acceptance criteria from the task prompt?',
    '2. Are all required deliverables present and complete?',
    '3. Do tests exist for all new functionality?',
    '4. Is the change confined to the assigned file partition (no out-of-scope files modified)?',
    '',
    'Respond with a single HTML comment containing a JSON verdict. Do not include any other text after the verdict comment.',
    '',
    '<!-- REVIEW_VERDICT { "stage": "spec-compliance", "verdict": "pass", "issues": [] } -->',
    '',
    'Use "block" if ANY criterion fails, and list the specific issues. Example:',
    '',
    '<!-- REVIEW_VERDICT { "stage": "spec-compliance", "verdict": "block", "issues": ["Missing tests for edge case X", "File src/other.ts is outside the partition"] } -->',
  ]

  return lines.join('\n')
}
38
+
39
/**
 * Build the stage-2 (code quality) reviewer prompt.
 *
 * The prompt embeds the task prompt and an optional fenced "Diff" section
 * (when `diff` is truthy), followed by the quality criteria and the
 * `REVIEW_VERDICT` comment format the reviewer must answer with.
 *
 * @param task Task under review; only `prompt` is read.
 * @param diff Optional diff text to include.
 * @returns The complete prompt text.
 */
export function buildCodeQualityPrompt(task: TaskRecord, diff?: string): string {
  const diffSection = diff ? '\n\n## Diff\n```\n' + diff + '\n```' : ''

  const lines: string[] = [
    'You are a code-quality reviewer. The implementation has already passed spec compliance. Now review for code quality.',
    '',
    '## Task Prompt',
    `${task.prompt}`,
    diffSection,
    '',
    '## Review Criteria (Stage 2 — Code Quality)',
    '',
    '1. Is the code idiomatic for the project conventions (TypeScript-first, no `as any`, proper types)?',
    '2. Are there type safety issues (missing types, unsafe casts, untyped props)?',
    '3. Are there obvious bugs, race conditions, or error handling gaps?',
    '4. Are there DRY violations, dead code, or unnecessary complexity?',
    '',
    'Respond with a single HTML comment containing a JSON verdict. Do not include any other text after the verdict comment.',
    '',
    '<!-- REVIEW_VERDICT { "stage": "code-quality", "verdict": "pass", "issues": [] } -->',
    '',
    'Use "block" if ANY criterion fails, and list the specific issues. Example:',
    '',
    `<!-- REVIEW_VERDICT { "stage": "code-quality", "verdict": "block", "issues": ["Uses 'as any' cast on line 42", "Swallowed exception in catch block"] } -->`,
  ]

  return lines.join('\n')
}
63
+
64
+ // ── Verdict parser ────────────────────────────────────────────────────────────
65
+
66
/**
 * Extract a stage verdict from reviewer output.
 *
 * Looks for the first `<!-- REVIEW_VERDICT { ... } -->` comment and parses
 * its JSON payload. The payload must be an object with `stage`, `verdict`
 * and `issues` keys. Any verdict other than `"pass"` is treated as
 * `"block"`; non-string issues are dropped, and a non-array `issues` value
 * becomes an empty list. The payload's own `stage` value is not used; the
 * result always carries `expectedStage`.
 *
 * @param output Raw reviewer output.
 * @param expectedStage Stage to record on the returned verdict.
 * @returns The parsed verdict, or a blocking fallback whose single issue is
 *   `'Failed to parse reviewer output'` when no valid comment is found.
 *   `tokens_used` is always 0 here.
 */
export function parseStageVerdict(output: string, expectedStage: ReviewStage): StageVerdict {
  // Fail closed: anything that cannot be parsed counts as a block.
  const unparsable = (): StageVerdict => ({
    stage: expectedStage,
    verdict: 'block',
    issues: ['Failed to parse reviewer output'],
    tokens_used: 0,
  })

  const found = /<!--\s*REVIEW_VERDICT\s*(\{[\s\S]*?\})\s*-->/.exec(output)
  if (found === null) return unparsable()

  let payload: unknown
  try {
    payload = JSON.parse(found[1])
  } catch {
    return unparsable()
  }

  const hasRequiredKeys =
    typeof payload === 'object' &&
    payload !== null &&
    'stage' in payload &&
    'verdict' in payload &&
    'issues' in payload
  if (!hasRequiredKeys) return unparsable()

  const fields = payload as Record<string, unknown>
  const rawIssues = fields['issues']

  return {
    stage: expectedStage,
    verdict: fields['verdict'] === 'pass' ? 'pass' : 'block',
    issues: Array.isArray(rawIssues)
      ? (rawIssues as unknown[]).filter((item): item is string => typeof item === 'string')
      : [],
    tokens_used: 0,
  }
}
107
+
108
+ // ── Two-stage runner ──────────────────────────────────────────────────────────
109
+
110
/**
 * Run the two-stage review: spec compliance first, then code quality.
 *
 * Each stage calls `reviewRunner` once at the `'fast'` level. The runner's
 * own `verdict` is the authoritative gate; the feedback text is parsed only
 * to obtain the issue list. Stage 2 is skipped when stage 1 blocks.
 *
 * NOTE(review): both stages invoke the runner with the same
 * `(task, 'fast', reviewerModel)` arguments, and the stage prompt builders
 * in this file are not used here. Confirm the runner distinguishes the
 * stages by some other means.
 *
 * @param task Task under review.
 * @param reviewRunner Callback that performs one review pass.
 * @param reviewerModel Model name forwarded to the runner.
 * @returns The verdicts of the stages that ran, the overall verdict (that of
 *   the last stage run), and the summed token usage.
 */
export async function runTwoStageReview(
  task: TaskRecord,
  reviewRunner: ReviewRunnerFn,
  reviewerModel: string,
): Promise<TwoStageReviewResult> {
  // One review pass, labelled with `stage`.
  const reviewStage = async (stage: ReviewStage): Promise<StageVerdict> => {
    const raw = await reviewRunner(task, 'fast', reviewerModel)
    // Use ReviewResult.verdict as authoritative gate; parseStageVerdict extracts issues only on successful parse
    const parsed = parseStageVerdict(raw.feedback, stage)
    const issues = resolveIssues(parsed, raw)
    return {
      stage,
      verdict: raw.verdict,
      issues,
      tokens_used: raw.tokens,
    }
  }

  // Stage 1: spec compliance
  const specCompliance = await reviewStage('spec-compliance')
  if (specCompliance.verdict === 'block') {
    return {
      stages: [specCompliance],
      overall_verdict: 'block',
      total_tokens: specCompliance.tokens_used,
    }
  }

  // Stage 2: code quality (only runs if stage 1 passes)
  const codeQuality = await reviewStage('code-quality')
  return {
    stages: [specCompliance, codeQuality],
    overall_verdict: codeQuality.verdict,
    total_tokens: specCompliance.tokens_used + codeQuality.tokens_used,
  }
}
152
+
153
/**
 * Pick the issue list to report for a stage.
 *
 * When `parsed` carries exactly the parse-failure marker issue and the raw
 * result has non-empty feedback, the raw feedback is reported as the single
 * issue (e.g. legacy reviewers or test mocks that return unstructured text).
 * In every other case the parsed issues are returned unchanged.
 *
 * @param parsed Verdict produced by `parseStageVerdict`.
 * @param raw The runner's result for the same stage.
 * @returns The issues to attach to the stage verdict.
 */
function resolveIssues(parsed: StageVerdict, raw: ReviewResult): string[] {
  const parseFailed =
    parsed.issues.length === 1 &&
    parsed.issues[0] === 'Failed to parse reviewer output'

  if (!parseFailed || !raw.feedback) {
    return parsed.issues
  }
  return [raw.feedback]
}