@bastani/atomic 0.5.31 → 0.5.32-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,89 @@
1
1
  /**
2
2
  * Ralph Prompt Utilities
3
3
  *
4
- * Prompts used by the Ralph plan → orchestrate → review → debug loop:
5
- * - buildPlannerPrompt: initial planning OR re-planning from a debugger report
4
+ * Prompts used by the Ralph plan → orchestrate → review loop:
5
+ * - buildPlannerPrompt: initial planning OR re-planning from reviewer findings
6
6
  * - buildOrchestratorPrompt: spawn workers to execute the task list
7
7
  * - buildInfraDiscoveryPrompts: prompts for parallel sub-agent infrastructure discovery
8
8
  * - buildReviewPrompt: structured code review with injected changeset + discovery context
9
- * - buildDebuggerReportPrompt: diagnose review findings, produce a re-plan brief
10
9
  *
11
10
  * Plus Zod schemas for structured output, parsing helpers for the reviewer
12
- * JSON output, and the debugger markdown report.
11
+ * JSON output, and {@link formatReviewForReplan} which renders the merged
12
+ * reviewer output as the markdown brief consumed by the next planner
13
+ * iteration.
13
14
  */
14
15
 
15
16
  import { z } from "zod";
16
17
 
18
// ============================================================================
// RESPONSE STYLE (token reduction)
// ============================================================================

/**
 * Caveman response-style directive appended to the Ralph prompts that are
 * built through {@link withCaveman} (planner, orchestrator, and the three
 * infrastructure-discovery prompts).
 *
 * Goal: shrink free-form prose across many loop iterations to cut tokens
 * without dropping technical substance. The "Boundaries" section carves out
 * every machine-consumed contract (schemas, headers, enums, tool args, code).
 *
 * Placement: {@link withCaveman} joins this text onto the END of the prompt,
 * so it follows any output-format block the builder wrote. The final
 * "Boundaries" bullet ("caveman does not override output contracts") is what
 * keeps those format contracts binding even though they are no longer the
 * last thing the model reads.
 *
 * The header examples in "Boundaries" deliberately mention only RFC section
 * names: the debugger report template they used to cite has been removed.
 *
 * NOTE(review): the review prompt does not appear to be wrapped in
 * withCaveman — confirm whether that omission is intentional.
 */
export const CAVEMAN_INSTRUCTION = `## Response Style — Terse Caveman

Respond terse like smart caveman. All technical substance stay. Only fluff die.

### Persistence
ACTIVE EVERY RESPONSE. No revert after many turns. No filler drift. Still active if unsure.

### Rules
Drop: articles (a/an/the), filler (just/really/basically/actually/simply), pleasantries (sure/certainly/of course/happy to), hedging. Fragments OK. Short synonyms (big not extensive, fix not "implement a solution for"). Technical terms exact. Code blocks unchanged. Errors quoted exact.

Pattern: \`[thing] [action] [reason]. [next step].\`

Not: "Sure! I'd be happy to help you with that. The issue you're experiencing is likely caused by..."
Yes: "Bug in auth middleware. Token expiry check use \`<\` not \`<=\`. Fix:"

### Intensity
Drop articles, fragments OK, short synonyms.

Example — "Why React component re-render?"
"New object ref each render. Inline object prop = new ref = re-render. Wrap in \`useMemo\`."

Example — "Explain database connection pooling."
"Pool reuse open DB connections. No new connection per request. Skip handshake overhead."

### Auto-Clarity
Drop caveman for: security warnings, irreversible action confirmations, multi-step sequences where fragment order risks misread, user asks to clarify or repeats question. Resume caveman after clear part done.

Example — destructive op:
> **Warning:** This will permanently delete all rows in the \`users\` table and cannot be undone.
> \`\`\`sql
> DROP TABLE users;
> \`\`\`
> Caveman resume. Verify backup exist first.

### Boundaries — caveman MUST NOT touch
Caveman compresses free-form prose only. Leave the following exactly as the prompt or schema specifies:

- Code blocks, commit messages, PR descriptions: write normal.
- Exact enum / literal strings the schema or prompt specifies (e.g. \`"patch is correct"\`, \`"patch is incorrect"\`, task statuses \`pending\` / \`in_progress\` / \`completed\` / \`error\`).
- Required section headers and template scaffolding (e.g. RFC section names) — verbatim.
- Tool names, tool arguments, JSON keys, schema field names.
- File paths, URLs, command invocations, error text quoted from tools.
- SQL, JSON, Markdown templates: compress prose inside, never the structure.
- Task titles / descriptions persisted via task-management tools: keep them self-contained and unambiguous.
- When the prompt says output ONLY a path / ONLY a fenced block / ONLY a JSON payload, obey that exactly — caveman does not override output contracts.`;

/**
 * Append the caveman style directive to a fully built prompt.
 *
 * The directive is joined after `prompt` with one blank line, so it becomes
 * the final block of the returned text — including after any output-format
 * section the caller placed at the end of `prompt`.
 *
 * @param prompt - Complete prompt text produced by a builder.
 * @returns `prompt`, a blank line, then {@link CAVEMAN_INSTRUCTION}.
 */
function withCaveman(prompt: string): string {
  return `${prompt}\n\n${CAVEMAN_INSTRUCTION}`;
}
86
+
17
87
  // ============================================================================
18
88
  // STRUCTURED OUTPUT SCHEMAS
19
89
  // ============================================================================
@@ -65,8 +135,10 @@ export const ReviewResultSchema = z.object({
65
135
  .array(ReviewFindingSchema)
66
136
  .describe("List of review findings, ordered by priority"),
67
137
  overall_correctness: z
68
- .string()
69
- .describe("'patch is correct' or 'patch is incorrect'"),
138
+ .enum(["patch is correct", "patch is incorrect"])
139
+ .describe(
140
+ "Exact literal: 'patch is correct' or 'patch is incorrect'. No paraphrase.",
141
+ ),
70
142
  overall_explanation: z
71
143
  .string()
72
144
  .describe("Summary of overall quality and correctness"),
@@ -157,31 +229,38 @@ export function mergeReviewResults(
157
229
  export interface PlannerContext {
158
230
  /** 1-indexed loop iteration. Iteration 1 = initial plan; >1 = re-plan. */
159
231
  iteration: number;
160
- /** Markdown report from the previous iteration's debugger sub-agent. */
161
- debuggerReport?: string;
232
+ /**
233
+ * Markdown rendering of the previous iteration's merged reviewer
234
+ * findings. Produced by {@link formatReviewForReplan}. The planner is
235
+ * responsible for validating, deduping, and clustering findings into
236
+ * shared root causes before revising the RFC — there is no separate
237
+ * debugger stage. Only used when iteration > 1 and the trimmed text is non-empty.
238
+ */
239
+ reviewReport?: string;
162
240
  }
163
241
 
164
242
  /**
165
243
  * Build the planner prompt. The first iteration authors an RFC from the
166
- * original spec; subsequent iterations revise the RFC using the debugger
167
- * report from the previous loop iteration.
244
+ * original spec; subsequent iterations revise the RFC using the merged
245
+ * reviewer findings from the previous loop iteration.
168
246
  *
169
247
  * The planner's deliverable is a filled-in Technical Design Document / RFC
170
- * rendered as markdown text
171
- * consumes the RFC as design context
248
+ * rendered as markdown text; the orchestrator consumes the RFC as design
249
+ * context.
172
250
  */
173
251
  export function buildPlannerPrompt(
174
252
  spec: string,
175
253
  context: PlannerContext = { iteration: 1 },
176
254
  ): string {
177
- const debuggerReport = context.debuggerReport?.trim() ?? "";
178
- const isReplan = context.iteration > 1 && debuggerReport.length > 0;
255
+ const reviewReport = context.reviewReport?.trim() ?? "";
256
+ const isReplan = context.iteration > 1 && reviewReport.length > 0;
179
257
 
180
258
  const header = isReplan
181
259
  ? `# Technical Design Revision (Iteration ${context.iteration})
182
260
 
183
- The previous iteration's implementation was flagged by the reviewer, and the
184
- debugger investigated. Revise the RFC so it reflects the corrected approach.`
261
+ The previous iteration's implementation was flagged by the reviewer.
262
+ Investigate the findings, identify shared root causes, and revise the RFC
263
+ so it reflects the corrected approach.`
185
264
  : `# Technical Design (Iteration 1)
186
265
 
187
266
  Author a Technical Design Document / RFC for the specification below.`;
@@ -192,43 +271,57 @@ Author a Technical Design Document / RFC for the specification below.`;
192
271
  ${spec}
193
272
  </specification>`;
194
273
 
195
- const debuggerBlock = isReplan
274
+ const reviewBlock = isReplan
196
275
  ? `
197
276
 
198
- ## Debugger Report (authoritative)
277
+ ## Reviewer Findings (previous iteration)
278
+
279
+ <reviewer_findings>
280
+ ${reviewReport}
281
+ </reviewer_findings>
199
282
 
200
- <debugger_report>
201
- ${debuggerReport}
202
- </debugger_report>
283
+ ### Triage Before Revising
284
+
285
+ The findings above are reviewer hypotheses, not root causes. Before touching
286
+ the RFC:
287
+
288
+ 1. **Validate** — for each finding, Read the cited file/lines (Grep/Glob/LSP)
289
+ and confirm the issue exists. Drop findings that are stale or wrong.
290
+ 2. **Dedupe & cluster** — group findings that share a file, module, or
291
+ underlying defect. Multiple symptoms often share one root cause.
292
+ 3. **Root-cause** — for each cluster, identify the underlying defect (not
293
+ the symptom). Note files that must change and any invariants the next
294
+ workers must respect.
203
295
 
204
296
  ### Revision Focus
205
297
 
206
- Fold every issue in the debugger report into the revised RFC:
298
+ Fold the validated, clustered root causes into the RFC:
207
299
 
208
- - **Section 5 (Detailed Design)** — specify the corrected approach. Every
209
- "Issue Identified" in the report should map to a concrete design change.
210
- - **Section 6 (Alternatives Considered)** — if the root cause points to a
300
+ - **Section 5 (Detailed Design)** — specify the corrected approach. Each
301
+ root cause should map to a concrete design change.
302
+ - **Section 6 (Alternatives Considered)** — if a root cause points to a
211
303
  better option than the one previously chosen, promote it and demote the
212
304
  current choice to "rejected" with the new rejection reason.
213
305
  - **Section 8 (Migration, Rollout, and Testing)** — add validation steps
214
- that would have caught the regression.
306
+ (tests, lint rules, type checks) that would have caught the regression.
215
307
  - **Section 9 (Open Questions / Unresolved Issues)** — surface any
216
- uncertainty the debugger flagged as unresolved.`
308
+ uncertainty triage left unresolved.`
217
309
  : "";
218
310
 
219
- return `${header}
311
+ return withCaveman(`${header}
220
312
 
221
- ${specBlock}${debuggerBlock}
313
+ ${specBlock}${reviewBlock}
222
314
 
223
315
  ${
224
316
  isReplan
225
317
  ? `## Step 1: Author a Revised RFC
226
318
 
227
- This is a re-plan iteration — the debugger report above MUST be folded into
228
- the design. Always author a revised RFC here, even if the original
229
- specification was a file path. If the spec is a path, Read the file first to
230
- get the original design, then produce a revised RFC that incorporates the
231
- debugger findings. Do NOT short-circuit to just the path on re-plan.`
319
+ This is a re-plan iteration — the validated, clustered findings above MUST
320
+ be folded into the design. Always author a revised RFC here, even if the
321
+ original specification was a file path. If the spec is a path, Read the
322
+ file first to get the original design, then produce a revised RFC that
323
+ incorporates the corrected approach. Do NOT short-circuit to just the path
324
+ on re-plan.`
232
325
  : `## Step 1: Spec Path Short-Circuit (do this FIRST)
233
326
 
234
327
  The specification above may be either a **file path** to an existing spec
@@ -279,7 +372,7 @@ forward the path. Duplicating the spec wastes tokens and introduces drift.`
279
372
  - Output nothing else after the RFC (or path) — no meta-commentary, no
280
373
  summary. The document (or path) stands on its own.
281
374
  - Match depth to stakes: a greenfield service warrants deep sections 5-7; a
282
- small refactor can abbreviate them, but every section header must be present.`;
375
+ small refactor can abbreviate them, but every section header must be present.`);
283
376
  }
284
377
  // ============================================================================
285
378
  // ORCHESTRATOR
@@ -320,7 +413,7 @@ ${plannerNotes}
320
413
  (empty — fall back to the Original User Specification below)
321
414
  </planner_output>`;
322
415
 
323
- return `You are the workflow orchestrator. You run a three-phase loop:
416
+ return withCaveman(`You are the workflow orchestrator. You run a three-phase loop:
324
417
 
325
418
  1. **Decompose** the design document into a task list.
326
419
  2. **Execute** the tasks by spawning parallel worker sub-agents.
@@ -444,7 +537,21 @@ Update statuses **immediately** at every transition via task tool.
444
537
  - When multiple workers complete in parallel, issue a SEPARATE update per
445
538
  task.
446
539
  - Mark previous tasks \`completed\` before marking new ones
447
- \`in_progress\`.`;
540
+ \`in_progress\`.
541
+
542
+ ## Worker Sub-Agent Response Style
543
+
544
+ When you spawn a worker via \`Agent\` / \`Task\` / \`agent\`, append this exact
545
+ clause to its prompt so the worker inherits the terse style:
546
+
547
+ > Respond terse like smart caveman. Drop articles, filler, pleasantries,
548
+ > hedging. Fragments OK. Technical terms exact. Code blocks unchanged.
549
+ > Errors quoted exact. Never compress: tool names, tool args, file paths,
550
+ > commit messages, code, exact enum/literal strings, schema field names,
551
+ > required section headers, task titles persisted to task tools.
552
+
553
+ Do NOT compress the worker's task subject, description, or persisted task
554
+ records — those must remain self-contained and unambiguous.`);
448
555
  }
449
556
 
450
557
  // ============================================================================
@@ -472,7 +579,7 @@ export interface InfraDiscoveryPrompts {
472
579
  */
473
580
  export function buildInfraDiscoveryPrompts(): InfraDiscoveryPrompts {
474
581
  return {
475
- locator: `# Locate Build & Test Infrastructure Files
582
+ locator: withCaveman(`# Locate Build & Test Infrastructure Files
476
583
 
477
584
  Find ALL files in this repository that define or configure the build, test,
478
585
  lint, type-check, and CI/CD infrastructure. Report their paths and a
@@ -498,9 +605,9 @@ Respond with a flat list:
498
605
 
499
606
  Be exhaustive. Do NOT skip files just because they seem minor — CI configs
500
607
  and agent instruction files often contain the authoritative command list.
501
- End with a brief trailing summary (1-2 sentences) of what you found.`,
608
+ End with a brief trailing summary (1-2 sentences) of what you found.`),
502
609
 
503
- analyzer: `# Analyze Build & Test Infrastructure
610
+ analyzer: withCaveman(`# Analyze Build & Test Infrastructure
504
611
 
505
612
  Examine this repository's build, test, lint, and type-check infrastructure.
506
613
  Your goal is to produce a concise reference that tells a reviewer exactly
@@ -541,9 +648,9 @@ which commands to run to verify an implementation.
541
648
 
542
649
  Be specific — include the exact invocation string (e.g. \`bun test\`, not
543
650
  just "run tests"). If a command has variants (e.g. test:unit, test:e2e),
544
- list each separately. End with a brief trailing summary.`,
651
+ list each separately. End with a brief trailing summary.`),
545
652
 
546
- patternFinder: `# Find Build & Test Patterns
653
+ patternFinder: withCaveman(`# Find Build & Test Patterns
547
654
 
548
655
  Search this repository for existing patterns that show how code is built,
549
656
  tested, and validated. A reviewer needs to know not just WHAT commands exist,
@@ -572,7 +679,7 @@ For each pattern found, report:
572
679
  - A brief explanation of when/how it's used
573
680
 
574
681
  End with a brief trailing summary of the overall build/test workflow order
575
- (e.g. "install → typecheck → lint → test → build").`,
682
+ (e.g. "install → typecheck → lint → test → build").`),
576
683
  };
577
684
  }
578
685
 
@@ -834,155 +941,7 @@ Begin your review now.`;
834
941
  }
835
942
 
836
943
  // ============================================================================
837
- // DEBUGGER
838
- // ============================================================================
839
-
840
- export interface DebuggerContext {
841
- /** 1-indexed loop iteration the debugger is investigating. */
842
- iteration: number;
843
- /**
844
- * Branch changeset captured immediately before the review. Provides the
845
- * debugger with the same file-level context as the reviewer.
846
- */
847
- changeset: {
848
- baseBranch: string;
849
- diffStat: string;
850
- uncommitted: string;
851
- nameStatus: string;
852
- errors: string[];
853
- };
854
- }
855
-
856
- /**
857
- * Build a prompt asking the debugger sub-agent to investigate a set of review
858
- * findings and produce a structured report. The debugger MUST NOT apply
859
- * fixes — its only deliverable is the report, which the next iteration's
860
- * planner consumes.
861
- */
862
- export function buildDebuggerReportPrompt(
863
- review: ReviewResult | null,
864
- rawReview: string,
865
- context: DebuggerContext,
866
- ): string {
867
- let findingsSection: string;
868
- if (review !== null && review.findings.length > 0) {
869
- const sorted = [...review.findings].sort(
870
- (a, b) => (a.priority ?? 3) - (b.priority ?? 3),
871
- );
872
- findingsSection = sorted
873
- .map((f, i) => {
874
- const pri = f.priority !== undefined ? `P${f.priority}` : "P2";
875
- const loc = f.code_location
876
- ? `${f.code_location.file_path}:${f.code_location.line_range.start}-${f.code_location.line_range.end}`
877
- : "unspecified";
878
- return `### Finding ${i + 1}: [${pri}] ${f.title}
879
- - **Location:** ${loc}
880
- - **Issue:** ${f.body}`;
881
- })
882
- .join("\n\n");
883
- } else {
884
- const trimmed = rawReview.trim();
885
- findingsSection =
886
- trimmed.length > 0
887
- ? `Reviewer output (could not parse as JSON):
888
-
889
- \`\`\`
890
- ${trimmed}
891
- \`\`\``
892
- : `(no reviewer output captured)`;
893
- }
894
-
895
- const { changeset } = context;
896
- const hasChanges =
897
- changeset.nameStatus.length > 0 || changeset.uncommitted.length > 0;
898
- const hasErrors = changeset.errors.length > 0;
899
-
900
- let changesetSection: string;
901
- if (hasChanges || hasErrors) {
902
- const parts: string[] = [];
903
- if (hasErrors) {
904
- parts.push(
905
- "**Git errors** (changeset may be incomplete — re-run these yourself):",
906
- ...changeset.errors.map((e) => `- ${e}`),
907
- "",
908
- );
909
- }
910
- if (changeset.nameStatus.length > 0) {
911
- parts.push(
912
- `Changed files (relative to \`${changeset.baseBranch}\`):`,
913
- "```",
914
- changeset.nameStatus,
915
- "```",
916
- );
917
- }
918
- if (changeset.uncommitted.length > 0) {
919
- parts.push(
920
- `Uncommitted (\`git status -s\`):`,
921
- "```",
922
- changeset.uncommitted,
923
- "```",
924
- );
925
- }
926
- changesetSection = parts.join("\n");
927
- } else {
928
- changesetSection = "(no changes detected)";
929
- }
930
-
931
- return `# Debugging Report Request (Iteration ${context.iteration})
932
-
933
- The reviewer flagged the issues below. Investigate them as a debugger and
934
- produce a structured report that the planner will consume on the next loop
935
- iteration.
936
-
937
- **You are NOT applying fixes.** Your only deliverable is the report. Do not
938
- edit files. Investigation tool calls (Read, grep, LSP, running tests in
939
- read-only mode) are fine; mutations are not.
940
-
941
- ## Reviewer Findings
942
-
943
- ${findingsSection}
944
-
945
- ## Branch Changeset
946
-
947
- ${changesetSection}
948
-
949
- ## Investigation Steps
950
-
951
- For each finding:
952
- 1. Locate the relevant code (LSP / grep / Read).
953
- 2. Identify the **root cause**, not just the symptom.
954
- 3. List the repo-relative file paths that must change.
955
- 4. Note constraints, pitfalls, or invariants the next planner must respect.
956
-
957
- ## Output Format
958
-
959
- Respond with EXACTLY one fenced \`\`\`markdown block containing the report.
960
- No prose before or after the block. Use this exact section structure:
961
-
962
- \`\`\`markdown
963
- # Debugger Report
964
-
965
- ## Issues Identified
966
- - [P<priority>] <one-line issue summary>
967
- - **Root cause:** <one or two sentences>
968
- - **Files:** <path/to/file.ext, path/to/other.ext>
969
- - **Fix approach:** <imperative description>
970
-
971
- ## Suggested Plan Adjustments
972
- 1. <imperative task description, suitable as a planner task>
973
- 2. <...>
974
-
975
- ## Pitfalls
976
- - <invariant or gotcha the planner/workers must respect>
977
- - <...>
978
- \`\`\`
979
-
980
- Keep the report tight — every line must be load-bearing for re-planning. Omit
981
- the "Pitfalls" section entirely if there are none. Begin now.`;
982
- }
983
-
984
- // ============================================================================
985
- // PARSING HELPERS
944
+ // PARSING & RE-PLAN HELPERS
986
945
  // ============================================================================
987
946
 
988
947
  export function filterActionable(parsed: {
@@ -1003,20 +962,80 @@ export function filterActionable(parsed: {
1003
962
  }
1004
963
 
1005
964
  /**
1006
- * Extract the LAST fenced ```markdown block from a piece of text. Used for
1007
- * parsing the debugger's structured report out of a long Claude pane
1008
- * scrollback or any other output that may include extra prose.
965
+ * Render the merged reviewer result as the markdown brief consumed by the
966
+ * next iteration's planner.
1009
967
  *
1010
- * Falls back to the trimmed full input when no fenced block is present, so
1011
- * the planner still receives the debugger's content even if formatting drifts.
968
+ * Findings are grouped by file path so the planner sees clusters of related
969
+ * symptoms together (often a hint at a shared root cause). Within each
970
+ * group findings are ordered by ascending priority (P0 first). The
971
+ * `overall_explanation` is included verbatim so the planner has the
972
+ * reviewers' overall narrative.
973
+ *
974
+ * When `parsed === null` (SDK validation failed) the raw transcript is
975
+ * surfaced inside a clearly-labelled fenced block so the planner knows the
976
+ * data is unstructured and must be investigated rather than trusted.
1012
977
  */
1013
- export function extractMarkdownBlock(content: string): string {
1014
- const blockRe = /```markdown\s*\n([\s\S]*?)\n```/g;
1015
- let last: string | null = null;
1016
- let match: RegExpExecArray | null;
1017
- while ((match = blockRe.exec(content)) !== null) {
1018
- if (match[1]) last = match[1];
978
+ export function formatReviewForReplan(
979
+ parsed: ReviewResult | null,
980
+ rawText: string,
981
+ ): string {
982
+ if (parsed === null) {
983
+ const trimmed = rawText.trim();
984
+ if (trimmed.length === 0) {
985
+ return "(no reviewer output captured — investigate the previous iteration's branch state directly)";
986
+ }
987
+ return `## Unparseable Reviewer Output
988
+
989
+ The reviewer's structured output failed schema validation. Raw transcript
990
+ below — investigate the branch state to determine what (if anything) needs
991
+ revision.
992
+
993
+ \`\`\`
994
+ ${trimmed}
995
+ \`\`\``;
996
+ }
997
+
998
+ if (parsed.findings.length === 0) {
999
+ return `## Reviewer Verdict
1000
+
1001
+ ${parsed.overall_explanation || "No actionable findings, but the reviewers did not sign off."}`;
1002
+ }
1003
+
1004
+ // Group by file path so clusters of related symptoms surface together.
1005
+ const groups = new Map<string, ReviewFinding[]>();
1006
+ for (const f of parsed.findings) {
1007
+ const key = f.code_location?.file_path ?? "(unspecified location)";
1008
+ const bucket = groups.get(key) ?? [];
1009
+ bucket.push(f);
1010
+ groups.set(key, bucket);
1011
+ }
1012
+
1013
+ const sections: string[] = [];
1014
+
1015
+ if (parsed.overall_explanation && parsed.overall_explanation.length > 0) {
1016
+ sections.push(`## Reviewer Summary\n\n${parsed.overall_explanation}`);
1019
1017
  }
1020
- if (last !== null) return last.trim();
1021
- return content.trim();
1018
+
1019
+ sections.push(`## Findings (${parsed.findings.length}, grouped by file)`);
1020
+
1021
+ const sortedFiles = Array.from(groups.keys()).sort();
1022
+ for (const filePath of sortedFiles) {
1023
+ const findings = (groups.get(filePath) ?? []).slice().sort(
1024
+ (a, b) => (a.priority ?? 3) - (b.priority ?? 3),
1025
+ );
1026
+ const lines: string[] = [`### \`${filePath}\``];
1027
+ for (const f of findings) {
1028
+ const pri = f.priority !== undefined ? `P${f.priority}` : "P2";
1029
+ const range = f.code_location
1030
+ ? `:${f.code_location.line_range.start}-${f.code_location.line_range.end}`
1031
+ : "";
1032
+ lines.push(
1033
+ `- **[${pri}] ${f.title}**${range ? ` (lines${range})` : ""}`,
1034
+ ` ${f.body.replace(/\n/g, "\n ")}`,
1035
+ );
1036
+ }
1037
+ sections.push(lines.join("\n"));
1038
+ }
1039
+
1040
+ return sections.join("\n\n");
1022
1041
  }
@@ -8,25 +8,26 @@
8
8
  import type { ReviewResult } from "./prompts.ts";
9
9
 
10
10
  /**
11
- * Check whether the reviewer produced actionable findings.
11
+ * Check whether the loop should iterate again.
12
12
  *
13
13
  * Returns true when:
14
- * 1. The parsed ReviewResult has one or more findings, OR
15
- * 2. The review could not be parsed (null) but the raw response
16
- * text is non-empty (treat unparseable output as actionable).
14
+ * 1. The review could not be parsed (null) but the raw response text is
15
+ * non-empty — unparseable output is treated as actionable so the loop keeps
16
+ * iterating instead of silently exiting. A null review with blank raw text returns false.
17
+ * 2. The merged review reports `overall_correctness === "patch is incorrect"`.
18
+ * {@link mergeReviewResults} sets the merged value to "patch is incorrect"
19
+ * if EITHER reviewer flagged it, so "patch is correct" here means BOTH
20
+ * reviewers signed off — the only stop condition.
17
21
  *
18
- * @param review - Parsed ReviewResult, or null if parsing failed.
22
+ * @param review - Parsed (merged) ReviewResult, or null if parsing failed.
19
23
  * @param rawText - The raw reviewer response text.
20
24
  */
21
25
  export function hasActionableFindings(
22
26
  review: ReviewResult | null,
23
27
  rawText: string,
24
28
  ): boolean {
25
- if (review !== null && review.findings.length > 0) {
26
- return true;
29
+ if (review === null) {
30
+ return rawText.trim().length > 0;
27
31
  }
28
- if (review === null && rawText.trim().length > 0) {
29
- return true;
30
- }
31
- return false;
32
+ return review.overall_correctness === "patch is incorrect";
32
33
  }