@bastani/atomic 0.5.0-1 → 0.5.0-2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.atomic/workflows/hello/claude/index.ts +44 -0
- package/.atomic/workflows/hello/copilot/index.ts +58 -0
- package/.atomic/workflows/hello/opencode/index.ts +58 -0
- package/.atomic/workflows/hello-parallel/claude/index.ts +76 -0
- package/.atomic/workflows/hello-parallel/copilot/index.ts +105 -0
- package/.atomic/workflows/hello-parallel/opencode/index.ts +115 -0
- package/.atomic/workflows/ralph/claude/index.ts +149 -0
- package/.atomic/workflows/ralph/copilot/index.ts +162 -0
- package/.atomic/workflows/ralph/helpers/git.ts +34 -0
- package/.atomic/workflows/ralph/helpers/prompts.ts +538 -0
- package/.atomic/workflows/ralph/helpers/review.ts +32 -0
- package/.atomic/workflows/ralph/opencode/index.ts +164 -0
- package/.atomic/workflows/tsconfig.json +22 -0
- package/.claude/agents/code-simplifier.md +52 -0
- package/.claude/agents/codebase-analyzer.md +166 -0
- package/.claude/agents/codebase-locator.md +122 -0
- package/.claude/agents/codebase-online-researcher.md +148 -0
- package/.claude/agents/codebase-pattern-finder.md +247 -0
- package/.claude/agents/codebase-research-analyzer.md +179 -0
- package/.claude/agents/codebase-research-locator.md +145 -0
- package/.claude/agents/debugger.md +91 -0
- package/.claude/agents/orchestrator.md +19 -0
- package/.claude/agents/planner.md +106 -0
- package/.claude/agents/reviewer.md +97 -0
- package/.claude/agents/worker.md +165 -0
- package/.github/agents/code-simplifier.md +52 -0
- package/.github/agents/codebase-analyzer.md +166 -0
- package/.github/agents/codebase-locator.md +122 -0
- package/.github/agents/codebase-online-researcher.md +146 -0
- package/.github/agents/codebase-pattern-finder.md +247 -0
- package/.github/agents/codebase-research-analyzer.md +179 -0
- package/.github/agents/codebase-research-locator.md +145 -0
- package/.github/agents/debugger.md +98 -0
- package/.github/agents/orchestrator.md +27 -0
- package/.github/agents/planner.md +131 -0
- package/.github/agents/reviewer.md +94 -0
- package/.github/agents/worker.md +237 -0
- package/.github/lsp.json +93 -0
- package/.opencode/agents/code-simplifier.md +62 -0
- package/.opencode/agents/codebase-analyzer.md +171 -0
- package/.opencode/agents/codebase-locator.md +127 -0
- package/.opencode/agents/codebase-online-researcher.md +152 -0
- package/.opencode/agents/codebase-pattern-finder.md +252 -0
- package/.opencode/agents/codebase-research-analyzer.md +183 -0
- package/.opencode/agents/codebase-research-locator.md +149 -0
- package/.opencode/agents/debugger.md +99 -0
- package/.opencode/agents/orchestrator.md +27 -0
- package/.opencode/agents/planner.md +146 -0
- package/.opencode/agents/reviewer.md +102 -0
- package/.opencode/agents/worker.md +165 -0
- package/README.md +355 -299
- package/assets/settings.schema.json +0 -5
- package/package.json +7 -2
- package/src/cli.ts +16 -8
- package/src/commands/cli/workflow.ts +209 -15
- package/src/lib/spawn.ts +106 -31
- package/src/sdk/runtime/loader.ts +1 -1
- package/src/services/config/config-path.ts +1 -1
- package/src/services/config/settings.ts +0 -9
- package/src/services/system/agents.ts +94 -0
- package/src/services/system/auto-sync.ts +131 -0
- package/src/services/system/install-ui.ts +158 -0
- package/src/services/system/skills.ts +26 -17
- package/src/services/system/workflows.ts +105 -0
- package/src/theme/colors.ts +2 -0
- package/src/commands/cli/update.ts +0 -46
- package/src/services/system/download.ts +0 -325
|
@@ -0,0 +1,538 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ralph Prompt Utilities
|
|
3
|
+
*
|
|
4
|
+
* Prompts used by the Ralph plan → orchestrate → review → debug loop:
|
|
5
|
+
* - buildPlannerPrompt: initial planning OR re-planning from a debugger report
|
|
6
|
+
* - buildOrchestratorPrompt: spawn workers to execute the task list
|
|
7
|
+
* - buildReviewPrompt: structured code review with injected git status
|
|
8
|
+
* - buildDebuggerReportPrompt: diagnose review findings, produce a re-plan brief
|
|
9
|
+
*
|
|
10
|
+
* Plus parsing helpers for the reviewer JSON output and the debugger markdown
|
|
11
|
+
* report.
|
|
12
|
+
*
|
|
13
|
+
* Zero-dependency: no imports from the Atomic runtime.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
// ============================================================================
|
|
17
|
+
// PLANNER
|
|
18
|
+
// ============================================================================
|
|
19
|
+
|
|
20
|
+
/**
 * Loop state handed to `buildPlannerPrompt` so it can choose between the
 * initial-planning prompt and the re-planning prompt.
 */
export interface PlannerContext {
  /**
   * 1-indexed loop iteration. Iteration 1 = initial plan; >1 = re-plan
   * (re-planning additionally requires a non-blank `debuggerReport`).
   */
  iteration: number;
  /**
   * Markdown report from the previous iteration's debugger sub-agent.
   * The planner trims it before use; a missing or whitespace-only report
   * makes the planner fall back to the initial-planning prompt.
   */
  debuggerReport?: string;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Build the planner prompt for one loop iteration.
 *
 * Two prompts exist:
 * - Re-planning: used when `context.iteration > 1` AND the (trimmed) debugger
 *   report is non-empty. The report is embedded and declared authoritative.
 * - Initial planning: used in every other case, including a later iteration
 *   whose debugger report is missing or blank. The header reports the real
 *   iteration number so the prompt never claims to be "Iteration 1" when it
 *   is not.
 *
 * @param spec - The original user specification, embedded verbatim.
 * @param context - Loop state; defaults to the first iteration with no report.
 * @returns The complete prompt text for the planner agent.
 */
export function buildPlannerPrompt(
  spec: string,
  context: PlannerContext = { iteration: 1 },
): string {
  const debuggerReport = context.debuggerReport?.trim() ?? "";
  const isReplan = context.iteration > 1 && debuggerReport.length > 0;

  if (isReplan) {
    return `# Re-Planning (Iteration ${context.iteration})

The previous Ralph iteration produced an implementation that the reviewer
flagged as incomplete or incorrect. The debugger investigated and produced
the report below. Use it to re-plan.

## Original Specification

<specification>
${spec}
</specification>

## Debugger Report (authoritative)

<debugger_report>
${debuggerReport}
</debugger_report>

## Your Task

Decompose the work needed to resolve every issue in the debugger report into
an ordered task list, then persist them via TaskCreate.

<instructions>
1. Treat the debugger report as authoritative. Every "Issue Identified" must
map to at least one task. Every "Suggested Plan Adjustment" must appear as
(or be subsumed by) a task.
2. Drop any work from the original specification that is already complete and
unaffected by the report.
3. Order tasks by priority: P0 fixes first, then dependent work, then
validation/tests.
4. Optimize for parallel execution — minimize blockedBy dependencies.
5. After creating all tasks via TaskCreate, call TaskList to verify.
</instructions>

<constraints>
- All tasks start as "pending".
- blockedBy must reference IDs that exist in the task list.
- Do not split fixes that touch the same file across multiple tasks unless they are truly independent.
</constraints>`;
  }

  // No usable debugger report: plan from the specification alone. The header
  // carries the actual iteration (identical output for iteration 1).
  return `# Planning (Iteration ${context.iteration})

You are a task decomposition engine.

<specification>
${spec}
</specification>

<instructions>
Decompose the specification above into an ordered list of implementation tasks
and persist them via TaskCreate.

1. Read the specification and identify every distinct deliverable.
2. Order tasks by priority: foundational/infrastructure first, then features,
then tests, then polish.
3. Analyze technical dependencies between tasks.
4. After creating all tasks via TaskCreate, call TaskList to verify.
</instructions>

<constraints>
- All tasks start as "pending".
- blockedBy must only reference IDs that exist in the task list.
- Optimize for parallel execution — minimize unnecessary dependencies.
</constraints>`;
}
|
|
108
|
+
|
|
109
|
+
// ============================================================================
|
|
110
|
+
// ORCHESTRATOR
|
|
111
|
+
// ============================================================================
|
|
112
|
+
|
|
113
|
+
/**
 * Produce the fixed instruction text for the orchestrator agent.
 *
 * The prompt tells the orchestrator to fetch the planner's task list, repair
 * dangling `blockedBy` references, run every ready task through parallel
 * workers, retry failures, and keep task statuses up to date.
 *
 * @returns The orchestrator prompt; it takes no input and is constant.
 */
export function buildOrchestratorPrompt(): string {
  // One entry per paragraph; paragraphs are separated by a single blank line.
  const paragraphs: string[] = [
    `You are an orchestrator managing a set of implementation tasks.`,

    `## Retrieve Task List`,

    `Start by retrieving the current task list using your TaskList tool. The
planner has already created all tasks; you MUST retrieve them before any
execution.`,

    `## Dependency Graph Integrity Check`,

    `BEFORE executing any tasks, validate the dependency graph:`,

    `1. For each task, check that every ID in its "blockedBy" array corresponds to
an actual task ID in the list.
2. If a blockedBy reference points to a task ID that does NOT exist, that
reference is a **dangling dependency** caused by data corruption during
planning.
3. **Remove dangling dependencies**: Drop any blockedBy entry that references
a non-existent task ID. The task is still valid — only the corrupted
reference should be removed.
4. After cleanup, re-evaluate which tasks are ready.`,

    `This step is critical. Dangling dependencies will permanently block tasks.`,

    `## Dependency Rules`,

    `A task is READY only when:
1. Its status is "pending"
2. ALL tasks in its "blockedBy" array are "completed"`,

    `Do NOT spawn a worker for a task whose dependencies are not yet completed.`,

    `## Instructions`,

    `1. **Retrieve the task list** via TaskList. This is your source of truth.
2. **Validate the dependency graph** as above. Remove dangling dependencies.
3. **Identify ready tasks**: pending tasks whose blockedBy is fully completed.
4. **Spawn parallel workers**: for each ready task, spawn a worker via the
Task tool with a focused prompt containing the task description, context
from completed dependencies, and instructions to implement and test.
5. **Monitor completions**: as workers finish, mark tasks completed and spawn
the newly-unblocked tasks immediately.
6. **Continue until ALL tasks are complete.** Do NOT stop early.
7. **Report a summary** when finished, listing each task and its final status.`,

    `## IMPORTANT`,

    `Spawn ALL ready tasks in parallel — do not serialize when multiple tasks are
ready simultaneously.`,

    `## Error Handling`,

    `When a worker task FAILS:`,

    `1. **Diagnose** the error.
2. **Retry with fix**: spawn a new worker with the error context included.
3. **Retry limit**: up to 3 retries per task. After that, mark it as "error".
4. **Continue regardless**: do NOT stop. Execute all other unblocked tasks.
5. **Unblocked tasks proceed**: only direct dependents of an "error" task
should be skipped.`,

    `NEVER mark tasks as "blocked-by-failure" and stop. Complete as much work as
possible.`,

    `## Task Status Protocol`,

    `Update task statuses **immediately** at every transition via TaskUpdate.`,

    `### Required update sequence per task`,

    `1. **IMMEDIATELY BEFORE spawning** a worker for a task → mark "in_progress".
2. **IMMEDIATELY AFTER** the worker returns → mark "completed" or "error".`,

    `### Timing rules`,

    `- Update status in the same turn as the event that triggered it. Never batch.
- When multiple workers complete in parallel, issue a SEPARATE update for
each.
- Mark previous tasks "completed" before marking new ones "in_progress".`,
  ];

  return paragraphs.join("\n\n");
}
|
|
198
|
+
|
|
199
|
+
// ============================================================================
|
|
200
|
+
// REVIEWER
|
|
201
|
+
// ============================================================================
|
|
202
|
+
|
|
203
|
+
/**
 * A single finding from the reviewer sub-agent.
 *
 * Mirrors one entry of the `findings` array in the JSON schema that
 * `buildReviewPrompt` asks the reviewer to emit.
 */
export interface ReviewFinding {
  /** Short title; the reviewer prompt asks for a priority tag prefix such as "[P0]". */
  title: string;
  /** Detailed explanation; the reviewer prompt asks for why it matters and a suggested fix. */
  body: string;
  /** Reviewer's confidence in this finding (the prompt's schema example uses 0.95). */
  confidence_score?: number;
  /**
   * Numeric priority: 0 = critical, 1 = important, 2 = moderate, 3 = minor.
   * Findings above 2 are removed when the review is parsed; a finding with
   * no priority is kept and is labelled "P2" in the debugger prompt.
   */
  priority?: number;
  /** Where the issue lives; the debugger prompt renders it as `path:start-end`. */
  code_location?: {
    absolute_file_path: string;
    line_range: { start: number; end: number };
  };
}
|
|
214
|
+
|
|
215
|
+
/**
 * Parsed reviewer JSON output.
 *
 * Instances returned by `parseReviewResult` have already been passed through
 * the actionable filter, so `findings` no longer contains P3 entries.
 */
export interface ReviewResult {
  /** Findings that survived filtering; an empty array means nothing actionable. */
  findings: ReviewFinding[];
  /** Reviewer verdict; the prompt asks for "patch is correct" or "patch is incorrect". */
  overall_correctness: string;
  /** Free-text summary; set to an empty string when the reviewer omitted it. */
  overall_explanation: string;
  /** Reviewer's overall confidence (the prompt's schema example uses 0.85). */
  overall_confidence_score?: number;
}
|
|
222
|
+
|
|
223
|
+
/** Per-review inputs that `buildReviewPrompt` injects into the reviewer prompt. */
export interface ReviewContext {
  /**
   * Output of `git status -s` captured immediately before the review.
   * A blank value makes the prompt state that the working tree is clean.
   */
  gitStatus: string;
  /**
   * 1-indexed loop iteration, used in the prompt header. When omitted the
   * header is the plain "# Code Review Request" with no iteration suffix.
   */
  iteration?: number;
  /**
   * Whether this is the second consecutive review pass within the same loop
   * iteration (i.e. the previous pass had zero findings and we are
   * confirming before counting two clean reviews in a row). When true the
   * prompt gains a note asking the reviewer to re-verify from scratch.
   */
  isConfirmationPass?: boolean;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Build the reviewer prompt. Injects deterministic `git status -s` output so
 * the reviewer doesn't have to re-discover what changed.
 *
 * The git status text is normalised by removing leading blank lines and
 * trailing whitespace only. The leading space of the first entry is kept
 * because, in the short format, the first two columns ARE the status
 * (" M file" and "M  file" mean different things).
 *
 * @param spec - The original user specification, embedded verbatim.
 * @param context - Git status plus optional iteration / confirmation flags.
 * @returns The complete prompt text for the reviewer agent.
 */
export function buildReviewPrompt(
  spec: string,
  context: ReviewContext,
): string {
  const gitStatus = context.gitStatus
    .replace(/^(?:[ \t]*\r?\n)+/, "")
    .trimEnd();
  const gitSection =
    gitStatus.length > 0
      ? `## Working Tree (\`git status -s\`)

These files have uncommitted changes — they are the files actually touched in
this iteration. Use them to focus your review:

\`\`\`
${gitStatus}
\`\`\``
      : `## Working Tree (\`git status -s\`)

The working tree is clean. Either nothing was implemented this iteration or
all changes were already committed. Cross-check the task list to verify
whether the implementation actually ran.`;

  // The iteration suffix (and the confirmation tag inside it) only appears
  // when an iteration number was supplied.
  const header = context.iteration
    ? `# Code Review Request (Iteration ${context.iteration}${context.isConfirmationPass ? ", confirmation pass" : ""})`
    : "# Code Review Request";

  const confirmationNote = context.isConfirmationPass
    ? `\n\n**Note**: This is a confirmation pass. The previous review of this same iteration produced zero findings. Re-verify with fresh eyes; do not assume the prior pass was correct.`
    : "";

  return `${header}${confirmationNote}

## Original Specification

<user_request>
${spec}
</user_request>

${gitSection}

## Retrieve Task List

Call \`TaskList\` to fetch the current task plan and statuses. Use it to:
1. Identify completed vs incomplete tasks.
2. Cross-reference the plan against the specification.
3. Calculate completion metrics.

## Review Focus Areas (priority order)

1. **Task Completion & Specification Gap Analysis** — HIGHEST priority. Every
task in PENDING / IN_PROGRESS / ERROR status MUST become a P0 finding.
Every spec requirement not covered by any task is a P0 finding. Do NOT
mark the patch correct if any task is incomplete.
2. **Correctness of Logic** — does the code implement the requirements?
3. **Error Handling & Edge Cases** — boundary, empty/null, error paths.
4. **Security** — injection, secret leakage, auth bypasses.
5. **Performance** — obvious resource leaks, N+1, hot loops.
6. **Test Coverage** — critical paths and edge cases tested.

## Output Format

Output ONLY a JSON object inside a single fenced \`\`\`json block. No prose
before or after. Use this schema exactly:

\`\`\`json
{
"findings": [
{
"title": "[P0] Brief title (P0=critical, P1=important, P2=moderate, P3=minor)",
"body": "Detailed explanation, why it matters, and a suggested fix",
"confidence_score": 0.95,
"priority": 0,
"code_location": {
"absolute_file_path": "/full/path/to/file.ts",
"line_range": { "start": 42, "end": 45 }
}
}
],
"overall_correctness": "patch is correct",
"overall_explanation": "Summary of overall quality and correctness",
"overall_confidence_score": 0.85
}
\`\`\`

Set \`overall_correctness\` to \`"patch is incorrect"\` whenever there is at
least one P0 or P1 finding (including incomplete tasks). Use
\`"patch is correct"\` only when findings are empty or strictly P3.

Begin your review now.`;
}
|
|
329
|
+
|
|
330
|
+
// ============================================================================
|
|
331
|
+
// DEBUGGER
|
|
332
|
+
// ============================================================================
|
|
333
|
+
|
|
334
|
+
/** Per-iteration inputs that `buildDebuggerReportPrompt` embeds in its prompt. */
export interface DebuggerContext {
  /** 1-indexed loop iteration the debugger is investigating; shown in the prompt header. */
  iteration: number;
  /**
   * Output of `git status -s` from immediately before the review.
   * A blank value is rendered as "(working tree clean)".
   */
  gitStatus: string;
}
|
|
340
|
+
|
|
341
|
+
/**
 * Wrap text in a Markdown code fence that the text itself cannot terminate.
 * The fence is three backticks, or one more than the longest backtick run
 * found in `text`, whichever is longer.
 */
function wrapInCodeFence(text: string): string {
  let longestRun = 0;
  for (const run of text.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const fence = "`".repeat(Math.max(3, longestRun + 1));
  return `${fence}\n${text}\n${fence}`;
}

/**
 * Build a prompt asking the debugger sub-agent to investigate a set of review
 * findings and produce a structured report. The debugger MUST NOT apply
 * fixes — its only deliverable is the report, which the next iteration's
 * planner consumes.
 *
 * Findings section:
 * - With a parsed review that has findings, each finding is listed in
 *   ascending priority order. A finding without a priority is both sorted
 *   and labelled as P2; the sort is stable, so ties keep reviewer order.
 * - Otherwise the raw reviewer text is embedded in a code fence. Reviewer
 *   output normally contains its own ``` fence, so the wrapper fence is
 *   sized to be longer than any backtick run inside it.
 *
 * @param review - Parsed review, or null when parsing failed.
 * @param rawReview - Raw reviewer response, used when `review` has no findings.
 * @param context - Iteration number and `git status -s` output.
 * @returns The complete prompt text for the debugger agent.
 */
export function buildDebuggerReportPrompt(
  review: ReviewResult | null,
  rawReview: string,
  context: DebuggerContext,
): string {
  // Priority assumed for findings that carry none (used for order and label).
  const defaultPriority = 2;

  let findingsSection: string;
  if (review !== null && review.findings.length > 0) {
    const sorted = [...review.findings].sort(
      (a, b) =>
        (a.priority ?? defaultPriority) - (b.priority ?? defaultPriority),
    );
    findingsSection = sorted
      .map((f, i) => {
        const pri = `P${f.priority ?? defaultPriority}`;
        const loc = f.code_location
          ? `${f.code_location.absolute_file_path}:${f.code_location.line_range.start}-${f.code_location.line_range.end}`
          : "unspecified";
        return `### Finding ${i + 1}: [${pri}] ${f.title}
- **Location:** ${loc}
- **Issue:** ${f.body}`;
      })
      .join("\n\n");
  } else {
    const trimmed = rawReview.trim();
    findingsSection =
      trimmed.length > 0
        ? `Reviewer output (could not parse as JSON):

${wrapInCodeFence(trimmed)}`
        : `(no reviewer output captured)`;
  }

  // Keep the leading status column of the first entry (" M file"); only
  // leading blank lines and trailing whitespace are removed.
  const gitStatus = context.gitStatus
    .replace(/^(?:[ \t]*\r?\n)+/, "")
    .trimEnd();
  const gitSection =
    gitStatus.length > 0 ? wrapInCodeFence(gitStatus) : `(working tree clean)`;

  return `# Debugging Report Request (Iteration ${context.iteration})

The reviewer flagged the issues below. Investigate them as a debugger and
produce a structured report that the planner will consume on the next loop
iteration.

**You are NOT applying fixes.** Your only deliverable is the report. Do not
edit files. Investigation tool calls (Read, grep, LSP, running tests in
read-only mode) are fine; mutations are not.

## Reviewer Findings

${findingsSection}

## Working Tree (\`git status -s\`)

${gitSection}

## Investigation Steps

For each finding:
1. Locate the relevant code (LSP / grep / Read).
2. Identify the **root cause**, not just the symptom.
3. List the absolute file paths that must change.
4. Note constraints, pitfalls, or invariants the next planner must respect.

## Output Format

Respond with EXACTLY one fenced \`\`\`markdown block containing the report.
No prose before or after the block. Use this exact section structure:

\`\`\`markdown
# Debugger Report

## Issues Identified
- [P<priority>] <one-line issue summary>
- **Root cause:** <one or two sentences>
- **Files:** <abs/path/file.ext, abs/path/other.ext>
- **Fix approach:** <imperative description>

## Suggested Plan Adjustments
1. <imperative task description, suitable as a planner task>
2. <...>

## Pitfalls
- <invariant or gotcha the planner/workers must respect>
- <...>
\`\`\`

Keep the report tight — every line must be load-bearing for re-planning. Omit
the "Pitfalls" section entirely if there are none. Begin now.`;
}
|
|
440
|
+
|
|
441
|
+
// ============================================================================
|
|
442
|
+
// PARSING HELPERS
|
|
443
|
+
// ============================================================================
|
|
444
|
+
|
|
445
|
+
/**
 * Parse the reviewer's JSON output. Tries, in order:
 *   1. JSON.parse on the entire content.
 *   2. The LAST non-empty fenced ```json (or unlabelled) code block.
 *   3. The LAST balanced `{...}` object in the surrounding prose that contains
 *      a "findings" key and validates as a review.
 *
 * A candidate validates when it is a JSON object whose `findings` is an array
 * and whose `overall_correctness` is a non-empty string.
 *
 * Filters out P3 (minor/style) findings — only P0/P1/P2 count as actionable.
 *
 * @param content - Raw reviewer response text.
 * @returns The filtered review, or null when no parse strategy succeeds.
 */
export function parseReviewResult(content: string): ReviewResult | null {
  // Strategy 1: the whole response is the JSON document.
  const direct = toReviewResult(content);
  if (direct !== null) return direct;

  // Strategy 2: last fenced code block.
  const blockRe = /```(?:json)?\s*\n([\s\S]*?)\n```/g;
  let lastBlock: string | null = null;
  let blockMatch: RegExpExecArray | null;
  while ((blockMatch = blockRe.exec(content)) !== null) {
    if (blockMatch[1]) lastBlock = blockMatch[1];
  }
  if (lastBlock !== null) {
    const fromBlock = toReviewResult(lastBlock);
    if (fromBlock !== null) return fromBlock;
  }

  // Strategy 3: walk opening braces from the end of the text backwards and
  // take the first balanced object that validates. An object containing the
  // key must open before the last occurrence of the key, so later braces
  // are skipped outright.
  const lastKey = content.lastIndexOf('"findings"');
  for (let start = lastKey - 1; start >= 0; start--) {
    if (content[start] !== "{") continue;
    const end = findMatchingBrace(content, start);
    if (end === -1) continue;
    const candidate = content.slice(start, end + 1);
    if (!candidate.includes('"findings"')) continue;
    const fromProse = toReviewResult(candidate);
    if (fromProse !== null) return fromProse;
  }

  return null;
}

/**
 * Find the index of the `}` that closes the `{` at `open`, ignoring braces
 * that appear inside double-quoted JSON strings. Returns -1 if unbalanced.
 */
function findMatchingBrace(content: string, open: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = open; i < content.length; i++) {
    const ch = content[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Parse `text` as JSON and validate the review shape. Returns the filtered
 * review, or null when the text is not JSON or not shaped like a review.
 */
function toReviewResult(text: string): ReviewResult | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    return null;
  }
  if (typeof parsed !== "object" || parsed === null) return null;

  const shape = parsed as {
    findings?: unknown;
    overall_correctness?: unknown;
    overall_explanation?: unknown;
    overall_confidence_score?: unknown;
  };
  if (!Array.isArray(shape.findings)) return null;
  if (
    typeof shape.overall_correctness !== "string" ||
    shape.overall_correctness.length === 0
  ) {
    return null;
  }

  return filterActionable({
    // Element shapes are checked one by one inside filterActionable.
    findings: shape.findings as ReviewFinding[],
    overall_correctness: shape.overall_correctness,
    overall_explanation:
      typeof shape.overall_explanation === "string"
        ? shape.overall_explanation
        : undefined,
    overall_confidence_score:
      typeof shape.overall_confidence_score === "number"
        ? shape.overall_confidence_score
        : undefined,
  });
}

/**
 * Coerce a reviewer-supplied priority to a number. Accepts finite numbers and
 * strings such as "0" or "P0"; anything else is treated as "no priority".
 */
function normalizePriority(raw: unknown): number | undefined {
  if (typeof raw === "number") return Number.isFinite(raw) ? raw : undefined;
  if (typeof raw === "string") {
    const digits = /^\s*P?(\d+)\s*$/i.exec(raw)?.[1];
    if (digits !== undefined) return Number(digits);
  }
  return undefined;
}

/**
 * Keep only actionable findings (priority 0-2, or no recognisable priority)
 * and fill in defaults for the optional summary fields.
 *
 * Entries that are not objects are skipped. A priority given as a string
 * ("P1") is replaced by its numeric value on a copy of the finding, so a
 * critical finding is not dropped just because of how it was spelled.
 */
function filterActionable(parsed: {
  findings: ReviewFinding[];
  overall_correctness: string;
  overall_explanation?: string;
  overall_confidence_score?: number;
}): ReviewResult {
  const actionable: ReviewFinding[] = [];
  for (const entry of parsed.findings as unknown[]) {
    if (typeof entry !== "object" || entry === null || Array.isArray(entry)) {
      continue;
    }
    const finding = entry as ReviewFinding;
    const rawPriority: unknown = finding.priority;
    const priority = normalizePriority(rawPriority);
    if (priority !== undefined && priority > 2) continue;
    // Only copy when normalisation actually changed the value.
    actionable.push(
      priority === rawPriority ? finding : { ...finding, priority },
    );
  }
  return {
    findings: actionable,
    overall_correctness: parsed.overall_correctness,
    overall_explanation: parsed.overall_explanation ?? "",
    overall_confidence_score: parsed.overall_confidence_score,
  };
}
|
|
520
|
+
|
|
521
|
+
/**
 * Extract the LAST fenced ```markdown block from a piece of text. Used for
 * parsing the debugger's structured report out of output that may include
 * extra prose around it.
 *
 * Code fences nested inside the report are kept: a fence line with a label
 * (e.g. ```ts) opens a nested block, and the next bare ``` line closes it
 * instead of ending the report. If that nesting never balances, the block
 * ends at the first fence line after the opener.
 *
 * Falls back to the trimmed full input when no non-blank fenced block is
 * present, so the planner still receives the debugger's content even if
 * formatting drifts.
 *
 * @param content - Text that may contain one or more ```markdown blocks.
 * @returns The trimmed body of the last block, or the trimmed input.
 */
export function extractMarkdownBlock(content: string): string {
  const openRe = /```markdown\s*\n/g;
  let last: string | null = null;
  while (openRe.exec(content) !== null) {
    const bodyStart = openRe.lastIndex; // always the start of a line
    const closeAt = findClosingFence(content, bodyStart);
    if (closeAt === -1) continue; // unterminated: try any later opener
    const body = content.slice(bodyStart, closeAt);
    if (body.trim().length > 0) last = body;
    // Resume the search after the closing fence.
    openRe.lastIndex = closeAt + 3;
  }
  return (last ?? content).trim();
}

/**
 * Locate the fence line that closes a block whose body starts at `from`.
 * Returns the index of that line's first character, or -1 when there is no
 * fence line at all after `from`.
 */
function findClosingFence(content: string, from: number): number {
  let depth = 0; // number of nested fenced blocks currently open
  let firstFence = -1; // fallback when nesting never balances
  let lineStart = from;
  while (lineStart < content.length) {
    const newline = content.indexOf("\n", lineStart);
    const lineEnd = newline === -1 ? content.length : newline;
    if (content.startsWith("```", lineStart)) {
      if (firstFence === -1) firstFence = lineStart;
      const info = content
        .slice(lineStart, lineEnd)
        .replace(/^`+/, "")
        .trim();
      if (info.length > 0) depth += 1; // labelled fence opens a nested block
      else if (depth > 0) depth -= 1; // bare fence closes the nested block
      else return lineStart; // bare fence at depth 0 closes the report
    }
    lineStart = lineEnd + 1;
  }
  return firstFence;
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Review analysis helpers for the Ralph workflow.
|
|
3
|
+
*
|
|
4
|
+
* Simplified versions of the internal conductor-based helpers,
|
|
5
|
+
* operating on direct values instead of StageOutput maps.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { ReviewResult } from "./prompts.ts";
|
|
9
|
+
|
|
10
|
+
/**
 * Decide whether a reviewer response calls for another fix cycle.
 *
 * - Parsed review: actionable exactly when it lists at least one finding.
 * - Unparseable review (null): actionable exactly when the raw response
 *   contains something other than whitespace, i.e. output that could not be
 *   understood is treated as a problem rather than as a clean review.
 *
 * @param review - Parsed ReviewResult, or null if parsing failed.
 * @param rawText - The raw reviewer response text.
 * @returns true when the loop should treat the review as having findings.
 */
export function hasActionableFindings(
  review: ReviewResult | null,
  rawText: string,
): boolean {
  // Parsing failed: the only signal left is whether the reviewer said anything.
  if (review === null) {
    return rawText.trim().length > 0;
  }
  return review.findings.length > 0;
}
|