@bastani/atomic 0.5.13 → 0.5.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. package/.claude/agents/planner.md +256 -67
  2. package/.github/agents/planner.md +262 -88
  3. package/.opencode/agents/planner.md +270 -107
  4. package/dist/sdk/components/workflow-picker-panel.d.ts.map +1 -1
  5. package/dist/sdk/runtime/discovery.d.ts +6 -3
  6. package/dist/sdk/runtime/discovery.d.ts.map +1 -1
  7. package/dist/sdk/runtime/executor.d.ts.map +1 -1
  8. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts.map +1 -1
  9. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.d.ts +5 -17
  10. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.d.ts.map +1 -1
  11. package/dist/sdk/workflows/builtin/ralph/claude/index.d.ts.map +1 -1
  12. package/dist/sdk/workflows/builtin/ralph/copilot/index.d.ts.map +1 -1
  13. package/dist/sdk/workflows/builtin/ralph/helpers/prompts.d.ts +18 -15
  14. package/dist/sdk/workflows/builtin/ralph/helpers/prompts.d.ts.map +1 -1
  15. package/dist/services/config/definitions.d.ts.map +1 -1
  16. package/package.json +4 -2
  17. package/src/sdk/components/workflow-picker-panel.tsx +34 -43
  18. package/src/sdk/runtime/discovery.ts +13 -41
  19. package/src/sdk/runtime/executor.ts +1 -0
  20. package/src/sdk/workflows/builtin/deep-research-codebase/claude/index.ts +42 -24
  21. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.ts +9 -23
  22. package/src/sdk/workflows/builtin/ralph/claude/index.ts +63 -37
  23. package/src/sdk/workflows/builtin/ralph/copilot/index.ts +12 -4
  24. package/src/sdk/workflows/builtin/ralph/helpers/prompts.ts +267 -155
  25. package/src/services/config/definitions.ts +3 -1
@@ -20,25 +20,62 @@ import { z } from "zod";
20
20
 
21
21
  /** Zod schema for a single review finding. */
22
22
  export const ReviewFindingSchema = z.object({
23
- title: z.string().describe("Brief title prefixed with priority, e.g. '[P0] Missing null check'"),
24
- body: z.string().describe("Detailed explanation of the issue, its impact, and a suggested fix"),
25
- confidence_score: z.number().min(0).max(1).optional().describe("Confidence in the finding (0.0–1.0)"),
26
- priority: z.number().int().min(0).max(3).optional().describe("Severity: 0=P0 critical, 1=P1 important, 2=P2 moderate, 3=P3 minor"),
27
- code_location: z.object({
28
- absolute_file_path: z.string().describe("Absolute path to the file containing the issue"),
29
- line_range: z.object({
30
- start: z.number().int().describe("Start line number"),
31
- end: z.number().int().describe("End line number"),
32
- }),
33
- }).optional().describe("Location of the issue in the codebase"),
23
+ title: z
24
+ .string()
25
+ .describe(
26
+ "Brief title prefixed with priority, e.g. '[P0] Missing null check'",
27
+ ),
28
+ body: z
29
+ .string()
30
+ .describe(
31
+ "Detailed explanation of the issue, its impact, and a suggested fix",
32
+ ),
33
+ confidence_score: z
34
+ .number()
35
+ .min(0)
36
+ .max(1)
37
+ .optional()
38
+ .describe("Confidence in the finding (0.0–1.0)"),
39
+ priority: z
40
+ .number()
41
+ .int()
42
+ .min(0)
43
+ .max(3)
44
+ .optional()
45
+ .describe(
46
+ "Severity: 0=P0 critical, 1=P1 important, 2=P2 moderate, 3=P3 minor",
47
+ ),
48
+ code_location: z
49
+ .object({
50
+ absolute_file_path: z
51
+ .string()
52
+ .describe("Absolute path to the file containing the issue"),
53
+ line_range: z.object({
54
+ start: z.number().int().describe("Start line number"),
55
+ end: z.number().int().describe("End line number"),
56
+ }),
57
+ })
58
+ .optional()
59
+ .describe("Location of the issue in the codebase"),
34
60
  });
35
61
 
36
62
  /** Zod schema for the full structured review output. */
37
63
  export const ReviewResultSchema = z.object({
38
- findings: z.array(ReviewFindingSchema).describe("List of review findings, ordered by priority"),
39
- overall_correctness: z.string().describe("'patch is correct' or 'patch is incorrect'"),
40
- overall_explanation: z.string().describe("Summary of overall quality and correctness"),
41
- overall_confidence_score: z.number().min(0).max(1).optional().describe("Overall confidence in the review (0.0–1.0)"),
64
+ findings: z
65
+ .array(ReviewFindingSchema)
66
+ .describe("List of review findings, ordered by priority"),
67
+ overall_correctness: z
68
+ .string()
69
+ .describe("'patch is correct' or 'patch is incorrect'"),
70
+ overall_explanation: z
71
+ .string()
72
+ .describe("Summary of overall quality and correctness"),
73
+ overall_confidence_score: z
74
+ .number()
75
+ .min(0)
76
+ .max(1)
77
+ .optional()
78
+ .describe("Overall confidence in the review (0.0–1.0)"),
42
79
  });
43
80
 
44
81
  /** JSON Schema derived from the Zod schema — used by Claude and OpenCode SDKs. */
@@ -67,8 +104,10 @@ export function mergeReviewResults(
67
104
  const rawCombined = [a.raw, b.raw].filter(Boolean).join("\n\n---\n\n");
68
105
 
69
106
  // Resolve: prefer structured output, fall back to text parsing
70
- const parsedA = a.structured ?? (a.raw.trim() ? parseReviewResult(a.raw) : null);
71
- const parsedB = b.structured ?? (b.raw.trim() ? parseReviewResult(b.raw) : null);
107
+ const parsedA =
108
+ a.structured ?? (a.raw.trim() ? parseReviewResult(a.raw) : null);
109
+ const parsedB =
110
+ b.structured ?? (b.raw.trim() ? parseReviewResult(b.raw) : null);
72
111
 
73
112
  if (!parsedA && !parsedB) {
74
113
  return { structured: null, raw: rawCombined };
@@ -96,7 +135,9 @@ export function mergeReviewResults(
96
135
  return {
97
136
  structured: {
98
137
  findings: [...findingsA, ...findingsB],
99
- overall_correctness: isIncorrect ? "patch is incorrect" : "patch is correct",
138
+ overall_correctness: isIncorrect
139
+ ? "patch is incorrect"
140
+ : "patch is correct",
100
141
  overall_explanation: explanations.join(" | "),
101
142
  overall_confidence_score:
102
143
  confidences.length > 0 ? Math.max(...confidences) : undefined,
@@ -117,9 +158,13 @@ export interface PlannerContext {
117
158
  }
118
159
 
119
160
  /**
120
- * Build the planner prompt. The first iteration decomposes the original spec;
121
- * subsequent iterations decompose the work needed to resolve the debugger
161
+ * Build the planner prompt. The first iteration authors an RFC from the
162
+ * original spec; subsequent iterations revise the RFC using the debugger
122
163
  * report from the previous loop iteration.
164
+ *
165
+ * The planner's deliverable is a filled-in Technical Design Document / RFC
166
+ * rendered as markdown text
167
+ * consumes the RFC as design context
123
168
  */
124
169
  export function buildPlannerPrompt(
125
170
  spec: string,
@@ -128,18 +173,23 @@ export function buildPlannerPrompt(
128
173
  const debuggerReport = context.debuggerReport?.trim() ?? "";
129
174
  const isReplan = context.iteration > 1 && debuggerReport.length > 0;
130
175
 
131
- if (isReplan) {
132
- return `# Re-Planning (Iteration ${context.iteration})
176
+ const header = isReplan
177
+ ? `# Technical Design Revision (Iteration ${context.iteration})
133
178
 
134
- The previous Ralph iteration produced an implementation that the reviewer
135
- flagged as incomplete or incorrect. The debugger investigated and produced
136
- the report below. Use it to re-plan.
179
+ The previous iteration's implementation was flagged by the reviewer, and the
180
+ debugger investigated. Revise the RFC so it reflects the corrected approach.`
181
+ : `# Technical Design (Iteration 1)
137
182
 
138
- ## Original Specification
183
+ Author a Technical Design Document / RFC for the specification below.`;
184
+
185
+ const specBlock = `## Original Specification
139
186
 
140
187
  <specification>
141
188
  ${spec}
142
- </specification>
189
+ </specification>`;
190
+
191
+ const debuggerBlock = isReplan
192
+ ? `
143
193
 
144
194
  ## Debugger Report (authoritative)
145
195
 
@@ -147,82 +197,110 @@ ${spec}
147
197
  ${debuggerReport}
148
198
  </debugger_report>
149
199
 
150
- ## Your Task
151
-
152
- Decompose the work needed to resolve every issue in the debugger report into
153
- an ordered task list, then persist them via TaskCreate.
154
-
155
- <instructions>
156
- 1. Treat the debugger report as authoritative. Every "Issue Identified" must
157
- map to at least one task. Every "Suggested Plan Adjustment" must appear as
158
- (or be subsumed by) a task.
159
- 2. Drop any work from the original specification that is already complete and
160
- unaffected by the report.
161
- 3. Order tasks by priority: P0 fixes first, then dependent work, then
162
- validation/tests.
163
- 4. Optimize for parallel execution — minimize blockedBy dependencies.
164
- 5. After creating all tasks via TaskCreate, call TaskList to verify.
165
- </instructions>
166
-
167
- <constraints>
168
- - All tasks start as "pending".
169
- - blockedBy must reference IDs that exist in the task list.
170
- - Do not split fixes that touch the same file across multiple tasks unless they are truly independent.
171
- </constraints>`;
172
- }
200
+ ### Revision Focus
173
201
 
174
- // Initial iteration
175
- return `# Planning (Iteration 1)
202
+ Fold every issue in the debugger report into the revised RFC:
176
203
 
177
- You are a task decomposition engine.
204
+ - **Section 5 (Detailed Design)** — specify the corrected approach. Every
205
+ "Issue Identified" in the report should map to a concrete design change.
206
+ - **Section 6 (Alternatives Considered)** — if the root cause points to a
207
+ better option than the one previously chosen, promote it and demote the
208
+ current choice to "rejected" with the new rejection reason.
209
+ - **Section 8 (Migration, Rollout, and Testing)** — add validation steps
210
+ that would have caught the regression.
211
+ - **Section 9 (Open Questions / Unresolved Issues)** — surface any
212
+ uncertainty the debugger flagged as unresolved.`
213
+ : "";
178
214
 
179
- <specification>
180
- ${spec}
181
- </specification>
215
+ return `${header}
182
216
 
183
- <instructions>
184
- Decompose the specification above into an ordered list of implementation tasks
185
- and persist them via TaskCreate.
186
-
187
- 1. Read the specification and identify every distinct deliverable.
188
- 2. Order tasks by priority: foundational/infrastructure first, then features,
189
- then tests, then polish.
190
- 3. Analyze technical dependencies between tasks.
191
- 4. After creating all tasks via TaskCreate, call TaskList to verify.
192
- </instructions>
193
-
194
- <constraints>
195
- - All tasks start as "pending".
196
- - blockedBy must only reference IDs that exist in the task list.
197
- - Optimize for parallel execution minimize unnecessary dependencies.
198
- </constraints>`;
217
+ ${specBlock}${debuggerBlock}
218
+
219
+ ${
220
+ isReplan
221
+ ? `## Step 1: Author a Revised RFC
222
+
223
+ This is a re-plan iteration — the debugger report above MUST be folded into
224
+ the design. Always author a revised RFC here, even if the original
225
+ specification was a file path. If the spec is a path, Read the file first to
226
+ get the original design, then produce a revised RFC that incorporates the
227
+ debugger findings. Do NOT short-circuit to just the path on re-plan.`
228
+ : `## Step 1: Spec Path Short-Circuit (do this FIRST)
229
+
230
+ The specification above may be either a **file path** to an existing spec
231
+ document, or **raw prose** describing a feature.
232
+
233
+ Before doing anything else, determine which case you're in:
234
+
235
+ - If the specification looks like a path (ends in \`.md\`, \`.txt\`, \`.rst\`,
236
+ or similar; starts with \`/\`, \`./\`, or \`~/\`; or contains \`/\` and no
237
+ line breaks), attempt to Read it.
238
+ - If the Read succeeds, the user has already authored a spec file — there is
239
+ **nothing to draft**. Resolve the path to an absolute path (via Bash
240
+ \`realpath <path>\` or equivalent) and output ONLY that absolute path as
241
+ your final message. Emit nothing else: no RFC, no summary, no commentary.
242
+ The orchestrator will read the file itself.
243
+ - If Read fails, or the specification is clearly inline prose (multiple
244
+ sentences, paragraph structure, no file extension), proceed to Step 2 and
245
+ author the full RFC below.
246
+
247
+ Do NOT author an RFC when the user has already provided a spec file — just
248
+ forward the path. Duplicating the spec wastes tokens and introduces drift.`
199
249
  }
200
250
 
251
+ ## Step 2: Author the RFC${isReplan ? " (revision)" : " (only if Step 1 did not short-circuit)"}
252
+
253
+ 1. **Investigate first.** Use Grep/Glob/Read to ground the RFC in the actual
254
+ codebase — the services, modules, data models, and external integrations
255
+ this feature will touch. Use Bash for metadata:
256
+ - \`git config user.name\` → Author(s)
257
+ - \`date '+%Y-%m-%d'\` → Created / Last Updated
258
+ 2. **Render the RFC template below as your final message.** Preserve every
259
+ section header verbatim and the metadata table exactly. Replace each
260
+ \`_Instruction:_\` italicized block and each \`> **Example:**\` blockquote
261
+ with real, feature-specific content — the templates are authoring guides,
262
+ not final copy.
263
+ 3. **Diagrams are load-bearing.** Section 4.1 MUST include a Mermaid System
264
+ Architecture diagram grounded in the real components this feature touches.
265
+ 4. **Non-goals matter.** Section 3.2 prevents scope creep. Always fill it in
266
+ with explicit exclusions — do not leave it generic.
267
+ 5. **Alternatives must be real.** Section 6 must list at least two concrete
268
+ alternatives (not strawmen) with honest pros, cons, and rejection reasons.
269
+ 6. **Surface uncertainty.** Put unresolved decisions in Section 9 with an
270
+ owner placeholder (e.g., \`[OWNER: infra team]\`) — do not paper over gaps
271
+ with vague language.
272
+
273
+ ## Constraints
274
+
275
+ - Output nothing else after the RFC (or path) — no meta-commentary, no
276
+ summary. The document (or path) stands on its own.
277
+ - Match depth to stakes: a greenfield service warrants deep sections 5-7; a
278
+ small refactor can abbreviate them, but every section header must be present.`;
279
+ }
201
280
  // ============================================================================
202
281
  // ORCHESTRATOR
203
282
  // ============================================================================
204
283
 
205
284
  export interface OrchestratorContext {
206
285
  /**
207
- * Trailing commentary from the planner's last assistant message, if any.
208
- * The Copilot and OpenCode workflows create a fresh session for each
209
- * sub-agent, so the planner's in-session output is NOT automatically
210
- * visible to the orchestrator only what the planner persisted via
211
- * `TaskCreate`. Forward the planner's final text here so the orchestrator
212
- * sees any caveats, risks, or execution hints that didn't fit into task
213
- * bodies.
286
+ * The planner's final assistant message. Under the RFC-based Ralph flow,
287
+ * this is the authoritative design input — either an absolute path to a
288
+ * pre-existing spec file or an inline RFC markdown document. The
289
+ * orchestrator decomposes it into the task list using its SDK-specific
290
+ * task-persistence tool (`TaskCreate` / `sql` / `todowrite`).
214
291
  */
215
292
  plannerNotes?: string;
216
293
  }
217
294
 
218
295
  /**
219
- * Build the orchestrator prompt. The orchestrator retrieves the planner's
220
- * task list, validates the dependency graph, and spawns parallel workers.
296
+ * Build the orchestrator prompt. The orchestrator decomposes the planner's
297
+ * design output (a spec path or inline RFC) into a task list using its
298
+ * SDK-specific task-persistence tool, validates the dependency graph, and
299
+ * spawns parallel workers.
221
300
  *
222
- * @param spec - The original user specification. Required because the
223
- * orchestrator runs in a fresh session on Copilot/OpenCode and needs the
224
- * end-user goal to resolve ambiguous tasks.
225
- * @param context - Optional planner handoff context (trailing commentary).
301
+ * @param spec - The user's original specification. Used as context/fallback
302
+ * when the planner output is missing or ambiguous.
303
+ * @param context - Planner handoff (the spec path or RFC markdown).
226
304
  */
227
305
  export function buildOrchestratorPrompt(
228
306
  spec: string,
@@ -231,73 +309,105 @@ export function buildOrchestratorPrompt(
231
309
  const plannerNotes = context.plannerNotes?.trim() ?? "";
232
310
  const plannerSection =
233
311
  plannerNotes.length > 0
234
- ? `## Planner Notes (trailing commentary)
312
+ ? `<planner_output>
313
+ ${plannerNotes}
314
+ </planner_output>`
315
+ : `<planner_output>
316
+ (empty — fall back to the Original User Specification below)
317
+ </planner_output>`;
235
318
 
236
- The planner produced the notes below alongside the task list. They capture
237
- caveats, risks, or execution hints that did not fit into individual task
238
- bodies. Treat them as guidance, not as task definitions.
319
+ return `You are the workflow orchestrator. You run a three-phase loop:
239
320
 
240
- <planner_notes>
241
- ${plannerNotes}
242
- </planner_notes>
321
+ 1. **Decompose** the design document into a task list.
322
+ 2. **Execute** the tasks by spawning parallel worker sub-agents.
323
+ 3. **Report** completion status.
243
324
 
244
- `
245
- : "";
325
+ ## Design Input (authoritative)
326
+
327
+ The planner produced the output below. It is in **one of two formats**:
328
+ - **A file path** (single line, ends in \`.md\`/\`.txt\`/similar, or starts
329
+ with \`/\` / \`./\` / \`~/\`). Read the file to get the spec — its contents
330
+ are what you decompose.
331
+ - **An inline RFC markdown document** (multi-section, starts with a metadata
332
+ table or \`# ... Technical Design Document\` header). Decompose it directly.
246
333
 
247
- return `You are an orchestrator managing a set of implementation tasks.
334
+ ${plannerSection}
248
335
 
249
- ## Original User Specification
336
+ ## Original User Specification (context / fallback)
250
337
 
251
338
  <specification>
252
339
  ${spec}
253
340
  </specification>
254
341
 
255
- ${plannerSection}## Retrieve Task List
342
+ ## Phase 1: Decompose the Spec into a Task List
256
343
 
257
- Start by retrieving the current task list using your TaskList tool. The
258
- planner has already created all tasks; you MUST retrieve them before any
259
- execution.
344
+ Read the spec (from the path or the inline RFC) and decompose it into an
345
+ ordered, parallelism-friendly list of implementation tasks. For each task,
346
+ derive:
260
347
 
261
- ## Dependency Graph Integrity Check
348
+ - A short **gerund subject** (e.g., "Implementing auth middleware").
349
+ - An **actionable description** (5-10 words, imperative, specific).
350
+ - A **blockedBy / dependency list** (IDs of tasks that must complete first).
262
351
 
263
- BEFORE executing any tasks, validate the dependency graph:
352
+ **Decomposition guidelines:**
264
353
 
265
- 1. For each task, check that every ID in its "blockedBy" array corresponds to
266
- an actual task ID in the list.
267
- 2. If a blockedBy reference points to a task ID that does NOT exist, that
268
- reference is a **dangling dependency** caused by data corruption during
269
- planning.
270
- 3. **Remove dangling dependencies**: Drop any blockedBy entry that references
271
- a non-existent task ID. The task is still valid only the corrupted
272
- reference should be removed.
273
- 4. After cleanup, re-evaluate which tasks are ready.
354
+ 1. **Maximize parallelism.** Tasks with empty dependencies form the first
355
+ wave and run concurrently. Split independent work streams into separate
356
+ tasks rather than chaining them.
357
+ 2. **Compartmentalize.** Each task should be self-contained minimize
358
+ shared state and file conflicts. Prefer tasks that touch distinct
359
+ modules/files.
360
+ 3. **Dependencies only when truly necessary.** Every unnecessary dependency
361
+ reduces throughput. Ask: "Can this genuinely not start without the
362
+ blocked task?"
363
+ 4. **Start with foundations.** Setup, schema, and shared utilities come
364
+ before feature code. Tests come after the code they cover.
365
+ 5. **Match sections to task categories.** RFC Section 5 (Detailed Design)
366
+ typically yields 60-80% of tasks. Sections 8.3 (Test Plan) and 7
367
+ (Cross-Cutting) yield validation and infra tasks.
274
368
 
275
- This step is critical. Dangling dependencies will permanently block tasks.
369
+ ### Persist the Task List
276
370
 
277
- ## Dependency Rules
371
+ Persist every task using task management tools and encode dependencies. Use your task tools to better manage the status of tasks and mark tasks as complete when their work is done.
278
372
 
279
- A task is READY only when:
280
- 1. Its status is "pending"
281
- 2. ALL tasks in its "blockedBy" array are "completed"
373
+ ## Phase 2: Dependency Graph Integrity Check
282
374
 
283
- Do NOT spawn a worker for a task whose dependencies are not yet completed.
375
+ BEFORE executing any tasks, validate the graph you just persisted:
376
+
377
+ 1. For each task, check that every dependency reference points to a task ID
378
+ that actually exists.
379
+ 2. Any reference to a non-existent task ID is a **dangling dependency** —
380
+ drop it. The task itself is still valid; only the corrupted reference
381
+ is removed.
382
+ 3. Re-evaluate readiness after cleanup.
383
+
384
+ This step is critical. Dangling dependencies will permanently block tasks.
385
+
386
+ ## Phase 3: Execute
284
387
 
285
- ## Instructions
388
+ ### Readiness Rules
286
389
 
287
- 1. **Retrieve the task list** via TaskList. This is your source of truth.
288
- 2. **Validate the dependency graph** as above. Remove dangling dependencies.
289
- 3. **Identify ready tasks**: pending tasks whose blockedBy is fully completed.
290
- 4. **Spawn parallel workers**: for each ready task, spawn a worker via the
291
- Task tool with a focused prompt containing the task description, context
292
- from completed dependencies, and instructions to implement and test.
293
- 5. **Monitor completions**: as workers finish, mark tasks completed and spawn
294
- the newly-unblocked tasks immediately.
295
- 6. **Continue until ALL tasks are complete.** Do NOT stop early.
296
- 7. **Report a summary** when finished, listing each task and its final status.
390
+ A task is READY only when:
391
+ 1. Its status is \`pending\`.
392
+ 2. ALL tasks it depends on are \`completed\`.
297
393
 
298
- ## IMPORTANT
394
+ Do NOT spawn a worker for a task whose dependencies are not yet completed.
299
395
 
300
- Spawn ALL ready tasks in parallel — do not serialize when multiple tasks are
396
+ ### Execution Loop
397
+
398
+ 1. **Identify all ready tasks** — pending tasks whose dependencies are
399
+ completed.
400
+ 2. **Spawn parallel workers** — for each ready task, dispatch a worker
401
+ sub-agent (via \`Agent\`/\`Task\`/\`agent\` tool) with a focused prompt
402
+ containing: the task subject + description, relevant context from the
403
+ spec/RFC, and instructions to implement and test.
404
+ 3. **Monitor completions** — as workers finish, mark tasks \`completed\` and
405
+ spawn newly-unblocked tasks IMMEDIATELY.
406
+ 4. **Continue until ALL tasks are \`completed\` or \`error\`.** Do NOT stop
407
+ early.
408
+ 5. **Report a summary** when finished: each task and its final status.
409
+
410
+ Spawn ALL ready tasks in parallel — do not serialize when multiple are
301
411
  ready simultaneously.
302
412
 
303
413
  ## Error Handling
@@ -306,29 +416,31 @@ When a worker task FAILS:
306
416
 
307
417
  1. **Diagnose** the error.
308
418
  2. **Retry with fix**: spawn a new worker with the error context included.
309
- 3. **Retry limit**: up to 3 retries per task. After that, mark it as "error".
419
+ 3. **Retry limit**: up to 3 retries per task. After that, mark it \`error\`.
310
420
  4. **Continue regardless**: do NOT stop. Execute all other unblocked tasks.
311
- 5. **Unblocked tasks proceed**: only direct dependents of an "error" task
421
+ 5. **Unblocked tasks proceed**: only direct dependents of an \`error\` task
312
422
  should be skipped.
313
423
 
314
- NEVER mark tasks as "blocked-by-failure" and stop. Complete as much work as
424
+ NEVER mark tasks "blocked-by-failure" and stop. Complete as much work as
315
425
  possible.
316
426
 
317
427
  ## Task Status Protocol
318
428
 
319
- Update task statuses **immediately** at every transition via TaskUpdate.
429
+ Update statuses **immediately** at every transition via task tool.
320
430
 
321
431
  ### Required update sequence per task
322
432
 
323
- 1. **IMMEDIATELY BEFORE spawning** a worker for a task → mark "in_progress".
324
- 2. **IMMEDIATELY AFTER** the worker returns → mark "completed" or "error".
433
+ 1. **IMMEDIATELY BEFORE spawning** a worker → mark \`in_progress\`.
434
+ 2. **IMMEDIATELY AFTER** the worker returns → mark \`completed\` or
435
+ \`error\`.
325
436
 
326
437
  ### Timing rules
327
438
 
328
- - Update status in the same turn as the event that triggered it. Never batch.
329
- - When multiple workers complete in parallel, issue a SEPARATE update for
330
- each.
331
- - Mark previous tasks "completed" before marking new ones "in_progress".`;
439
+ - Update status in the same turn as the triggering event. Never batch.
440
+ - When multiple workers complete in parallel, issue a SEPARATE update per
441
+ task.
442
+ - Mark previous tasks \`completed\` before marking new ones
443
+ \`in_progress\`.`;
332
444
  }
333
445
 
334
446
  // ============================================================================
@@ -526,8 +638,7 @@ export function buildReviewPrompt(
526
638
  ): string {
527
639
  const { changeset } = context;
528
640
  const hasChanges =
529
- changeset.diffStat.length > 0 ||
530
- changeset.uncommitted.length > 0;
641
+ changeset.diffStat.length > 0 || changeset.uncommitted.length > 0;
531
642
  const hasErrors = changeset.errors.length > 0;
532
643
 
533
644
  // ── Changeset section ──────────────────────────────────────────────────
@@ -537,9 +648,7 @@ export function buildReviewPrompt(
537
648
  if (hasChanges || hasErrors) {
538
649
  const parts: string[] = [];
539
650
 
540
- parts.push(
541
- `## Branch Changeset (relative to \`${changeset.baseBranch}\`)`,
542
- );
651
+ parts.push(`## Branch Changeset (relative to \`${changeset.baseBranch}\`)`);
543
652
 
544
653
  // Surface git errors first — the agent needs to know the data is partial
545
654
  if (hasErrors) {
@@ -576,14 +685,7 @@ export function buildReviewPrompt(
576
685
  }
577
686
 
578
687
  if (changeset.diffStat.length > 0) {
579
- parts.push(
580
- "",
581
- "### Diff Summary",
582
- "",
583
- "```",
584
- changeset.diffStat,
585
- "```",
586
- );
688
+ parts.push("", "### Diff Summary", "", "```", changeset.diffStat, "```");
587
689
  }
588
690
 
589
691
  if (changeset.uncommitted.length > 0) {
@@ -802,10 +904,20 @@ ${trimmed}
802
904
  );
803
905
  }
804
906
  if (changeset.nameStatus.length > 0) {
805
- parts.push(`Changed files (relative to \`${changeset.baseBranch}\`):`, "```", changeset.nameStatus, "```");
907
+ parts.push(
908
+ `Changed files (relative to \`${changeset.baseBranch}\`):`,
909
+ "```",
910
+ changeset.nameStatus,
911
+ "```",
912
+ );
806
913
  }
807
914
  if (changeset.uncommitted.length > 0) {
808
- parts.push(`Uncommitted (\`git status -s\`):`, "```", changeset.uncommitted, "```");
915
+ parts.push(
916
+ `Uncommitted (\`git status -s\`):`,
917
+ "```",
918
+ changeset.uncommitted,
919
+ "```",
920
+ );
809
921
  }
810
922
  changesetSection = parts.join("\n");
811
923
  } else {
@@ -36,7 +36,9 @@ export const AGENT_CONFIG: Record<AgentKey, AgentConfig> = {
36
36
  "--allow-dangerously-skip-permissions",
37
37
  "--dangerously-skip-permissions",
38
38
  ],
39
- env_vars: {},
39
+ env_vars: {
40
+ CLAUDE_CODE_NO_FLICKER: "1",
41
+ },
40
42
  folder: ".claude",
41
43
  install_url: "https://code.claude.com/docs/en/setup",
42
44
  exclude: [".DS_Store", "settings.json"],