@bastani/atomic 0.8.17-0 → 0.8.18-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +16 -0
  2. package/dist/builtin/intercom/CHANGELOG.md +5 -0
  3. package/dist/builtin/intercom/package.json +1 -1
  4. package/dist/builtin/mcp/CHANGELOG.md +5 -0
  5. package/dist/builtin/mcp/package.json +1 -1
  6. package/dist/builtin/subagents/CHANGELOG.md +5 -0
  7. package/dist/builtin/subagents/package.json +1 -1
  8. package/dist/builtin/web-access/CHANGELOG.md +5 -0
  9. package/dist/builtin/web-access/package.json +1 -1
  10. package/dist/builtin/workflows/CHANGELOG.md +25 -0
  11. package/dist/builtin/workflows/README.md +62 -3
  12. package/dist/builtin/workflows/builtin/deep-research-codebase.ts +555 -537
  13. package/dist/builtin/workflows/builtin/goal.ts +5 -0
  14. package/dist/builtin/workflows/builtin/open-claude-design.ts +3 -3
  15. package/dist/builtin/workflows/builtin/ralph.ts +737 -713
  16. package/dist/builtin/workflows/builtin/shared-prompts.ts +11 -0
  17. package/dist/builtin/workflows/package.json +1 -1
  18. package/dist/builtin/workflows/src/extension/discovery.ts +61 -22
  19. package/dist/builtin/workflows/src/extension/index.ts +2 -0
  20. package/dist/builtin/workflows/src/extension/runtime.ts +4 -0
  21. package/dist/builtin/workflows/src/extension/workflow-schema.ts +4 -0
  22. package/dist/builtin/workflows/src/runs/foreground/executor.ts +96 -6
  23. package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +2 -0
  24. package/dist/builtin/workflows/src/runs/shared/workflow-runner.ts +7 -0
  25. package/dist/builtin/workflows/src/runs/shared/worktree.ts +214 -1
  26. package/dist/builtin/workflows/src/sdk-surface.ts +2 -0
  27. package/dist/builtin/workflows/src/shared/types.ts +32 -3
  28. package/dist/builtin/workflows/src/workflows/define-workflow.ts +18 -1
  29. package/dist/core/agent-session-services.d.ts +2 -1
  30. package/dist/core/agent-session-services.d.ts.map +1 -1
  31. package/dist/core/agent-session-services.js +1 -0
  32. package/dist/core/agent-session-services.js.map +1 -1
  33. package/dist/core/agent-session.d.ts +3 -0
  34. package/dist/core/agent-session.d.ts.map +1 -1
  35. package/dist/core/agent-session.js +16 -5
  36. package/dist/core/agent-session.js.map +1 -1
  37. package/dist/core/atomic-guide-command.d.ts.map +1 -1
  38. package/dist/core/atomic-guide-command.js +40 -28
  39. package/dist/core/atomic-guide-command.js.map +1 -1
  40. package/dist/core/sdk.d.ts +9 -1
  41. package/dist/core/sdk.d.ts.map +1 -1
  42. package/dist/core/sdk.js +2 -2
  43. package/dist/core/sdk.js.map +1 -1
  44. package/dist/core/system-prompt.d.ts +2 -0
  45. package/dist/core/system-prompt.d.ts.map +1 -1
  46. package/dist/core/system-prompt.js +22 -13
  47. package/dist/core/system-prompt.js.map +1 -1
  48. package/docs/quickstart.md +13 -5
  49. package/docs/sdk.md +20 -5
  50. package/docs/workflows.md +44 -17
  51. package/examples/sdk/05-tools.ts +22 -1
  52. package/examples/sdk/README.md +7 -3
  53. package/package.json +1 -1
@@ -9,9 +9,13 @@
9
9
 
10
10
  import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
11
11
  import { tmpdir } from "node:os";
12
- import { dirname, extname, join } from "node:path";
12
+ import { dirname, join, resolve } from "node:path";
13
13
  import { defineWorkflow } from "../src/index.js";
14
- import type { WorkflowTaskResult } from "../src/shared/types.js";
14
+ import type {
15
+ WorkflowRunContext,
16
+ WorkflowTaskResult,
17
+ } from "../src/shared/types.js";
18
+ import { WORKER_PREFLIGHT_CONTRACT } from "./shared-prompts.js";
15
19
 
16
20
  const DEFAULT_MAX_LOOPS = 10;
17
21
  const DEFAULT_SPEC_DIR = "specs";
@@ -260,32 +264,12 @@ function defaultSpecPath(prompt: string, now = new Date()): string {
260
264
  return join(DEFAULT_SPEC_DIR, `${date}-${slugifySpecTopic(prompt)}.md`);
261
265
  }
262
266
 
263
- function suffixedPath(path: string, suffix: number): string {
264
- const extension = extname(path);
265
- const stem = extension.length === 0 ? path : path.slice(0, -extension.length);
266
- return `${stem}-${suffix}${extension}`;
267
- }
268
-
269
- function isFileExistsError(error: unknown): boolean {
270
- return error instanceof Error && (error as { readonly code?: string }).code === "EEXIST";
271
- }
272
-
273
267
  async function writeSpecFile(path: string, content: string): Promise<string> {
274
268
  await mkdir(dirname(path), { recursive: true });
275
-
276
- for (let suffix = 0; ; suffix += 1) {
277
- const candidate = suffix === 0 ? path : suffixedPath(path, suffix + 1);
278
- try {
279
- await writeFile(candidate, content.endsWith("\n") ? content : `${content}\n`, {
280
- encoding: "utf8",
281
- flag: "wx",
282
- });
283
- return candidate;
284
- } catch (error) {
285
- if (isFileExistsError(error)) continue;
286
- throw error;
287
- }
288
- }
269
+ await writeFile(path, content.endsWith("\n") ? content : `${content}\n`, {
270
+ encoding: "utf8",
271
+ });
272
+ return path;
289
273
  }
290
274
 
291
275
  async function createImplementationNotesFile(prompt: string): Promise<string> {
@@ -365,13 +349,16 @@ function reviewerErrorResult(
365
349
  function discoveryContextLabel(name: string | undefined): string {
366
350
  if (name?.startsWith("infra-locate-")) return "Infrastructure locator";
367
351
  if (name?.startsWith("infra-analyze-")) return "Infrastructure analyzer";
368
- if (name?.startsWith("infra-patterns-")) return "Infrastructure pattern finder";
352
+ if (name?.startsWith("infra-patterns-"))
353
+ return "Infrastructure pattern finder";
369
354
  return "Infrastructure discovery";
370
355
  }
371
356
 
372
357
  function formatDiscovery(results: readonly WorkflowTaskResult[]): string {
373
358
  return results
374
- .map((result) => `### ${discoveryContextLabel(result.name)}\n\n${result.text}`)
359
+ .map(
360
+ (result) => `### ${discoveryContextLabel(result.name)}\n\n${result.text}`,
361
+ )
375
362
  .join("\n\n---\n\n");
376
363
  }
377
364
 
@@ -381,774 +368,811 @@ function formatReview(results: readonly WorkflowTaskResult[]): string {
381
368
  .join("\n\n---\n\n");
382
369
  }
383
370
 
384
- export default defineWorkflow("ralph")
385
- .description(
386
- "Plan orchestrate → simplify → parallel review loop with bounded iteration.",
387
- )
388
- .input("prompt", {
389
- type: "text",
390
- required: true,
391
- description: "The task or goal to plan, execute, and refine.",
392
- })
393
- .input("max_loops", {
394
- type: "number",
395
- default: DEFAULT_MAX_LOOPS,
396
- description: `Maximum plan/orchestrate/review iterations (default ${DEFAULT_MAX_LOOPS}).`,
397
- })
398
- .input("base_branch", {
399
- type: "string",
400
- default: "origin/main",
401
- description:
402
- "Branch reviewers compare the current code delta against (default origin/main).",
403
- })
404
- .run(async (ctx) => {
405
- const inputs = ctx.inputs as {
406
- prompt?: string;
407
- max_loops?: number;
408
- base_branch?: string;
409
- };
410
- const prompt = inputs.prompt ?? "";
411
- const maxLoops = positiveInteger(inputs.max_loops, DEFAULT_MAX_LOOPS);
412
- const comparisonBaseBranch = normalizeBranchInput(inputs.base_branch, "origin/main");
413
-
414
- let reviewReport = "";
415
- let finalPlan = "";
416
- let finalPlanPath = "";
417
- let finalResult = "";
418
- let finalPrReport = "";
419
- const implementationNotesPath = await createImplementationNotesFile(prompt);
420
- let approved = false;
421
- let iterationsCompleted = 0;
422
-
423
- let noAskQuestionToolSet = [
424
- "read",
425
- "bash",
426
- "edit",
427
- "write",
428
- "todo",
429
- "subagent",
430
- "web_search",
431
- "code_search",
432
- "fetch_content",
433
- "get_search_content",
434
- "intercom",
435
- ];
436
-
437
- let plannerModelConfig = {
438
- model: "openai/gpt-5.5",
439
- fallbackModels: [
440
- "openai-codex/gpt-5.5",
441
- "github-copilot/gpt-5.5",
442
- "anthropic/claude-opus-4-7",
443
- "github-copilot/claude-opus-4.7",
444
- ],
445
- thinkingLevel: "high" as const,
446
- tools: noAskQuestionToolSet,
447
- };
371
+ type RalphInputs = {
372
+ readonly prompt?: string;
373
+ readonly max_loops?: number;
374
+ readonly base_branch?: string;
375
+ readonly git_worktree_dir?: string;
376
+ };
448
377
 
449
- let orchestratorModelConfig = {
450
- model: "openai/gpt-5.5",
451
- fallbackModels: [
452
- "openai-codex/gpt-5.5",
453
- "github-copilot/gpt-5.5",
454
- "anthropic/claude-sonnet-4-6",
455
- "github-copilot/claude-sonnet-4.6",
456
- ],
457
- thinkingLevel: "medium" as const,
458
- tools: noAskQuestionToolSet,
459
- };
378
+ type RalphWorkflowOptions = {
379
+ readonly prompt: string;
380
+ readonly maxLoops: number;
381
+ readonly comparisonBaseBranch: string;
382
+ readonly workflowStartCwd: string;
383
+ };
460
384
 
461
- let simplifierModelConfig = {
462
- model: "openai/gpt-5.5",
463
- fallbackModels: [
464
- "openai-codex/gpt-5.5",
465
- "github-copilot/gpt-5.5",
466
- "anthropic/claude-sonnet-4-6",
467
- "github-copilot/claude-sonnet-4.6",
468
- ],
469
- thinkingLevel: "medium" as const,
470
- tools: noAskQuestionToolSet,
471
- };
385
+ type RalphWorkflowResult = {
386
+ readonly result: string;
387
+ readonly plan: string;
388
+ readonly plan_path: string;
389
+ readonly implementation_notes_path: string;
390
+ readonly pr_report: string;
391
+ readonly approved: boolean;
392
+ readonly iterations_completed: number;
393
+ readonly review_report: string;
394
+ };
472
395
 
473
- let reviewerModelConfig = {
474
- model: "openai/gpt-5.5",
475
- fallbackModels: [
476
- "openai-codex/gpt-5.5",
477
- "github-copilot/gpt-5.5",
478
- "anthropic/claude-opus-4-7",
479
- "github-copilot/claude-opus-4.7",
480
- ],
481
- thinkingLevel: "high" as const,
482
- tools: noAskQuestionToolSet,
483
- customTools: [reviewDecisionTool],
484
- };
396
+ async function runRalphWorkflow(
397
+ ctx: WorkflowRunContext<RalphInputs>,
398
+ options: RalphWorkflowOptions,
399
+ ): Promise<RalphWorkflowResult> {
400
+ const { prompt, maxLoops, comparisonBaseBranch, workflowStartCwd } = options;
401
+
402
+ let reviewReport = "";
403
+ let finalPlan = "";
404
+ let finalPlanPath = "";
405
+ let finalResult = "";
406
+ let finalPrReport = "";
407
+ // Keep generated specs under the workflow runtime cwd. When Ralph is invoked
408
+ // with git_worktree_dir, the executor defaults ctx.cwd to the matching
409
+ // worktree cwd so specs and stage writes land in the same checkout.
410
+ const workflowSpecPath = resolve(workflowStartCwd, defaultSpecPath(prompt));
411
+ const implementationNotesPath = await createImplementationNotesFile(prompt);
412
+ let approved = false;
413
+ let iterationsCompleted = 0;
414
+
415
+ const plannerModelConfig = {
416
+ model: "openai/gpt-5.5",
417
+ fallbackModels: [
418
+ "openai-codex/gpt-5.5",
419
+ "github-copilot/gpt-5.5",
420
+ "anthropic/claude-opus-4-7",
421
+ "github-copilot/claude-opus-4.7",
422
+ ],
423
+ thinkingLevel: "high" as const,
424
+ excludeTools: ["ask_user_question"],
425
+ };
485
426
 
486
- let explorerModelConfig = {
487
- model: "openai/gpt-5.4-mini",
488
- fallbackModels: [
489
- "openai-codex/gpt-5.4-mini",
490
- "github-copilot/gpt-5.4-mini",
491
- "anthropic/claude-haiku-4-5",
492
- "github-copilot/claude-haiku-4.5",
493
- ],
494
- thinkingLevel: "low" as const,
495
- tools: noAskQuestionToolSet,
496
- };
427
+ const orchestratorModelConfig = {
428
+ model: "openai/gpt-5.5",
429
+ fallbackModels: [
430
+ "openai-codex/gpt-5.5",
431
+ "github-copilot/gpt-5.5",
432
+ "anthropic/claude-sonnet-4-6",
433
+ "github-copilot/claude-sonnet-4.6",
434
+ ],
435
+ thinkingLevel: "medium" as const,
436
+ excludeTools: ["ask_user_question"],
437
+ };
497
438
 
498
- for (let iteration = 1; iteration <= maxLoops; iteration += 1) {
499
- iterationsCompleted = iteration;
439
+ const simplifierModelConfig = {
440
+ model: "openai/gpt-5.5",
441
+ fallbackModels: [
442
+ "openai-codex/gpt-5.5",
443
+ "github-copilot/gpt-5.5",
444
+ "anthropic/claude-sonnet-4-6",
445
+ "github-copilot/claude-sonnet-4.6",
446
+ ],
447
+ thinkingLevel: "medium" as const,
448
+ excludeTools: ["ask_user_question"],
449
+ };
500
450
 
501
- const planner = await ctx.task(`planner-${iteration}`, {
502
- prompt: taggedPrompt([
503
- [
504
- "role",
505
- "You are a technical architect. Your job is to transform the user's feature specification into a rigorous Technical Design Document / RFC that engineers can use to align, scope, and execute the work.",
506
- ],
507
- [
508
- "critical_deliverable",
509
- [
510
- "Your final output is a filled-in RFC rendered as markdown text.",
511
- "Render the RFC Template in this prompt with every section populated by feature-specific content drawn from the user's specification and your codebase investigation.",
512
- "Do not implement code changes in this stage; this stage only investigates and authors the RFC.",
513
- ].join("\n"),
514
- ],
515
- [
516
- "task",
517
- `Plan iteration ${iteration}/${maxLoops} for this user specification:\n${prompt}`,
518
- ],
519
- [
520
- "previous_review_findings",
521
- reviewReport
522
- ? "Previous review findings:\n{previous}"
523
- : "No prior review findings; this is the first iteration.",
524
- ],
525
- [
526
- "input_spec_files",
527
- [
528
- "If the user specification is a file path instead of raw prose, read that file and use it as source material for the RFC.",
529
- "Still author the RFC normally; do not output only a forwarded path.",
530
- ].join("\n"),
531
- ],
532
- [
533
- "investigation_phase",
534
- [
535
- "Before drafting, read the specification carefully and identify the concrete problem, success criteria, hard constraints, and non-goals.",
536
- "Survey the codebase using file/search tools such as read plus grep/rg/find/glob-style shell commands to ground the RFC in current architecture.",
537
- "Name concrete services, modules, files, tests, data models, APIs, CLIs, config files, and external integrations this work will touch.",
538
- "Capture metadata with bash: `git config user.name` for Author(s), and `date '+%Y-%m-%d'` for Created / Last Updated.",
539
- "Look for prior art: existing RFCs, ADRs, README files, specs, docs, tests, or code comments that explain why the current state exists.",
540
- ].join("\n"),
541
- ],
542
- [
543
- "authoring_principles",
544
- [
545
- "Be specific: `src/server/auth.ts:42` beats `the auth layer`.",
546
- "Trade-offs over conclusions: Alternatives Considered must include at least two real alternatives with honest pros, cons, and rejection reasons.",
547
- "Non-goals matter: explicitly exclude work that is out of scope to prevent scope creep.",
548
- "Diagrams are load-bearing: Section 4.1 must include a Mermaid system architecture diagram grounded in real components.",
549
- "Surface open questions in Section 9 with owner placeholders such as `[OWNER: infra team]`; do not paper over uncertainty.",
550
- "Match depth to stakes: a small refactor can be concise, but every template section header must remain present.",
551
- "If prior review findings are present, explicitly address each finding or explain why it is obsolete.",
552
- ].join("\n"),
553
- ],
554
- [
555
- "stage_contract",
556
- [
557
- "This stage is investigation-first RFC authoring. The RFC is only valid if it is grounded in repository inspection performed during this stage.",
558
- "Do not fill the template from generic architecture guesses. Before writing the final RFC, inspect relevant code, docs, tests, configs, and prior design material.",
559
- "Treat the output format as the report after investigation, not a substitute for investigation.",
560
- ].join("\n"),
561
- ],
562
- [
563
- "evidence_expectations",
564
- [
565
- "Every major design claim should be traceable to concrete evidence: file paths, symbols, commands, docs, tests, configs, or prior RFCs.",
566
- "Include those concrete references inside the RFC sections where they support the design.",
567
- "If expected evidence cannot be found, say so in the relevant RFC section or Open Questions rather than papering over the gap.",
568
- ].join("\n"),
569
- ],
570
- [
571
- "output_discipline",
572
- [
573
- "Render the RFC Template exactly as the final document structure: preserve every header and the metadata table.",
574
- "Replace instructional placeholders with real, feature-specific content; do not leave template guidance in the final RFC.",
575
- "Output nothing after the RFC: no meta-commentary, no summary of what you wrote, no implementation log.",
576
- ].join("\n"),
577
- ],
578
- ["rfc_template", PLANNER_RFC_TEMPLATE],
579
- ]),
580
- ...(reviewReport
581
- ? { previous: { name: "review-report", text: reviewReport } }
582
- : {}),
583
- ...plannerModelConfig,
584
- });
585
- finalPlan = planner.text;
586
- const specPath = await writeSpecFile(defaultSpecPath(prompt), planner.text);
587
- finalPlanPath = specPath;
588
-
589
- const orchestrator = await ctx.task(`orchestrator-${iteration}`, {
590
- prompt: taggedPrompt([
591
- [
592
- "role",
593
- "You are a sub-agent orchestrator with many tools available. Your primary implementation tool is the `subagent` tool.",
594
- ],
595
- [
596
- "objective",
597
- `Implement iteration ${iteration}/${maxLoops} for the task: ${prompt}`,
598
- ],
599
- [
600
- "spec_file",
601
- [
602
- `The technical specification for this iteration was written to: ${specPath}`,
603
- "Read this file before delegating or implementing anything.",
604
- "Do not rely on an inline planner transcript; the spec file is the authoritative plan for this iteration.",
605
- ].join("\n"),
606
- ],
607
- [
608
- "implementation_notes",
609
- [
610
- `Keep a running Markdown implementation notes file at this OS temp directory path: ${implementationNotesPath}`,
611
- "The file has already been initialized for this workflow run; update it while you implement the spec.",
612
- "Record decisions you had to make that were not in the spec, things you had to change from the spec, tradeoffs you had to make, blockers, validation outcomes, and anything else the user should know.",
613
- "Ask delegated subagents to report any notes-worthy decisions or tradeoffs back to you, then consolidate them into this file before your final report.",
614
- "Do not include secrets, credentials, tokens, or unrelated environment details in the notes file.",
615
- ].join("\n"),
616
- ],
617
- [
618
- "project_initialization_preflight",
619
- [
620
- "Before normal implementation delegation, determine whether this checkout appears initialized for its actual language, framework, and build system.",
621
- "Do not rely on hard-coded assumptions about JavaScript, TypeScript, Python, Rust, Go, Java, mobile, or any other ecosystem. Infer the project type and setup requirements from repository evidence.",
622
- "Inspect source layout, setup docs, package/build manifests, lockfiles, toolchain files, generated-artifact conventions, CI workflows, workflow configuration, and package scripts or equivalent task definitions.",
623
- "Look for evidence that dependencies, generated files, local toolchains, submodules, codegen outputs, or other project-specific initialization artifacts are missing for this checkout.",
624
- "When repository evidence shows missing initialization, run or delegate the appropriate documented setup command before implementation work.",
625
- "You are responsible for initializing the checkout when setup commands are documented; missing dependencies, generated files, or local toolchains are setup work, not user handoff work.",
626
- "Once setup succeeds, continue normal implementation orchestration. Do not treat missing dependencies or generated setup artifacts in a fresh worktree as implementation failures.",
627
- "If setup requirements cannot be determined confidently, delegate a focused discovery task before implementation instead of guessing.",
628
- "If setup remains blocked after evidence-based discovery and setup attempts, report the blocker with commands tried and the exact evidence needed to continue.",
629
- ].join("\n"),
630
- ],
631
- [
632
- "delegation_policy",
633
- [
634
- "You are not the implementer. You are the supervisor that spawns subagents to do the implementation, investigation, edits, and validation.",
635
- "All non-trivial operations must be delegated to subagents via the `subagent` tool before you claim progress.",
636
- "Delegate codebase understanding, impact analysis, and implementation research to codebase-locator, codebase-analyzer, and pattern-finder style subagents when available.",
637
- "Delegate shell-heavy work — especially commands likely to produce lots of output, log digging, CLI investigation, and broad grep/find exploration — to subagents that can run those commands rather than doing it in this orchestrator context.",
638
- "Delegate implementation edits to a focused subagent with clear files, constraints, and validation expectations; do not merely describe the edits yourself.",
639
- "Use separate subagents for separate tasks, and launch independent subagents in parallel when useful.",
640
- "Do not split highly overlapping tasks across multiple subagents; consolidate overlapping work into one focused delegation to avoid duplicate effort.",
641
- "If a subagent takes a long time, do not attempt to do its assigned job yourself while waiting. Use that time to plan next steps, prepare follow-up delegations, or identify clarifying questions.",
642
- ].join("\n"),
643
- ],
644
- [
645
- "execution_contract",
646
- [
647
- "The required output format is a completion report, not the task itself.",
648
- "Do not jump straight to the report. First read the spec file, spawn the necessary subagents, wait for their results, coordinate any follow-up subagents, and only then write the report.",
649
- "A valid response must be grounded in actual subagent work: name the delegated work, summarize what each subagent did, and distinguish completed changes from recommendations or blockers.",
650
- "If you cannot read the spec file, spawn subagents, or use subagents, treat that as a blocker and report it honestly instead of pretending the requested work was done.",
651
- ].join("\n"),
652
- ],
653
- [
654
- "subagent_tracking",
655
- [
656
- "Use the `todo` tool as your active control ledger for subagent work.",
657
- "Before launching subagents, create todo items for each delegated task with enough detail to identify owner, purpose, and expected output.",
658
- "Mark todo items in_progress when the corresponding subagent starts, append progress/results as subagents report back, and close them only after you have incorporated or explicitly rejected their result.",
659
- "Keep pending, in_progress, blocked, and completed work accurate so you do not lose track of parallel subagents or unresolved follow-ups.",
660
- "Before writing the final report, review the todo list and resolve every pending/in_progress item as completed, blocked, or deferred with an explanation.",
661
- ].join("\n"),
662
- ],
663
- [
664
- "instructions",
665
- [
666
- `Start by reading the spec file at ${specPath}.`,
667
- "Perform the project_initialization_preflight before decomposing implementation work; complete or delegate required setup before implementation delegation when the checkout appears uninitialized.",
668
- "Decompose the work into delegated subagent tasks based on that spec file.",
669
- "Pass each subagent the relevant task, constraints, files, validation expectations, any prior review findings from the spec, and instructions to report implementation-note-worthy decisions or tradeoffs.",
670
- "Coordinate subagent results into the smallest coherent set of changes that satisfies the spec.",
671
- "Preserve existing architecture and repository conventions unless the spec explicitly justifies a change.",
672
- "Run or delegate the most relevant validation commands available in the repository.",
673
- `Before your final report, update the running implementation notes file at ${implementationNotesPath} with decisions, spec deviations, tradeoffs, blockers, and validation outcomes from this iteration.`,
674
- "If blocked, describe the blocker and the safest partial state instead of inventing success.",
675
- "Do not hide failures; reviewers need accurate status.",
676
- ].join("\n"),
677
- ],
678
- [
679
- "output_format",
680
- [
681
- "After subagents have done the work, return Markdown with headings:",
682
- "1. Spec file — the path you read",
683
- "2. Delegations performed — subagents spawned and what each completed",
684
- "3. Changes made — concrete changes from subagent work, not intentions",
685
- "4. Files touched",
686
- "5. Validation run / recommended",
687
- "6. Deferred work or blockers",
688
- "7. Implementation notes — confirm the OS temp notes path was updated",
689
- ].join("\n"),
690
- ],
691
- ]),
692
- reads: [specPath, implementationNotesPath],
693
- ...orchestratorModelConfig,
694
- });
695
- finalResult = orchestrator.text;
696
-
697
- await ctx.task(`code-simplifier-${iteration}`, {
698
- prompt: taggedPrompt([
699
- [
700
- "role",
701
- [
702
- "You are an expert code simplification specialist focused on enhancing code clarity, consistency, and maintainability while preserving exact functionality.",
703
- "Your expertise is applying project-specific best practices to simplify and improve recently modified code without altering behavior.",
704
- "You prioritize readable, explicit code over overly compact or clever solutions.",
705
- ].join("\n"),
706
- ],
707
- [
708
- "objective",
709
- `Refine recently modified code for this task while preserving exact behavior: ${prompt}`,
710
- ],
711
- ["current_iteration_context", "{previous}"],
712
- [
713
- "functionality_preservation",
714
- [
715
- "Never change what the code does — only how it does it.",
716
- "All original features, outputs, side effects, public APIs, persistence formats, tests, and user-visible behavior must remain intact.",
717
- "If a simplification could change behavior, do not apply it; document why it was skipped.",
718
- ].join("\n"),
719
- ],
720
- [
721
- "project_standards",
722
- [
723
- "Read and follow repository guidance from AGENTS.md and/or CLAUDE.md when present.",
724
- "Respect established module style, imports, file extensions, typing conventions, error-handling patterns, naming, tests, and architectural boundaries.",
725
- "For this TypeScript workflow repo, preserve ESM .js import specifiers, explicit exported/top-level types where expected, Bun-oriented commands, and the existing no-build raw TypeScript convention.",
726
- "Do not impose standards that conflict with local project guidance.",
727
- ].join("\n"),
728
- ],
729
- [
730
- "clarity_improvements",
731
- [
732
- "Reduce unnecessary complexity, nesting, duplication, and incidental abstractions.",
733
- "Improve readability with clear variable/function names and consolidated related logic.",
734
- "Remove comments that merely restate obvious code, but keep comments that explain intent, constraints, or non-obvious trade-offs.",
735
- "Avoid nested ternary operators; prefer switch statements or explicit if/else chains for multiple conditions.",
736
- "Choose clarity over brevity: explicit code is often better than dense one-liners.",
737
- ].join("\n"),
738
- ],
451
+ const reviewerModelConfig = {
452
+ model: "openai/gpt-5.5",
453
+ fallbackModels: [
454
+ "openai-codex/gpt-5.5",
455
+ "github-copilot/gpt-5.5",
456
+ "anthropic/claude-opus-4-7",
457
+ "github-copilot/claude-opus-4.7",
458
+ ],
459
+ thinkingLevel: "high" as const,
460
+ excludeTools: ["ask_user_question"],
461
+ customTools: [reviewDecisionTool],
462
+ };
463
+
464
+ const explorerModelConfig = {
465
+ model: "openai/gpt-5.4-mini",
466
+ fallbackModels: [
467
+ "openai-codex/gpt-5.4-mini",
468
+ "github-copilot/gpt-5.4-mini",
469
+ "anthropic/claude-haiku-4-5",
470
+ "github-copilot/claude-haiku-4.5",
471
+ ],
472
+ thinkingLevel: "low" as const,
473
+ excludeTools: ["ask_user_question"],
474
+ };
475
+
476
+ for (let iteration = 1; iteration <= maxLoops; iteration += 1) {
477
+ iterationsCompleted = iteration;
478
+
479
+ const planner = await ctx.task(`planner-${iteration}`, {
480
+ prompt: taggedPrompt([
481
+ [
482
+ "role",
483
+ "You are a technical architect. Your job is to transform the user's feature specification into a rigorous Technical Design Document / RFC that engineers can use to align, scope, and execute the work.",
484
+ ],
485
+ [
486
+ "critical_deliverable",
739
487
  [
740
- "balance_constraints",
741
- [
742
- "Do not over-simplify in ways that reduce clarity, debuggability, extensibility, or separation of concerns.",
743
- "Do not combine too many concerns into one function or remove helpful abstractions that organize the code.",
744
- "Do not prioritize fewer lines over maintainability.",
745
- "Limit scope to code recently modified in this iteration/session unless the planner explicitly asked for broader cleanup.",
746
- ].join("\n"),
747
- ],
488
+ "Your final output is a filled-in RFC rendered as markdown text.",
489
+ "Render the RFC Template in this prompt with every section populated by feature-specific content drawn from the user's specification and your codebase investigation.",
490
+ "Do not implement code changes in this stage; this stage only investigates and authors the RFC.",
491
+ ].join("\n"),
492
+ ],
493
+ [
494
+ "task",
495
+ `Plan iteration ${iteration}/${maxLoops} for this user specification:\n${prompt}`,
496
+ ],
497
+ [
498
+ "previous_review_findings",
499
+ reviewReport
500
+ ? "Previous review findings:\n{previous}"
501
+ : "No prior review findings; this is the first iteration.",
502
+ ],
503
+ [
504
+ "spec_revision_target",
505
+ iteration === 1
506
+ ? [
507
+ `Ralph will write your final RFC markdown for this workflow run to: ${workflowSpecPath}`,
508
+ "Treat this as the original spec file for the run.",
509
+ ].join("\n")
510
+ : [
511
+ `The existing RFC/spec file for this workflow run is: ${workflowSpecPath}`,
512
+ "Read that original spec before drafting; revise it in response to review findings and current repository evidence.",
513
+ "Your final output must be the full updated RFC markdown that should replace the original spec, not a diff, patch, or commentary.",
514
+ ].join("\n"),
515
+ ],
516
+ [
517
+ "input_spec_files",
748
518
  [
749
- "stage_contract",
750
- [
751
- "This is an active code-refinement stage, not just a commentary stage.",
752
- "Before producing the report, inspect the actual repository state and recently modified files from the planner/orchestrator context.",
753
- "Apply safe simplifications with edit/write tools when clear behavior-preserving improvements exist. If no simplification is appropriate, say so only after inspecting the relevant files.",
754
- ].join("\n"),
755
- ],
519
+ "If the user specification is a file path instead of raw prose, read that file and use it as source material for the RFC.",
520
+ "Still author the RFC normally; do not output only a forwarded path.",
521
+ ].join("\n"),
522
+ ],
523
+ [
524
+ "investigation_phase",
756
525
  [
757
- "required_actions_before_output",
758
- [
759
- "1. Identify the concrete files/sections changed in this iteration.",
760
- "2. Read those files before deciding whether to simplify.",
761
- "3. Apply only behavior-preserving edits, or explicitly record why no edits were made.",
762
- "4. Run or recommend focused validation tied to the touched files.",
763
- ].join("\n"),
764
- ],
526
+ "Before drafting, read the specification carefully and identify the concrete problem, success criteria, hard constraints, and non-goals.",
527
+ "Survey the codebase using file/search tools such as read plus grep/rg/find/glob-style shell commands to ground the RFC in current architecture.",
528
+ "Name concrete services, modules, files, tests, data models, APIs, CLIs, config files, and external integrations this work will touch.",
529
+ "Capture metadata with bash: `git config user.name` for Author(s), and `date '+%Y-%m-%d'` for Created / Last Updated.",
530
+ "Look for prior art: existing RFCs, ADRs, README files, specs, docs, tests, or code comments that explain why the current state exists.",
531
+ ].join("\n"),
532
+ ],
533
+ [
534
+ "authoring_principles",
765
535
  [
766
- "handoff_expectations",
767
- "In the final report, distinguish edits actually applied from observations only. Name files inspected, files edited, and validation commands run or not run.",
768
- ],
536
+ "Be specific: `src/server/auth.ts:42` beats `the auth layer`.",
537
+ "Trade-offs over conclusions: Alternatives Considered must include at least two real alternatives with honest pros, cons, and rejection reasons.",
538
+ "Non-goals matter: explicitly exclude work that is out of scope to prevent scope creep.",
539
+ "Diagrams are load-bearing: Section 4.1 must include a Mermaid system architecture diagram grounded in real components.",
540
+ "Surface open questions in Section 9 with owner placeholders such as `[OWNER: infra team]`; do not paper over uncertainty.",
541
+ "Match depth to stakes: a small refactor can be concise, but every template section header must remain present.",
542
+ "If prior review findings are present, explicitly address each finding or explain why it is obsolete.",
543
+ ].join("\n"),
544
+ ],
545
+ [
546
+ "stage_contract",
769
547
  [
770
- "process",
771
- [
772
- "Identify recently modified code sections from the iteration context and repository state.",
773
- "Analyze opportunities to improve elegance, consistency, and maintainability.",
774
- "Apply project-specific best practices while preserving behavior.",
775
- "Run or recommend focused validation when appropriate.",
776
- "Document only significant changes that affect understanding or future maintenance.",
777
- ].join("\n"),
778
- ],
548
+ "This stage is investigation-first RFC authoring. The RFC is only valid if it is grounded in repository inspection performed during this stage.",
549
+ "Do not fill the template from generic architecture guesses. Before writing the final RFC, inspect relevant code, docs, tests, configs, and prior design material.",
550
+ "Treat the output format as the report after investigation, not a substitute for investigation.",
551
+ ].join("\n"),
552
+ ],
553
+ [
554
+ "evidence_expectations",
779
555
  [
780
- "output_format",
781
- [
782
- "Markdown with headings:",
783
- "1. Simplifications applied",
784
- "2. Behavior-preservation notes",
785
- "3. Validation run / recommended",
786
- "4. Skipped risky simplifications",
787
- ].join("\n"),
788
- ],
789
- ]),
790
- previous: [planner, orchestrator],
791
- ...simplifierModelConfig,
792
- });
793
-
794
- const discovery = await ctx.parallel(
556
+ "Every major design claim should be traceable to concrete evidence: file paths, symbols, commands, docs, tests, configs, or prior RFCs.",
557
+ "Include those concrete references inside the RFC sections where they support the design.",
558
+ "If expected evidence cannot be found, say so in the relevant RFC section or Open Questions rather than papering over the gap.",
559
+ ].join("\n"),
560
+ ],
795
561
  [
796
- {
797
- name: `infra-locate-${iteration}`,
798
- task: taggedPrompt([
799
- [
800
- "role",
801
- "You locate project infrastructure needed for patch review.",
802
- ],
803
- [
804
- "objective",
805
- `Find review-relevant infrastructure for the task: ${prompt}`,
806
- ],
807
- [
808
- "stage_contract",
809
- [
810
- "This is a repository-discovery stage. Do not answer from assumptions or common project layouts.",
811
- "Before output, inspect the repository for each infrastructure category: package scripts, test configs, CI workflows, generated artifacts, lint/typecheck setup, and release gates.",
812
- "The table is a compact handoff after discovery, not a substitute for discovery.",
813
- ].join("\n"),
814
- ],
815
- [
816
- "instructions",
817
- [
818
- "Locate package scripts, test configs, CI workflows, generated artifacts, lint/typecheck setup, and release gates.",
819
- "Search/read relevant files such as package manifests, CI workflow directories, test configs, lint/typecheck configs, build scripts, release configs, and generated-artifact markers.",
820
- "Prefer exact file paths and commands.",
821
- "Explain how each item should influence review or validation.",
822
- "If a category does not exist, report `not found` and briefly name the paths or patterns checked.",
823
- ].join("\n"),
824
- ],
825
- [
826
- "output_format",
827
- "Markdown table: Area | Path/command | Why it matters | Confidence.",
828
- ],
829
- ]),
830
- ...explorerModelConfig,
831
- },
832
- {
833
- name: `infra-analyze-${iteration}`,
834
- task: taggedPrompt([
835
- [
836
- "role",
837
- "You analyze integration risks in project infrastructure.",
838
- ],
839
- [
840
- "objective",
841
- `Assess infrastructure and changed-code risks for the task: ${prompt}`,
842
- ],
843
- [
844
- "stage_contract",
845
- [
846
- "This stage analyzes actual repository coupling, not generic integration risks.",
847
- "Before output, inspect the changed-code context plus relevant infrastructure/configuration files discovered or inferable from the repo.",
848
- "Classify a risk as confirmed only when repository evidence shows the coupling; otherwise mark it speculative.",
849
- ].join("\n"),
850
- ],
851
- [
852
- "instructions",
853
- [
854
- "Identify hidden coupling with build, tests, linting, runtime config, release automation, or generated files.",
855
- "Name the exact validations that would most efficiently detect regressions.",
856
- "Separate confirmed risks from speculative risks.",
857
- "Do not repeat generic review advice; ground findings in repository evidence.",
858
- "Copy validation commands from actual repository scripts/configs when available; do not invent commands that are not supported by the repo.",
859
- ].join("\n"),
860
- ],
861
- [
862
- "evidence_expectations",
863
- "Each confirmed risk must include concrete evidence: path, command, symbol, config key, script name, or file relationship.",
864
- ],
865
- [
866
- "output_format",
867
- "Markdown with sections: Confirmed risks, Speculative risks, Validation commands, Evidence.",
868
- ],
869
- ]),
870
- ...explorerModelConfig,
871
- },
872
- {
873
- name: `infra-patterns-${iteration}`,
874
- task: taggedPrompt([
875
- [
876
- "role",
877
- "You find repository patterns that a patch must follow.",
878
- ],
879
- [
880
- "objective",
881
- `Extract conventions relevant to reviewing this task: ${prompt}`,
882
- ],
883
- [
884
- "stage_contract",
885
- [
886
- "This is an evidence-gathering stage for repository conventions. Do not describe generic best practices.",
887
- "Before output, find concrete examples in the repository that demonstrate conventions relevant to this task.",
888
- "Read enough of each example to understand the convention before reporting it.",
889
- ].join("\n"),
890
- ],
891
- [
892
- "instructions",
893
- [
894
- "Find examples of build/test/style/release/architecture patterns the patch should mirror.",
895
- "Search for nearby or analogous implementations, tests, configs, scripts, and docs.",
896
- "Use concrete paths, commands, or symbols as evidence.",
897
- "Highlight conventions that commonly cause subtle review failures.",
898
- "If examples conflict, describe the conflict instead of forcing a single rule.",
899
- "If no relevant example exists, state what was searched and that no pattern was found.",
900
- ].join("\n"),
901
- ],
902
- [
903
- "handoff_expectations",
904
- "For every required convention or useful example, include the supporting path, command, symbol, or file relationship so reviewers can verify it quickly.",
905
- ],
906
- [
907
- "output_format",
908
- "Markdown with sections: Required conventions, Useful examples, Exceptions, Review implications.",
909
- ],
910
- ]),
911
- ...explorerModelConfig,
912
- },
562
+ "output_discipline",
563
+ [
564
+ "Render the RFC Template exactly as the final document structure: preserve every header and the metadata table.",
565
+ "Replace instructional placeholders with real, feature-specific content; do not leave template guidance in the final RFC.",
566
+ "Output nothing after the RFC: no meta-commentary, no summary of what you wrote, no implementation log.",
567
+ ].join("\n"),
913
568
  ],
914
- { task: prompt },
915
- );
569
+ ["rfc_template", PLANNER_RFC_TEMPLATE],
570
+ ]),
571
+ ...(reviewReport
572
+ ? { previous: { name: "review-report", text: reviewReport } }
573
+ : {}),
574
+ ...(iteration > 1 ? { reads: [workflowSpecPath] } : {}),
575
+ ...plannerModelConfig,
576
+ });
577
+ finalPlan = planner.text;
578
+ const specPath = await writeSpecFile(workflowSpecPath, planner.text);
579
+ finalPlanPath = specPath;
916
580
 
917
- const discoveryContext = formatDiscovery(discovery);
918
- const reviewPrompt = taggedPrompt([
581
+ const orchestrator = await ctx.task(`orchestrator-${iteration}`, {
582
+ prompt: taggedPrompt([
919
583
  [
920
584
  "role",
921
- [
922
- "You are acting as a reviewer for a proposed code change made by another engineer.",
923
- "Persona: a grumpy senior developer who has seen too many fragile patches. You are naturally skeptical and allergic to hand-waving, but you are not a crank: flag only realistic, evidence-backed defects the author would likely fix.",
924
- "Be terse, concrete, and technically fair. Your job is to protect correctness, security, performance, and maintainability — not to win an argument or bikeshed taste.",
925
- ].join("\n"),
585
+ "You are a sub-agent orchestrator with many tools available. Your primary implementation tool is the `subagent` tool.",
926
586
  ],
927
587
  [
928
588
  "objective",
929
- `Review the current code delta for the task: ${prompt}`,
589
+ `Implement iteration ${iteration}/${maxLoops} for the task: ${prompt}`,
930
590
  ],
931
591
  [
932
- "comparison_baseline",
592
+ "spec_file",
933
593
  [
934
- `The baseline branch for comparison is \`${comparisonBaseBranch}\`.`,
935
- "Compare the current working tree against this baseline branch, not against previous workflow reasoning or expected loop progress.",
936
- `Start with \`git status --short\`, then use working-tree-aware commands such as \`git diff ${comparisonBaseBranch}\` and \`git diff --cached ${comparisonBaseBranch}\` to identify changed tracked files; inspect untracked files from status directly.`,
594
+ `The current technical specification for this workflow run is written to: ${specPath}`,
595
+ "This is an absolute host-repository path and may be outside the worktree cwd; read it exactly as provided, not as a path relative to the worktree.",
596
+ "Read this file before delegating or implementing anything.",
597
+ "Do not rely on an inline planner transcript; the spec file is the authoritative plan for this iteration.",
937
598
  ].join("\n"),
938
599
  ],
939
- ["infrastructure_discovery", discoveryContext],
940
600
  [
941
- "project_guidance",
601
+ "implementation_notes",
942
602
  [
943
- "Use the repository's AGENTS.md and/or CLAUDE.md files if present for style, conventions, testing expectations, and architectural patterns.",
944
- "Project-level norms override these general instructions when they are more specific.",
945
- "Flag deviations only when they affect correctness, security, performance, or maintainability — not personal preference.",
946
- "If validation requires dependencies or tools that are missing, download or install them using the repository-approved package manager/commands rather than bypassing, mocking, or skipping the verification solely because dependencies are absent.",
603
+ `Keep a running Markdown implementation notes file at this OS temp directory path: ${implementationNotesPath}`,
604
+ "The file has already been initialized for this workflow run; update it while you implement the spec.",
605
+ "Record decisions you had to make that were not in the spec, things you had to change from the spec, tradeoffs you had to make, blockers, validation outcomes, and anything else the user should know.",
606
+ "Ask delegated subagents to report any notes-worthy decisions or tradeoffs back to you, then consolidate them into this file before your final report.",
607
+ "Do not include secrets, credentials, tokens, or unrelated environment details in the notes file.",
947
608
  ].join("\n"),
948
609
  ],
610
+ ["project_initialization_preflight", WORKER_PREFLIGHT_CONTRACT],
949
611
  [
950
- "validation_expectations",
612
+ "delegation_policy",
951
613
  [
952
- "Inspect the actual diff/repository state rather than trusting stage summaries.",
953
- "Run or delegate focused validation when it is necessary to distinguish a real bug from a hunch.",
954
- "If tests or typechecks fail because dependencies are missing, install/download the missing dependencies with the repo's documented package manager instead of bypassing the check.",
955
- "If validation cannot be completed after reasonable recovery, record the limitation in overall_explanation and reviewer_error; do not use missing dependencies as a reason to approve.",
614
+ "You are not the implementer. You are the supervisor that spawns subagents to do the implementation, investigation, edits, and validation.",
615
+ "All non-trivial operations must be delegated to subagents via the `subagent` tool before you claim progress.",
616
+ "Delegate codebase understanding, impact analysis, and implementation research to codebase-locator, codebase-analyzer, and pattern-finder style subagents when available.",
617
+ "Delegate shell-heavy work — especially commands likely to produce lots of output, log digging, CLI investigation, and broad grep/find exploration — to subagents that can run those commands rather than doing it in this orchestrator context.",
618
+ "Delegate implementation edits to a focused subagent with clear files, constraints, and validation expectations; do not merely describe the edits yourself.",
619
+ "Use separate subagents for separate tasks, and launch independent subagents in parallel when useful.",
620
+ "Do not split highly overlapping tasks across multiple subagents; consolidate overlapping work into one focused delegation to avoid duplicate effort.",
621
+ "If a subagent takes a long time, do not attempt to do its assigned job yourself while waiting. Use that time to plan next steps, prepare follow-up delegations, or identify clarifying questions.",
956
622
  ].join("\n"),
957
623
  ],
958
624
  [
959
- "bug_selection_guidelines",
625
+ "execution_contract",
960
626
  [
961
- "Use these default guidelines for deciding whether the author would appreciate the issue being flagged. More specific user, project, or file-level guidance overrides them.",
962
- "Flag an issue only when the original author would likely fix it if they knew about it.",
963
- "A finding should meaningfully impact accuracy, performance, security, or maintainability.",
964
- "A finding must be discrete and actionable, not a broad complaint about the whole codebase or a pile of related concerns.",
965
- "Do not demand rigor inconsistent with the rest of the repository; match the seriousness of existing code and project norms.",
966
- "Flag only bugs introduced by the current patch; do not flag pre-existing issues unless the patch makes them worse in a concrete way.",
967
- "Do not rely on unstated assumptions about author intent or codebase behavior.",
968
- "Speculation is insufficient: identify the code path, scenario, environment, or input that is provably affected.",
969
- "Do not flag intentional behavior changes as bugs unless they clearly violate the task or documented contract.",
970
- "Ignore trivial style unless it obscures meaning or violates documented standards in a way that affects correctness/security/maintainability.",
971
- "If no finding clears this bar, return an empty findings array, mark the patch correct, and set stop_review_loop true.",
627
+ "The required output format is a completion report, not the task itself.",
628
+ "Do not jump straight to the report. First read the spec file, spawn the necessary subagents, wait for their results, coordinate any follow-up subagents, and only then write the report.",
629
+ "A valid response must be grounded in actual subagent work: name the delegated work, summarize what each subagent did, and distinguish completed changes from recommendations or blockers.",
630
+ "If you cannot read the spec file, spawn subagents, or use subagents, treat that as a blocker and report it honestly instead of pretending the requested work was done.",
972
631
  ].join("\n"),
973
632
  ],
974
633
  [
975
- "comment_guidelines",
634
+ "subagent_tracking",
976
635
  [
977
- "Each finding title must start with a priority tag: [P0] drop-everything blocker, [P1] urgent next-cycle fix, [P2] normal fix, [P3] low-priority nice-to-have.",
978
- "Also include numeric priority: 0 for P0, 1 for P1, 2 for P2, 3 for P3; use null only if priority genuinely cannot be determined.",
979
- "The body must be one concise paragraph explaining why this is a bug and the exact scenario, environment, or inputs required for it to arise.",
980
- "Use a matter-of-fact, non-accusatory tone. Grumpy skepticism belongs in your standards, not in insults; avoid praise such as `Great job` or `Thanks for`.",
981
- "Keep code_location ranges as short as possible, ideally one line and never longer than 5-10 lines unless unavoidable.",
982
- "The code_location must overlap the diff/change under review.",
983
- "Use one finding per distinct issue. Do not generate a PR fix.",
984
- "Use suggestion blocks only for concrete replacement code and preserve exact leading whitespace if you include one.",
636
+ "Use the `todo` tool as your active control ledger for subagent work.",
637
+ "Before launching subagents, create todo items for each delegated task with enough detail to identify owner, purpose, and expected output.",
638
+ "Mark todo items in_progress when the corresponding subagent starts, append progress/results as subagents report back, and close them only after you have incorporated or explicitly rejected their result.",
639
+ "Keep pending, in_progress, blocked, and completed work accurate so you do not lose track of parallel subagents or unresolved follow-ups.",
640
+ "Before writing the final report, review the todo list and resolve every pending/in_progress item as completed, blocked, or deferred with an explanation.",
985
641
  ].join("\n"),
986
642
  ],
987
643
  [
988
- "how_many_findings",
644
+ "instructions",
989
645
  [
990
- "Return all findings the original author would definitely want to fix.",
991
- "If no such findings exist, return an empty findings array and mark the patch correct.",
992
- "Do not stop after the first qualifying finding; continue until every qualifying finding is listed.",
646
+ `Start by reading the spec file at ${specPath}.`,
647
+ "Perform the project_initialization_preflight before decomposing implementation work; complete or delegate required setup before implementation delegation when the checkout appears uninitialized.",
648
+ "Decompose the work into delegated subagent tasks based on that spec file.",
649
+ "Pass each subagent the relevant task, constraints, files, validation expectations, any prior review findings from the spec, and instructions to report implementation-note-worthy decisions or tradeoffs.",
650
+ "Coordinate subagent results into the smallest coherent set of changes that satisfies the spec.",
651
+ "Preserve existing architecture and repository conventions unless the spec explicitly justifies a change.",
652
+ "Run or delegate the most relevant validation commands available in the repository.",
653
+ `Before your final report, update the running implementation notes file at ${implementationNotesPath} with decisions, spec deviations, tradeoffs, blockers, and validation outcomes from this iteration.`,
654
+ "If blocked, describe the blocker and the safest partial state instead of inventing success.",
655
+ "Do not hide failures; reviewers need accurate status.",
993
656
  ].join("\n"),
994
657
  ],
995
658
  [
996
- "review_stage_contract",
659
+ "output_format",
997
660
  [
998
- "The structured review decision is only valid after you inspect the actual repository state and compare it against the stated baseline branch.",
999
- "Do not approve based solely on workflow stage summaries or prior agent reasoning.",
1000
- "The tool call is the final verdict after review work, not a shortcut around review work.",
661
+ "After subagents have done the work, return Markdown with headings:",
662
+ "1. Spec file — the path you read",
663
+ "2. Delegations performed — subagents spawned and what each completed",
664
+ "3. Changes made — concrete changes from subagent work, not intentions",
665
+ "4. Files touched",
666
+ "5. Validation run / recommended",
667
+ "6. Deferred work or blockers",
668
+ "7. Implementation notes — confirm the OS temp notes path was updated",
1001
669
  ].join("\n"),
1002
670
  ],
671
+ ]),
672
+ reads: [specPath, implementationNotesPath],
673
+ ...orchestratorModelConfig,
674
+ });
675
+ finalResult = orchestrator.text;
676
+
677
+ await ctx.task(`code-simplifier-${iteration}`, {
678
+ prompt: taggedPrompt([
1003
679
  [
1004
- "required_actions_before_tool_call",
680
+ "role",
1005
681
  [
1006
- "1. Identify the changed files or diff under review.",
1007
- "2. Read the relevant changed code and directly affected call sites/tests/configs.",
1008
- "3. Run or delegate focused validation when needed to resolve uncertainty.",
1009
- "4. If you cannot inspect or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
682
+ "You are an expert code simplification specialist focused on enhancing code clarity, consistency, and maintainability while preserving exact functionality.",
683
+ "Your expertise is applying project-specific best practices to simplify and improve recently modified code without altering behavior.",
684
+ "You prioritize readable, explicit code over overly compact or clever solutions.",
1010
685
  ].join("\n"),
1011
686
  ],
1012
687
  [
1013
- "evidence_expectations",
688
+ "objective",
689
+ `Refine recently modified code for this task while preserving exact behavior: ${prompt}`,
690
+ ],
691
+ ["current_iteration_context", "{previous}"],
692
+ [
693
+ "functionality_preservation",
1014
694
  [
1015
- "The overall_explanation should briefly mention what was inspected and what validation was run or why validation was not completed.",
1016
- "Every finding must cite a concrete changed location and affected scenario.",
695
+ "Never change what the code does — only how it does it.",
696
+ "All original features, outputs, side effects, public APIs, persistence formats, tests, and user-visible behavior must remain intact.",
697
+ "If a simplification could change behavior, do not apply it; document why it was skipped.",
1017
698
  ].join("\n"),
1018
699
  ],
1019
700
  [
1020
- "structured_output_contract",
701
+ "project_standards",
1021
702
  [
1022
- "You have a structured-output tool named review_decision. Use it after your investigation and validation attempts.",
1023
- "The tool terminates the turn and provides the structured data; do not emit a separate final assistant response after calling it.",
1024
- "The review loop decides whether to stop only by parsing the JSON object returned by this tool; invalid JSON, missing fields, reviewer_error, or stop_review_loop=false are treated as not approved for safety.",
1025
- "Set stop_review_loop=true only when findings is empty, overall_correctness is patch is correct, and reviewer_error is null/omitted.",
1026
- "If you hit a reviewer/tool/validation error, still return the object with stop_review_loop=false and reviewer_error populated instead of pretending the patch is approved.",
1027
- "The JSON must match this schema exactly:",
1028
- "{",
1029
- ' "findings": [',
1030
- " {",
1031
- ' "title": "<≤ 80 chars, imperative, starts with [P0]/[P1]/[P2]/[P3]>",',
1032
- ' "body": "<one paragraph of valid Markdown explaining why this is a problem; cite files/lines/functions>",',
1033
- ' "confidence_score": <float 0.0-1.0>,',
1034
- ' "priority": <int 0-3 or null>,',
1035
- ' "code_location": {',
1036
- ' "absolute_file_path": "<absolute file path>",',
1037
- ' "line_range": {"start": <int>, "end": <int>}',
1038
- " }",
1039
- " }",
1040
- " ],",
1041
- ' "overall_correctness": "patch is correct" | "patch is incorrect",',
1042
- ' "overall_explanation": "<1-3 sentence explanation justifying the verdict>",',
1043
- ' "overall_confidence_score": <float 0.0-1.0>,',
1044
- ' "stop_review_loop": <boolean>,',
1045
- ' "reviewer_error": null | {"kind": "validation_unavailable" | "dependency_unavailable" | "tool_failure" | "reviewer_failure", "message": "<what failed>", "attempted_recovery": "<what you tried>"}',
1046
- "}",
703
+ "Read and follow repository guidance from AGENTS.md and/or CLAUDE.md when present.",
704
+ "Respect established module style, imports, file extensions, typing conventions, error-handling patterns, naming, tests, and architectural boundaries.",
705
+ "For this TypeScript workflow repo, preserve ESM .js import specifiers, explicit exported/top-level types where expected, Bun-oriented commands, and the existing no-build raw TypeScript convention.",
706
+ "Do not impose standards that conflict with local project guidance.",
1047
707
  ].join("\n"),
1048
708
  ],
1049
- ]);
1050
-
1051
- let reviews: WorkflowTaskResult[];
1052
- try {
1053
- reviews = await ctx.parallel(
1054
- [
1055
- {
1056
- name: "reviewer-a",
1057
- task: reviewPrompt,
1058
- ...reviewerModelConfig,
1059
- },
1060
- {
1061
- name: "reviewer-b",
1062
- task: reviewPrompt,
1063
- ...reviewerModelConfig,
1064
- },
1065
- ],
1066
- { task: prompt, failFast: false },
1067
- );
1068
- } catch (err) {
1069
- const message = err instanceof Error ? err.message : String(err);
1070
- reviews = [reviewerErrorResult(iteration, message)];
1071
- }
1072
-
1073
- approved =
1074
- reviews.length > 0 &&
1075
- reviews.every((review) => reviewApproved(review.text));
1076
- reviewReport = formatReview(reviews);
1077
- if (approved) break;
1078
- }
1079
-
1080
- const prResult = await ctx.task("pull-request", {
1081
- prompt: taggedPrompt([
1082
709
  [
1083
- "role",
1084
- "You are a careful release engineer preparing a pull request from the current workspace state.",
710
+ "clarity_improvements",
711
+ [
712
+ "Reduce unnecessary complexity, nesting, duplication, and incidental abstractions.",
713
+ "Improve readability with clear variable/function names and consolidated related logic.",
714
+ "Remove comments that merely restate obvious code, but keep comments that explain intent, constraints, or non-obvious trade-offs.",
715
+ "Avoid nested ternary operators; prefer switch statements or explicit if/else chains for multiple conditions.",
716
+ "Choose clarity over brevity: explicit code is often better than dense one-liners.",
717
+ ].join("\n"),
1085
718
  ],
1086
719
  [
1087
- "objective",
1088
- `Review the changes since the base branch \`${comparisonBaseBranch}\` and create a pull request if possible and credentials are available.`,
720
+ "balance_constraints",
721
+ [
722
+ "Do not over-simplify in ways that reduce clarity, debuggability, extensibility, or separation of concerns.",
723
+ "Do not combine too many concerns into one function or remove helpful abstractions that organize the code.",
724
+ "Do not prioritize fewer lines over maintainability.",
725
+ "Limit scope to code recently modified in this iteration/session unless the planner explicitly asked for broader cleanup.",
726
+ ].join("\n"),
1089
727
  ],
1090
728
  [
1091
- "workflow_context",
729
+ "stage_contract",
1092
730
  [
1093
- `Original task: ${prompt}`,
1094
- `Review loop approved: ${approved ? "yes" : "no"}`,
1095
- finalPlanPath
1096
- ? `Planner spec path: ${finalPlanPath}`
1097
- : "Planner spec path: unavailable",
1098
- `Implementation notes path: ${implementationNotesPath}`,
731
+ "This is an active code-refinement stage, not just a commentary stage.",
732
+ "Before producing the report, inspect the actual repository state and recently modified files from the planner/orchestrator context.",
733
+ "Apply safe simplifications with edit/write tools when clear behavior-preserving improvements exist. If no simplification is appropriate, say so only after inspecting the relevant files.",
1099
734
  ].join("\n"),
1100
735
  ],
1101
736
  [
1102
- "required_checks",
737
+ "required_actions_before_output",
1103
738
  [
1104
- "Start by inspecting `git status --short` so unstaged, staged, and untracked changes are all visible.",
1105
- `Review the patch against \`${comparisonBaseBranch}\` with working-tree-aware commands such as \`git diff ${comparisonBaseBranch}\` and \`git diff --cached ${comparisonBaseBranch}\`.`,
1106
- "If untracked files are present, inspect them directly before deciding whether they belong in the PR.",
1107
- "Read the implementation notes file and use its full contents as the body of a PR comment after the pull request exists.",
1108
- "Check the local Git identity with `git config user.name` and `git config user.email` so you can prefer the matching GitHub account when multiple accounts are logged in.",
1109
- "Check whether GitHub credentials are available with non-destructive commands such as `gh auth status` and `gh auth status --show-token-scopes` before attempting PR creation.",
1110
- "If multiple GitHub accounts or hosts are logged in, use the git config username/email as a heuristic to choose the most likely identity, but try each available credential/account and use the first one that can read the repository and create the PR.",
739
+ "1. Identify the concrete files/sections changed in this iteration.",
740
+ "2. Read those files before deciding whether to simplify.",
741
+ "3. Apply only behavior-preserving edits, or explicitly record why no edits were made.",
742
+ "4. Run or recommend focused validation tied to the touched files.",
1111
743
  ].join("\n"),
1112
744
  ],
1113
745
  [
1114
- "pr_policy",
746
+ "handoff_expectations",
747
+ "In the final report, distinguish edits actually applied from observations only. Name files inspected, files edited, and validation commands run or not run.",
748
+ ],
749
+ [
750
+ "process",
1115
751
  [
1116
- "Create a PR only if there are meaningful changes, a remote/branch target is available, credentials are available, and the current state is suitable for review.",
1117
- "If no logged-in account can access the repository or create the PR, do not fake success; report each credential/account tried, what failed, and provide the command the user can run later.",
1118
- "When you successfully create or update the PR, create a PR comment containing the implementation notes file contents as the last action of this workflow stage.",
1119
- "If PR creation is not possible, do not create a standalone comment elsewhere; include the implementation notes path and summary in your report instead.",
1120
- "If the review loop did not approve, prefer reporting the remaining blockers over creating a PR unless the changes are still intentionally ready for human review.",
1121
- "Do not make unrelated code edits in this phase. Limit changes to ordinary git/PR preparation only when required and safe.",
752
+ "Identify recently modified code sections from the iteration context and repository state.",
753
+ "Analyze opportunities to improve elegance, consistency, and maintainability.",
754
+ "Apply project-specific best practices while preserving behavior.",
755
+ "Run or recommend focused validation when appropriate.",
756
+ "Document only significant changes that affect understanding or future maintenance.",
1122
757
  ].join("\n"),
1123
758
  ],
1124
759
  [
1125
760
  "output_format",
1126
761
  [
1127
- "Return Markdown with headings:",
1128
- "1. Change review — summary of files and diff scope inspected",
1129
- "2. PR status — created PR URL, or why no PR was created",
1130
- "3. Implementation notes comment — whether the PR comment was created as the last action, or why it could not be created",
1131
- "4. Commands run — include exit status or clear outcome",
1132
- "5. Follow-up for the user — exact next steps if credentials or repository state blocked PR creation",
762
+ "Markdown with headings:",
763
+ "1. Simplifications applied",
764
+ "2. Behavior-preservation notes",
765
+ "3. Validation run / recommended",
766
+ "4. Skipped risky simplifications",
1133
767
  ].join("\n"),
1134
768
  ],
1135
769
  ]),
1136
- reads: finalPlanPath
1137
- ? [finalPlanPath, implementationNotesPath]
1138
- : [implementationNotesPath],
1139
- ...orchestratorModelConfig,
770
+ previous: [planner, orchestrator],
771
+ ...simplifierModelConfig,
1140
772
  });
1141
- finalPrReport = prResult.text;
1142
773
 
1143
- return {
1144
- result: finalResult,
1145
- plan: finalPlan,
1146
- plan_path: finalPlanPath,
1147
- implementation_notes_path: implementationNotesPath,
1148
- pr_report: finalPrReport,
1149
- approved,
1150
- iterations_completed: iterationsCompleted,
1151
- review_report: reviewReport,
1152
- };
774
+ const discovery = await ctx.parallel(
775
+ [
776
+ {
777
+ name: `infra-locate-${iteration}`,
778
+ task: taggedPrompt([
779
+ [
780
+ "role",
781
+ "You locate project infrastructure needed for patch review.",
782
+ ],
783
+ [
784
+ "objective",
785
+ `Find review-relevant infrastructure for the task: ${prompt}`,
786
+ ],
787
+ [
788
+ "stage_contract",
789
+ [
790
+ "This is a repository-discovery stage. Do not answer from assumptions or common project layouts.",
791
+ "Before output, inspect the repository for each infrastructure category: package scripts, test configs, CI workflows, generated artifacts, lint/typecheck setup, and release gates.",
792
+ "The table is a compact handoff after discovery, not a substitute for discovery.",
793
+ ].join("\n"),
794
+ ],
795
+ [
796
+ "instructions",
797
+ [
798
+ "Locate package scripts, test configs, CI workflows, generated artifacts, lint/typecheck setup, and release gates.",
799
+ "Search/read relevant files such as package manifests, CI workflow directories, test configs, lint/typecheck configs, build scripts, release configs, and generated-artifact markers.",
800
+ "Prefer exact file paths and commands.",
801
+ "Explain how each item should influence review or validation.",
802
+ "If a category does not exist, report `not found` and briefly name the paths or patterns checked.",
803
+ ].join("\n"),
804
+ ],
805
+ [
806
+ "output_format",
807
+ "Markdown table: Area | Path/command | Why it matters | Confidence.",
808
+ ],
809
+ ]),
810
+ ...explorerModelConfig,
811
+ },
812
+ {
813
+ name: `infra-analyze-${iteration}`,
814
+ task: taggedPrompt([
815
+ [
816
+ "role",
817
+ "You analyze integration risks in project infrastructure.",
818
+ ],
819
+ [
820
+ "objective",
821
+ `Assess infrastructure and changed-code risks for the task: ${prompt}`,
822
+ ],
823
+ [
824
+ "stage_contract",
825
+ [
826
+ "This stage analyzes actual repository coupling, not generic integration risks.",
827
+ "Before output, inspect the changed-code context plus relevant infrastructure/configuration files discovered or inferable from the repo.",
828
+ "Classify a risk as confirmed only when repository evidence shows the coupling; otherwise mark it speculative.",
829
+ ].join("\n"),
830
+ ],
831
+ [
832
+ "instructions",
833
+ [
834
+ "Identify hidden coupling with build, tests, linting, runtime config, release automation, or generated files.",
835
+ "Name the exact validations that would most efficiently detect regressions.",
836
+ "Separate confirmed risks from speculative risks.",
837
+ "Do not repeat generic review advice; ground findings in repository evidence.",
838
+ "Copy validation commands from actual repository scripts/configs when available; do not invent commands that are not supported by the repo.",
839
+ ].join("\n"),
840
+ ],
841
+ [
842
+ "evidence_expectations",
843
+ "Each confirmed risk must include concrete evidence: path, command, symbol, config key, script name, or file relationship.",
844
+ ],
845
+ [
846
+ "output_format",
847
+ "Markdown with sections: Confirmed risks, Speculative risks, Validation commands, Evidence.",
848
+ ],
849
+ ]),
850
+ ...explorerModelConfig,
851
+ },
852
+ {
853
+ name: `infra-patterns-${iteration}`,
854
+ task: taggedPrompt([
855
+ ["role", "You find repository patterns that a patch must follow."],
856
+ [
857
+ "objective",
858
+ `Extract conventions relevant to reviewing this task: ${prompt}`,
859
+ ],
860
+ [
861
+ "stage_contract",
862
+ [
863
+ "This is an evidence-gathering stage for repository conventions. Do not describe generic best practices.",
864
+ "Before output, find concrete examples in the repository that demonstrate conventions relevant to this task.",
865
+ "Read enough of each example to understand the convention before reporting it.",
866
+ ].join("\n"),
867
+ ],
868
+ [
869
+ "instructions",
870
+ [
871
+ "Find examples of build/test/style/release/architecture patterns the patch should mirror.",
872
+ "Search for nearby or analogous implementations, tests, configs, scripts, and docs.",
873
+ "Use concrete paths, commands, or symbols as evidence.",
874
+ "Highlight conventions that commonly cause subtle review failures.",
875
+ "If examples conflict, describe the conflict instead of forcing a single rule.",
876
+ "If no relevant example exists, state what was searched and that no pattern was found.",
877
+ ].join("\n"),
878
+ ],
879
+ [
880
+ "handoff_expectations",
881
+ "For every required convention or useful example, include the supporting path, command, symbol, or file relationship so reviewers can verify it quickly.",
882
+ ],
883
+ [
884
+ "output_format",
885
+ "Markdown with sections: Required conventions, Useful examples, Exceptions, Review implications.",
886
+ ],
887
+ ]),
888
+ ...explorerModelConfig,
889
+ },
890
+ ],
891
+ { task: prompt },
892
+ );
893
+
894
+ const discoveryContext = formatDiscovery(discovery);
895
+ const reviewPrompt = taggedPrompt([
896
+ [
897
+ "role",
898
+ [
899
+ "You are acting as a reviewer for a proposed code change made by another engineer.",
900
+ "Persona: a grumpy senior developer who has seen too many fragile patches. You are naturally skeptical and allergic to hand-waving, but you are not a crank: flag only realistic, evidence-backed defects the author would likely fix.",
901
+ "Be terse, concrete, and technically fair. Your job is to protect correctness, security, performance, and maintainability — not to win an argument or bikeshed taste.",
902
+ ].join("\n"),
903
+ ],
904
+ ["objective", `Review the current code delta for the task: ${prompt}`],
905
+ [
906
+ "comparison_baseline",
907
+ [
908
+ `The baseline branch for comparison is \`${comparisonBaseBranch}\`.`,
909
+ "Compare the current working tree against this baseline branch, not against previous workflow reasoning or expected loop progress.",
910
+ `Start with \`git status --short\`, then use working-tree-aware commands such as \`git diff ${comparisonBaseBranch}\` and \`git diff --cached ${comparisonBaseBranch}\` to identify changed tracked files; inspect untracked files from status directly.`,
911
+ ].join("\n"),
912
+ ],
913
+ ["infrastructure_discovery", discoveryContext],
914
+ [
915
+ "project_guidance",
916
+ [
917
+ "Use the repository's AGENTS.md and/or CLAUDE.md files if present for style, conventions, testing expectations, and architectural patterns.",
918
+ "Project-level norms override these general instructions when they are more specific.",
919
+ "Flag deviations only when they affect correctness, security, performance, or maintainability — not personal preference.",
920
+ "If validation requires dependencies or tools that are missing, download or install them using the repository-approved package manager/commands rather than bypassing, mocking, or skipping the verification solely because dependencies are absent.",
921
+ ].join("\n"),
922
+ ],
923
+ [
924
+ "validation_expectations",
925
+ [
926
+ "Inspect the actual diff/repository state rather than trusting stage summaries.",
927
+ "Run or delegate focused validation when it is necessary to distinguish a real bug from a hunch.",
928
+ "If tests or typechecks fail because dependencies are missing, install/download the missing dependencies with the repo's documented package manager instead of bypassing the check.",
929
+ "If validation cannot be completed after reasonable recovery, record the limitation in overall_explanation and reviewer_error; do not use missing dependencies as a reason to approve.",
930
+ ].join("\n"),
931
+ ],
932
+ [
933
+ "bug_selection_guidelines",
934
+ [
935
+ "Use these default guidelines for deciding whether the author would appreciate the issue being flagged. More specific user, project, or file-level guidance overrides them.",
936
+ "Flag an issue only when the original author would likely fix it if they knew about it.",
937
+ "A finding should meaningfully impact accuracy, performance, security, or maintainability.",
938
+ "A finding must be discrete and actionable, not a broad complaint about the whole codebase or a pile of related concerns.",
939
+ "Do not demand rigor inconsistent with the rest of the repository; match the seriousness of existing code and project norms.",
940
+ "Flag only bugs introduced by the current patch; do not flag pre-existing issues unless the patch makes them worse in a concrete way.",
941
+ "Do not rely on unstated assumptions about author intent or codebase behavior.",
942
+ "Speculation is insufficient: identify the code path, scenario, environment, or input that is provably affected.",
943
+ "Do not flag intentional behavior changes as bugs unless they clearly violate the task or documented contract.",
944
+ "Ignore trivial style unless it obscures meaning or violates documented standards in a way that affects correctness/security/maintainability.",
945
+ "If no finding clears this bar, return an empty findings array, mark the patch correct, and set stop_review_loop true.",
946
+ ].join("\n"),
947
+ ],
948
+ [
949
+ "comment_guidelines",
950
+ [
951
+ "Each finding title must start with a priority tag: [P0] drop-everything blocker, [P1] urgent next-cycle fix, [P2] normal fix, [P3] low-priority nice-to-have.",
952
+ "Also include numeric priority: 0 for P0, 1 for P1, 2 for P2, 3 for P3; use null only if priority genuinely cannot be determined.",
953
+ "The body must be one concise paragraph explaining why this is a bug and the exact scenario, environment, or inputs required for it to arise.",
954
+ "Use a matter-of-fact, non-accusatory tone. Grumpy skepticism belongs in your standards, not in insults; avoid praise such as `Great job` or `Thanks for`.",
955
+ "Keep code_location ranges as short as possible, ideally one line and never longer than 5-10 lines unless unavoidable.",
956
+ "The code_location must overlap the diff/change under review.",
957
+ "Use one finding per distinct issue. Do not generate a PR fix.",
958
+ "Use suggestion blocks only for concrete replacement code and preserve exact leading whitespace if you include one.",
959
+ ].join("\n"),
960
+ ],
961
+ [
962
+ "how_many_findings",
963
+ [
964
+ "Return all findings the original author would definitely want to fix.",
965
+ "If no such findings exist, return an empty findings array and mark the patch correct.",
966
+ "Do not stop after the first qualifying finding; continue until every qualifying finding is listed.",
967
+ ].join("\n"),
968
+ ],
969
+ [
970
+ "review_stage_contract",
971
+ [
972
+ "The structured review decision is only valid after you inspect the actual repository state and compare it against the stated baseline branch.",
973
+ "Do not approve based solely on workflow stage summaries or prior agent reasoning.",
974
+ "The tool call is the final verdict after review work, not a shortcut around review work.",
975
+ ].join("\n"),
976
+ ],
977
+ [
978
+ "required_actions_before_tool_call",
979
+ [
980
+ "1. Identify the changed files or diff under review.",
981
+ "2. Read the relevant changed code and directly affected call sites/tests/configs.",
982
+ "3. Run or delegate focused validation when needed to resolve uncertainty.",
983
+ "4. If you cannot inspect or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
984
+ ].join("\n"),
985
+ ],
986
+ [
987
+ "evidence_expectations",
988
+ [
989
+ "The overall_explanation should briefly mention what was inspected and what validation was run or why validation was not completed.",
990
+ "Every finding must cite a concrete changed location and affected scenario.",
991
+ ].join("\n"),
992
+ ],
993
+ [
994
+ "structured_output_contract",
995
+ [
996
+ "You have a structured-output tool named review_decision. Use it after your investigation and validation attempts.",
997
+ "The tool terminates the turn and provides the structured data; do not emit a separate final assistant response after calling it.",
998
+ "The review loop decides whether to stop only by parsing the JSON object returned by this tool; invalid JSON, missing fields, reviewer_error, or stop_review_loop=false are treated as not approved for safety.",
999
+ "Set stop_review_loop=true only when findings is empty, overall_correctness is patch is correct, and reviewer_error is null/omitted.",
1000
+ "If you hit a reviewer/tool/validation error, still return the object with stop_review_loop=false and reviewer_error populated instead of pretending the patch is approved.",
1001
+ "The JSON must match this schema exactly:",
1002
+ "{",
1003
+ ' "findings": [',
1004
+ " {",
1005
+ ' "title": "<≤ 80 chars, imperative, starts with [P0]/[P1]/[P2]/[P3]>",',
1006
+ ' "body": "<one paragraph of valid Markdown explaining why this is a problem; cite files/lines/functions>",',
1007
+ ' "confidence_score": <float 0.0-1.0>,',
1008
+ ' "priority": <int 0-3 or null>,',
1009
+ ' "code_location": {',
1010
+ ' "absolute_file_path": "<absolute file path>",',
1011
+ ' "line_range": {"start": <int>, "end": <int>}',
1012
+ " }",
1013
+ " }",
1014
+ " ],",
1015
+ ' "overall_correctness": "patch is correct" | "patch is incorrect",',
1016
+ ' "overall_explanation": "<1-3 sentence explanation justifying the verdict>",',
1017
+ ' "overall_confidence_score": <float 0.0-1.0>,',
1018
+ ' "stop_review_loop": <boolean>,',
1019
+ ' "reviewer_error": null | {"kind": "validation_unavailable" | "dependency_unavailable" | "tool_failure" | "reviewer_failure", "message": "<what failed>", "attempted_recovery": "<what you tried>"}',
1020
+ "}",
1021
+ ].join("\n"),
1022
+ ],
1023
+ ]);
1024
+
1025
+ let reviews: WorkflowTaskResult[];
1026
+ try {
1027
+ reviews = await ctx.parallel(
1028
+ [
1029
+ {
1030
+ name: "reviewer-a",
1031
+ task: reviewPrompt,
1032
+ ...reviewerModelConfig,
1033
+ },
1034
+ {
1035
+ name: "reviewer-b",
1036
+ task: reviewPrompt,
1037
+ ...reviewerModelConfig,
1038
+ },
1039
+ ],
1040
+ { task: prompt, failFast: false },
1041
+ );
1042
+ } catch (err) {
1043
+ const message = err instanceof Error ? err.message : String(err);
1044
+ reviews = [reviewerErrorResult(iteration, message)];
1045
+ }
1046
+
1047
+ approved =
1048
+ reviews.length > 0 &&
1049
+ reviews.every((review) => reviewApproved(review.text));
1050
+ reviewReport = formatReview(reviews);
1051
+ if (approved) break;
1052
+ }
1053
+
1054
+ const prResult = await ctx.task("pull-request", {
1055
+ prompt: taggedPrompt([
1056
+ [
1057
+ "role",
1058
+ "You are a careful release engineer preparing a pull request from the current workspace state.",
1059
+ ],
1060
+ [
1061
+ "objective",
1062
+ `Review the changes since the base branch \`${comparisonBaseBranch}\` and create a pull request if possible and credentials are available.`,
1063
+ ],
1064
+ [
1065
+ "workflow_context",
1066
+ [
1067
+ `Original task: ${prompt}`,
1068
+ `Review loop approved: ${approved ? "yes" : "no"}`,
1069
+ finalPlanPath
1070
+ ? `Planner spec path: ${finalPlanPath}`
1071
+ : "Planner spec path: unavailable",
1072
+ `Implementation notes path: ${implementationNotesPath}`,
1073
+ ].join("\n"),
1074
+ ],
1075
+ [
1076
+ "required_checks",
1077
+ [
1078
+ "Start by inspecting `git status --short` so unstaged, staged, and untracked changes are all visible.",
1079
+ `Review the patch against \`${comparisonBaseBranch}\` with working-tree-aware commands such as \`git diff ${comparisonBaseBranch}\` and \`git diff --cached ${comparisonBaseBranch}\`.`,
1080
+ "If untracked files are present, inspect them directly before deciding whether they belong in the PR.",
1081
+ "Read the implementation notes file and use its full contents as the body of a PR comment after the pull request exists.",
1082
+ "Check the local Git identity with `git config user.name` and `git config user.email` so you can prefer the matching GitHub account when multiple accounts are logged in.",
1083
+ "Check whether GitHub credentials are available with non-destructive commands such as `gh auth status` and `gh auth status --show-token-scopes` before attempting PR creation.",
1084
+ "If multiple GitHub accounts or hosts are logged in, use the git config username/email as a heuristic to choose the most likely identity, but try each available credential/account and use the first one that can read the repository and create the PR.",
1085
+ ].join("\n"),
1086
+ ],
1087
+ [
1088
+ "pr_policy",
1089
+ [
1090
+ "Create a PR only if there are meaningful changes, a remote/branch target is available, credentials are available, and the current state is suitable for review.",
1091
+ "If no logged-in account can access the repository or create the PR, do not fake success; report each credential/account tried, what failed, and provide the command the user can run later.",
1092
+ "When you successfully create or update the PR, create a PR comment containing the implementation notes file contents as the last action of this workflow stage.",
1093
+ "Ralph-created worktrees are detached HEAD checkouts. If you are preparing a PR from a detached HEAD, create and push a branch from the current HEAD, for example with `git checkout -b <branch>` or `git push origin HEAD:refs/heads/<branch>`, before opening the PR.",
1094
+ "Ralph does not remove git_worktree_dir automatically. Leave the worktree intact for retries or user recovery.",
1095
+ "If PR creation is not possible, do not create a standalone comment elsewhere; include the implementation notes path and summary in your report instead.",
1096
+ "If the review loop did not approve, prefer reporting the remaining blockers over creating a PR unless the changes are still intentionally ready for human review.",
1097
+ "Do not make unrelated code edits in this phase. Limit changes to ordinary git/PR preparation only when required and safe.",
1098
+ ].join("\n"),
1099
+ ],
1100
+ [
1101
+ "output_format",
1102
+ [
1103
+ "Return Markdown with headings:",
1104
+ "1. Change review — summary of files and diff scope inspected",
1105
+ "2. PR status — created PR URL, or why no PR was created",
1106
+ "3. Implementation notes comment — whether the PR comment was created as the last action, or why it could not be created",
1107
+ "4. Commands run — include exit status or clear outcome",
1108
+ "5. Follow-up for the user — exact next steps if credentials or repository state blocked PR creation",
1109
+ ].join("\n"),
1110
+ ],
1111
+ ]),
1112
+ reads: finalPlanPath
1113
+ ? [finalPlanPath, implementationNotesPath]
1114
+ : [implementationNotesPath],
1115
+ ...orchestratorModelConfig,
1116
+ });
1117
+ finalPrReport = prResult.text;
1118
+
1119
+ return {
1120
+ result: finalResult,
1121
+ plan: finalPlan,
1122
+ plan_path: finalPlanPath,
1123
+ implementation_notes_path: implementationNotesPath,
1124
+ pr_report: finalPrReport,
1125
+ approved,
1126
+ iterations_completed: iterationsCompleted,
1127
+ review_report: reviewReport,
1128
+ };
1129
+ }
1130
+
1131
+ export default defineWorkflow("ralph")
1132
+ .description(
1133
+ "Plan → orchestrate → simplify → parallel review loop with bounded iteration.",
1134
+ )
1135
+ .input("prompt", {
1136
+ type: "text",
1137
+ required: true,
1138
+ description: "The task or goal to plan, execute, and refine.",
1139
+ })
1140
+ .input("max_loops", {
1141
+ type: "number",
1142
+ default: DEFAULT_MAX_LOOPS,
1143
+ description: `Maximum plan/orchestrate/review iterations (default ${DEFAULT_MAX_LOOPS}).`,
1144
+ })
1145
+ .input("base_branch", {
1146
+ type: "string",
1147
+ default: "origin/main",
1148
+ description:
1149
+ "Branch reviewers compare the current code delta against (default origin/main).",
1150
+ })
1151
+ .input("git_worktree_dir", {
1152
+ type: "string",
1153
+ default: "",
1154
+ description:
1155
+ "Optional Git worktree path. Ralph must start inside a Git repo; absolute paths are used as-is, relative paths resolve from the repo root, existing Git worktrees from the invoking repository are reused/shared as-is, and missing paths are created from base_branch.",
1156
+ })
1157
+ .worktreeFromInputs({
1158
+ gitWorktreeDir: "git_worktree_dir",
1159
+ baseBranch: "base_branch",
1160
+ })
1161
+ .run(async (ctx) => {
1162
+ const workflowCtx = ctx as WorkflowRunContext<RalphInputs>;
1163
+ const workflowStartCwd = workflowCtx.cwd ?? process.cwd();
1164
+ const inputs = workflowCtx.inputs;
1165
+ const prompt = inputs.prompt ?? "";
1166
+ const maxLoops = positiveInteger(inputs.max_loops, DEFAULT_MAX_LOOPS);
1167
+ const comparisonBaseBranch = normalizeBranchInput(
1168
+ inputs.base_branch,
1169
+ "origin/main",
1170
+ );
1171
+ return await runRalphWorkflow(workflowCtx, {
1172
+ prompt,
1173
+ maxLoops,
1174
+ comparisonBaseBranch,
1175
+ workflowStartCwd,
1176
+ });
1153
1177
  })
1154
- .compile();
1178
+ .compile();