@bastani/atomic 0.8.14-0 → 0.8.15-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/CHANGELOG.md +35 -0
  2. package/README.md +0 -8
  3. package/dist/builtin/intercom/package.json +1 -1
  4. package/dist/builtin/mcp/CHANGELOG.md +3 -0
  5. package/dist/builtin/mcp/index.ts +4 -8
  6. package/dist/builtin/mcp/package.json +1 -1
  7. package/dist/builtin/subagents/package.json +1 -1
  8. package/dist/builtin/subagents/skills/tmux/SKILL.md +220 -0
  9. package/dist/builtin/subagents/skills/tmux/scripts/find-sessions.sh +112 -0
  10. package/dist/builtin/subagents/skills/tmux/scripts/wait-for-text.sh +83 -0
  11. package/dist/builtin/web-access/package.json +1 -1
  12. package/dist/builtin/workflows/CHANGELOG.md +10 -1
  13. package/dist/builtin/workflows/README.md +3 -1
  14. package/dist/builtin/workflows/builtin/ralph.ts +222 -295
  15. package/dist/builtin/workflows/package.json +1 -1
  16. package/dist/builtin/workflows/src/extension/background-ui-adapter.ts +20 -11
  17. package/dist/builtin/workflows/src/extension/index.ts +1 -0
  18. package/dist/builtin/workflows/src/extension/status-writer.ts +18 -3
  19. package/dist/builtin/workflows/src/runs/background/runner.ts +8 -10
  20. package/dist/builtin/workflows/src/runs/foreground/executor.ts +484 -91
  21. package/dist/builtin/workflows/src/runs/foreground/stage-control-registry.ts +13 -2
  22. package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +41 -15
  23. package/dist/builtin/workflows/src/runs/shared/graph-inference.ts +31 -0
  24. package/dist/builtin/workflows/src/runs/shared/prompt-callsite.ts +98 -0
  25. package/dist/builtin/workflows/src/shared/persistence-restore.ts +3 -1
  26. package/dist/builtin/workflows/src/shared/persistence-session-entries.ts +4 -0
  27. package/dist/builtin/workflows/src/shared/store-types.ts +12 -1
  28. package/dist/builtin/workflows/src/shared/store.ts +77 -3
  29. package/dist/builtin/workflows/src/tui/graph-view.ts +17 -1
  30. package/dist/builtin/workflows/src/tui/prompt-card.ts +185 -30
  31. package/dist/builtin/workflows/src/tui/stage-chat-view.ts +386 -21
  32. package/docs/changelog.mdx +41 -14
  33. package/docs/docs.json +1 -0
  34. package/docs/extensions.md +19 -19
  35. package/docs/images/workflow-input-picker.png +0 -0
  36. package/docs/images/workflow-list.png +0 -0
  37. package/docs/index.md +33 -27
  38. package/docs/providers.md +2 -2
  39. package/docs/quickstart.md +15 -15
  40. package/docs/sdk.md +8 -8
  41. package/docs/sessions.md +5 -5
  42. package/docs/settings.md +27 -1
  43. package/docs/skills.md +2 -2
  44. package/docs/subagents.md +157 -0
  45. package/docs/usage.md +7 -7
  46. package/docs/windows.md +8 -0
  47. package/docs/workflows.md +62 -9
  48. package/package.json +2 -1
  49. package/docs/images/doom-extension.png +0 -0
  50. package/docs/images/exy.png +0 -3
@@ -7,16 +7,14 @@
7
7
  * iteration feeds review findings into the next planner with ctx.task().
8
8
  */
9
9
 
10
- import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
10
+ import { mkdtemp, writeFile } from "node:fs/promises";
11
11
  import { tmpdir } from "node:os";
12
- import { dirname, extname, join } from "node:path";
12
+ import { dirname, join } from "node:path";
13
13
  import { defineWorkflow } from "../src/index.js";
14
14
  import type { WorkflowTaskResult } from "../src/shared/types.js";
15
15
 
16
16
  const DEFAULT_MAX_LOOPS = 10;
17
- const DEFAULT_SPEC_DIR = "specs";
18
17
  const IMPLEMENTATION_NOTES_FILENAME = "implementation-notes.md";
19
- const MAX_SPEC_SLUG_LENGTH = 80;
20
18
 
21
19
  type ReviewFinding = {
22
20
  readonly title: string;
@@ -47,6 +45,9 @@ type ReviewDecision = {
47
45
  readonly overall_correctness: "patch is correct" | "patch is incorrect";
48
46
  readonly overall_explanation: string;
49
47
  readonly overall_confidence_score: number;
48
+ readonly goal_oracle_satisfied: boolean;
49
+ readonly receipt_assessment: string;
50
+ readonly verification_remaining: string;
50
51
  readonly stop_review_loop: boolean;
51
52
  readonly reviewer_error?: ReviewerError | null;
52
53
  };
@@ -59,6 +60,9 @@ const reviewDecisionSchema = {
59
60
  "overall_correctness",
60
61
  "overall_explanation",
61
62
  "overall_confidence_score",
63
+ "goal_oracle_satisfied",
64
+ "receipt_assessment",
65
+ "verification_remaining",
62
66
  "stop_review_loop",
63
67
  ],
64
68
  properties: {
@@ -99,6 +103,9 @@ const reviewDecisionSchema = {
99
103
  },
100
104
  overall_explanation: { type: "string" },
101
105
  overall_confidence_score: { type: "number", minimum: 0, maximum: 1 },
106
+ goal_oracle_satisfied: { type: "boolean" },
107
+ receipt_assessment: { type: "string" },
108
+ verification_remaining: { type: "string" },
102
109
  stop_review_loop: { type: "boolean" },
103
110
  reviewer_error: {
104
111
  anyOf: [
@@ -148,75 +155,98 @@ const reviewDecisionTool = {
148
155
  },
149
156
  };
150
157
 
151
- const PLANNER_RFC_TEMPLATE = `
152
- # [Project Name] Technical Design Document / RFC
158
+ const GOAL_CONTRACT_TEMPLATE = `
159
+ # Goal Contract / Execution Brief
153
160
 
154
161
  | Document Metadata | Details |
155
162
  | ---------------------- | ------------------------------------------------------------------------------ |
156
163
  | Author(s) | !\`git config user.name\` |
157
- | Status | Draft (WIP) / In Review (RFC) / Approved / Implemented / Deprecated / Rejected |
164
+ | Status | Draft (WIP) / In Review (goal contract) / Approved / Implemented / Deprecated / Rejected |
158
165
  | Team / Owner | |
159
166
  | Created / Last Updated | |
160
167
 
161
- ## 1. Executive Summary
168
+ ## 1. Outcome
162
169
 
163
- ## 2. Context and Motivation
170
+ ## 2. Scope and Non-Goals
164
171
 
165
- ### 2.1 Current State
172
+ ## 3. Verification Oracle
166
173
 
167
- ### 2.2 The Problem
174
+ ## 4. Work Surface and Execution Loop
168
175
 
169
- ## 3. Goals and Non-Goals
176
+ ## 5. Proof and Review Criteria
170
177
 
171
- ### 3.1 Functional Goals
178
+ ## 6. Implementation Strategy
172
179
 
173
- ### 3.2 Non-Goals (Out of Scope)
180
+ ## 7. Context and Motivation
174
181
 
175
- ## 4. Proposed Solution (High-Level Design)
182
+ ### 7.1 Current State
176
183
 
177
- ### 4.1 System Architecture Diagram
184
+ ### 7.2 The Problem
185
+
186
+ ## 8. Bounded Work Slices
187
+
188
+ ## 9. Proposed Approach
189
+
190
+ ### 9.1 System Architecture Diagram
178
191
 
179
192
  Include a Mermaid system architecture diagram grounded in the actual components this work touches.
180
193
 
181
- ### 4.2 Architectural Pattern
194
+ ### 9.2 Architectural Pattern
182
195
 
183
- ### 4.3 Key Components
196
+ ### 9.3 Key Components
184
197
 
185
198
  | Component | Responsibility | Technology Stack | Justification |
186
199
  | --------- | -------------- | ---------------- | ------------- |
187
200
 
188
- ## 5. Detailed Design
201
+ ## 10. Implementation Notes
189
202
 
190
- ### 5.1 API Interfaces
203
+ ### 10.1 API Interfaces
191
204
 
192
- ### 5.2 Data Model / Schema
205
+ ### 10.2 Data Model / Schema
193
206
 
194
- ### 5.3 Algorithms and State Management
207
+ ### 10.3 Algorithms and State Management
195
208
 
196
- ## 6. Alternatives Considered
209
+ ## 11. Alternatives Considered
197
210
 
198
211
  | Option | Pros | Cons | Reason for Rejection |
199
212
  | ------ | ---- | ---- | -------------------- |
200
213
 
201
- ## 7. Cross-Cutting Concerns
214
+ ## 12. Cross-Cutting Concerns
202
215
 
203
- ### 7.1 Security and Privacy
216
+ ### 12.1 Security and Privacy
204
217
 
205
- ### 7.2 Observability Strategy
218
+ ### 12.2 Observability Strategy
206
219
 
207
- ### 7.3 Scalability and Capacity Planning
220
+ ### 12.3 Scalability and Capacity Planning
208
221
 
209
- ## 8. Migration, Rollout, and Testing
222
+ ## 13. Validation and Rollout
210
223
 
211
- ### 8.1 Deployment Strategy
224
+ ### 13.1 Deployment Strategy
212
225
 
213
- ### 8.2 Data Migration Plan
226
+ ### 13.2 Data Migration Plan
214
227
 
215
- ### 8.3 Test Plan
228
+ ### 13.3 Test Plan
216
229
 
217
- ## 9. Open Questions / Unresolved Issues
230
+ ## 14. Open Questions / Unresolved Issues
218
231
  `.trim();
219
232
 
233
+ const GOAL_OPERATING_LOOP =
234
+ "intent, verification oracle, work surface, execution loop, and proof";
235
+
236
+ const GOAL_METHOD_REFERENCE = [
237
+ "Maintain a concrete goal contract for the run: intent, verification oracle, work surface, execution loop, and proof.",
238
+ "Infer the owner outcome and a verifiable oracle from the user's task and repository evidence; do not ask the user unless the workflow is truly blocked.",
239
+ "Treat any user-supplied planning artifacts as supporting context, not as the primary success criterion.",
240
+ "Keep pressure on current evidence: the current worktree, artifacts, command output, tests, demos, generated files, and explicit human decisions are more authoritative than prior conversation summaries.",
241
+ "Never call the work complete because planning, discovery, task selection, or a substantial-looking diff exists; completion requires proof mapped back to the original owner outcome.",
242
+ ].join("\n");
243
+
244
+ const RECEIPT_EXPECTATIONS = [
245
+ "Every implementation, simplification, discovery, review, and audit stage should leave a receipt reviewers can inspect.",
246
+ "A useful receipt names what changed, files touched, commands or checks run with outcomes, artifacts produced, decisions made, blockers, residual risks, and the next safest action.",
247
+ "Receipts should explicitly say which part of the verification oracle they support or what verification remains.",
248
+ ].join("\n");
249
+
220
250
  type PromptSection = readonly [tag: string, content: string];
221
251
 
222
252
  function taggedPrompt(sections: readonly PromptSection[]): string {
@@ -245,60 +275,32 @@ function normalizeBranchInput(
245
275
  return looksLikeSafeGitRef ? trimmed : fallback;
246
276
  }
247
277
 
248
- function slugifySpecTopic(prompt: string): string {
249
- const slug = prompt
250
- .toLowerCase()
251
- .replace(/[^a-z0-9]+/g, "-")
252
- .replace(/^-+|-+$/g, "")
253
- .slice(0, MAX_SPEC_SLUG_LENGTH)
254
- .replace(/-+$/g, "");
255
- return slug.length > 0 ? slug : "plan";
256
- }
257
-
258
- function defaultSpecPath(prompt: string, now = new Date()): string {
259
- const date = now.toISOString().slice(0, 10);
260
- return join(DEFAULT_SPEC_DIR, `${date}-${slugifySpecTopic(prompt)}.md`);
261
- }
262
-
263
- function suffixedPath(path: string, suffix: number): string {
264
- const extension = extname(path);
265
- const stem = extension.length === 0 ? path : path.slice(0, -extension.length);
266
- return `${stem}-${suffix}${extension}`;
267
- }
268
-
269
- function isFileExistsError(error: unknown): boolean {
270
- return error instanceof Error && (error as { readonly code?: string }).code === "EEXIST";
271
- }
272
-
273
- async function writeSpecFile(path: string, content: string): Promise<string> {
274
- await mkdir(dirname(path), { recursive: true });
275
-
276
- for (let suffix = 0; ; suffix += 1) {
277
- const candidate = suffix === 0 ? path : suffixedPath(path, suffix + 1);
278
- try {
279
- await writeFile(candidate, content.endsWith("\n") ? content : `${content}\n`, {
280
- encoding: "utf8",
281
- flag: "wx",
282
- });
283
- return candidate;
284
- } catch (error) {
285
- if (isFileExistsError(error)) continue;
286
- throw error;
287
- }
288
- }
289
- }
290
-
291
278
  async function createImplementationNotesFile(prompt: string): Promise<string> {
292
- const notesDir = await mkdtemp(join(tmpdir(), "atomic-ralph-notes-"));
279
+ const notesDir = await mkdtemp(join(tmpdir(), "atomic-goal-notes-"));
293
280
  const notesPath = join(notesDir, IMPLEMENTATION_NOTES_FILENAME);
294
281
  const initialNotes = [
295
282
  "# Implementation Notes",
296
283
  "",
297
284
  `Task: ${prompt || "(empty prompt)"}`,
298
285
  "",
299
- "## Running Notes",
286
+ "## Goal Charter",
287
+ "",
288
+ "- Outcome: inferred by the planner/orchestrator from the user task and repository evidence.",
289
+ "- Scope: record allowed changes and explicit non-goals as they become clear.",
290
+ "- Oracle: record the observable signal that proves the owner outcome is true.",
291
+ `- Execution contract: ${GOAL_OPERATING_LOOP}`,
292
+ "- Proof: collect receipts that map implementation and validation back to the oracle.",
293
+ "",
294
+ "## Work Surface State",
300
295
  "",
301
- "- Record implementation decisions, deviations from the spec, tradeoffs, blockers, validation notes, and anything else the user should know.",
296
+ "- Active work: none recorded yet.",
297
+ "- Blocked work: none recorded yet.",
298
+ "- Completed work: none recorded yet.",
299
+ "- Verification status: no receipts yet.",
300
+ "",
301
+ "## Receipts",
302
+ "",
303
+ "- Record implementation decisions, deviations from the goal contract, tradeoffs, blockers, validation notes, artifacts, and anything else the user should know.",
302
304
  ].join("\n");
303
305
  await writeFile(notesPath, `${initialNotes}\n`, {
304
306
  encoding: "utf8",
@@ -320,6 +322,9 @@ function parseReviewDecision(text: string): ReviewDecision | undefined {
320
322
  if (typeof parsed.stop_review_loop !== "boolean") return undefined;
321
323
  if (typeof parsed.overall_explanation !== "string") return undefined;
322
324
  if (typeof parsed.overall_confidence_score !== "number") return undefined;
325
+ if (typeof parsed.goal_oracle_satisfied !== "boolean") return undefined;
326
+ if (typeof parsed.receipt_assessment !== "string") return undefined;
327
+ if (typeof parsed.verification_remaining !== "string") return undefined;
323
328
  return parsed as ReviewDecision;
324
329
  } catch {
325
330
  return undefined;
@@ -332,6 +337,7 @@ function reviewApproved(text: string): boolean {
332
337
  return (
333
338
  decision.stop_review_loop === true &&
334
339
  decision.overall_correctness === "patch is correct" &&
340
+ decision.goal_oracle_satisfied === true &&
335
341
  decision.findings.length === 0 &&
336
342
  decision.reviewer_error == null
337
343
  );
@@ -347,6 +353,10 @@ function reviewerErrorResult(
347
353
  overall_explanation:
348
354
  "Reviewer execution failed, so the review loop cannot safely approve this iteration.",
349
355
  overall_confidence_score: 0,
356
+ goal_oracle_satisfied: false,
357
+ receipt_assessment:
358
+ "No reviewer receipt could be produced because reviewer execution failed.",
359
+ verification_remaining: "Recover reviewer execution and re-run oracle validation.",
350
360
  stop_review_loop: false,
351
361
  reviewer_error: {
352
362
  kind: "reviewer_failure",
@@ -362,19 +372,6 @@ function reviewerErrorResult(
362
372
  };
363
373
  }
364
374
 
365
- function discoveryContextLabel(name: string | undefined): string {
366
- if (name?.startsWith("infra-locate-")) return "Infrastructure locator";
367
- if (name?.startsWith("infra-analyze-")) return "Infrastructure analyzer";
368
- if (name?.startsWith("infra-patterns-")) return "Infrastructure pattern finder";
369
- return "Infrastructure discovery";
370
- }
371
-
372
- function formatDiscovery(results: readonly WorkflowTaskResult[]): string {
373
- return results
374
- .map((result) => `### ${discoveryContextLabel(result.name)}\n\n${result.text}`)
375
- .join("\n\n---\n\n");
376
- }
377
-
378
375
  function formatReview(results: readonly WorkflowTaskResult[]): string {
379
376
  return results
380
377
  .map((result) => `### ${result.name}\n\n${result.text}`)
@@ -417,6 +414,7 @@ export default defineWorkflow("ralph")
417
414
  let finalResult = "";
418
415
  let finalPrReport = "";
419
416
  const implementationNotesPath = await createImplementationNotesFile(prompt);
417
+ const goalContractPath = join(dirname(implementationNotesPath), "goal-contract.md");
420
418
  let approved = false;
421
419
  let iterationsCompleted = 0;
422
420
 
@@ -483,18 +481,6 @@ export default defineWorkflow("ralph")
483
481
  customTools: [reviewDecisionTool],
484
482
  };
485
483
 
486
- let explorerModelConfig = {
487
- model: "openai/gpt-5.4-mini",
488
- fallbackModels: [
489
- "openai-codex/gpt-5.4-mini",
490
- "github-copilot/gpt-5.4-mini",
491
- "anthropic/claude-haiku-4-5",
492
- "github-copilot/claude-haiku-4.5",
493
- ],
494
- thinkingLevel: "low" as const,
495
- tools: noAskQuestionToolSet,
496
- };
497
-
498
484
  for (let iteration = 1; iteration <= maxLoops; iteration += 1) {
499
485
  iterationsCompleted = iteration;
500
486
 
@@ -502,19 +488,21 @@ export default defineWorkflow("ralph")
502
488
  prompt: taggedPrompt([
503
489
  [
504
490
  "role",
505
- "You are a technical architect. Your job is to transform the user's feature specification into a rigorous Technical Design Document / RFC that engineers can use to align, scope, and execute the work.",
491
+ "You are a technical architect. Your job is to transform the user's task into a goal charter, verification oracle, review criteria, and supporting goal contract that engineers can use to execute against evidence.",
506
492
  ],
493
+ ["goal_framework", GOAL_METHOD_REFERENCE],
507
494
  [
508
495
  "critical_deliverable",
509
496
  [
510
- "Your final output is a filled-in RFC rendered as markdown text.",
511
- "Render the RFC Template in this prompt with every section populated by feature-specific content drawn from the user's specification and your codebase investigation.",
512
- "Do not implement code changes in this stage; this stage only investigates and authors the RFC.",
497
+ "Your final output is a filled-in goal contract rendered as markdown text, with explicit outcome, scope, verification oracle, work surface, and proof sections.",
498
+ "Render the goal contract template in this prompt with every section populated by feature-specific content drawn from the user's task and your codebase investigation.",
499
+ "The goal contract artifact supports implementation, but the primary success criterion is whether receipts and verification prove the inferred owner outcome.",
500
+ "Do not implement code changes in this stage; this stage only investigates, infers the verification contract, and authors the goal contract.",
513
501
  ].join("\n"),
514
502
  ],
515
503
  [
516
504
  "task",
517
- `Plan iteration ${iteration}/${maxLoops} for this user specification:\n${prompt}`,
505
+ `Plan iteration ${iteration}/${maxLoops} for this user task:\n${prompt}`,
518
506
  ],
519
507
  [
520
508
  "previous_review_findings",
@@ -523,20 +511,20 @@ export default defineWorkflow("ralph")
523
511
  : "No prior review findings; this is the first iteration.",
524
512
  ],
525
513
  [
526
- "input_spec_files",
514
+ "input_goal_contract_files",
527
515
  [
528
- "If the user specification is a file path instead of raw prose, read that file and use it as source material for the RFC.",
529
- "Still author the RFC normally; do not output only a forwarded path.",
516
+ "If the user task is a file path instead of raw prose, read that file and use it as source material for the goal contract.",
517
+ "Still author the goal contract normally; do not output only a forwarded path.",
530
518
  ].join("\n"),
531
519
  ],
532
520
  [
533
521
  "investigation_phase",
534
522
  [
535
- "Before drafting, read the specification carefully and identify the concrete problem, success criteria, hard constraints, and non-goals.",
536
- "Survey the codebase using file/search tools such as read plus grep/rg/find/glob-style shell commands to ground the RFC in current architecture.",
523
+ "Before drafting, read the task carefully and infer the concrete goal contract: outcome, scope, non-goals, verification oracle, work surface, proof expectations, and review criteria tied to the oracle.",
524
+ "Survey the codebase using file/search tools such as read plus grep/rg/find/glob-style shell commands to ground the goal contract in current architecture.",
537
525
  "Name concrete services, modules, files, tests, data models, APIs, CLIs, config files, and external integrations this work will touch.",
538
526
  "Capture metadata with bash: `git config user.name` for Author(s), and `date '+%Y-%m-%d'` for Created / Last Updated.",
539
- "Look for prior art: existing RFCs, ADRs, README files, specs, docs, tests, or code comments that explain why the current state exists.",
527
+ "Look for prior art: existing goal contracts, ADRs, README files, plans, docs, tests, or code comments that explain why the current state exists.",
540
528
  ].join("\n"),
541
529
  ],
542
530
  [
@@ -545,37 +533,40 @@ export default defineWorkflow("ralph")
545
533
  "Be specific: `src/server/auth.ts:42` beats `the auth layer`.",
546
534
  "Trade-offs over conclusions: Alternatives Considered must include at least two real alternatives with honest pros, cons, and rejection reasons.",
547
535
  "Non-goals matter: explicitly exclude work that is out of scope to prevent scope creep.",
548
- "Diagrams are load-bearing: Section 4.1 must include a Mermaid system architecture diagram grounded in real components.",
549
- "Surface open questions in Section 9 with owner placeholders such as `[OWNER: infra team]`; do not paper over uncertainty.",
536
+ "Diagrams are load-bearing when architecture changes are involved: include a Mermaid system architecture diagram grounded in real components in Section 9.1; for non-architecture work, state why no diagram is needed.",
537
+ "Surface open questions in Section 14 with owner placeholders such as `[OWNER: infra team]`; do not paper over uncertainty, but make the workflow autonomous by choosing safe defaults and verifiable assumptions when possible.",
550
538
  "Match depth to stakes: a small refactor can be concise, but every template section header must remain present.",
551
539
  "If prior review findings are present, explicitly address each finding or explain why it is obsolete.",
540
+ "For Sections 1-5, include review criteria tied to the oracle, not document-completeness criteria.",
552
541
  ].join("\n"),
553
542
  ],
554
543
  [
555
544
  "stage_contract",
556
545
  [
557
- "This stage is investigation-first RFC authoring. The RFC is only valid if it is grounded in repository inspection performed during this stage.",
558
- "Do not fill the template from generic architecture guesses. Before writing the final RFC, inspect relevant code, docs, tests, configs, and prior design material.",
546
+ "This stage is investigation-first goal-charter and goal contract authoring. The goal contract is only valid if it is grounded in repository inspection performed during this stage.",
547
+ "Do not fill the template from generic architecture guesses. Before writing the final goal contract, inspect relevant code, docs, tests, configs, and prior design material.",
559
548
  "Treat the output format as the report after investigation, not a substitute for investigation.",
549
+ "Treat the goal contract as supporting context rather than the primary success criterion; success is receipt-backed satisfaction of the verification oracle.",
560
550
  ].join("\n"),
561
551
  ],
562
552
  [
563
553
  "evidence_expectations",
564
554
  [
565
- "Every major design claim should be traceable to concrete evidence: file paths, symbols, commands, docs, tests, configs, or prior RFCs.",
566
- "Include those concrete references inside the RFC sections where they support the design.",
567
- "If expected evidence cannot be found, say so in the relevant RFC section or Open Questions rather than papering over the gap.",
555
+ "Every major design claim should be traceable to concrete evidence: file paths, symbols, commands, docs, tests, configs, or prior goal contracts.",
556
+ "Include those concrete references inside the goal contract sections where they support the design.",
557
+ "For the verification oracle, name the observable proof signal: passing tests, browser walkthrough, generated artifact, benchmark, migration result, demo transcript, source-backed answer, or explicit human decision.",
558
+ "If expected evidence cannot be found, say so in the relevant goal contract section or Open Questions rather than papering over the gap.",
568
559
  ].join("\n"),
569
560
  ],
570
561
  [
571
562
  "output_discipline",
572
563
  [
573
- "Render the RFC Template exactly as the final document structure: preserve every header and the metadata table.",
574
- "Replace instructional placeholders with real, feature-specific content; do not leave template guidance in the final RFC.",
575
- "Output nothing after the RFC: no meta-commentary, no summary of what you wrote, no implementation log.",
564
+ "Render the goal contract template exactly as the final document structure: preserve every header and the metadata table.",
565
+ "Replace instructional placeholders with real, feature-specific content; do not leave template guidance in the final goal contract.",
566
+ "Output nothing after the goal contract: no meta-commentary, no summary of what you wrote, no implementation log.",
576
567
  ].join("\n"),
577
568
  ],
578
- ["rfc_template", PLANNER_RFC_TEMPLATE],
569
+ ["goal_contract_template", GOAL_CONTRACT_TEMPLATE],
579
570
  ]),
580
571
  ...(reviewReport
581
572
  ? { previous: { name: "review-report", text: reviewReport } }
@@ -583,8 +574,11 @@ export default defineWorkflow("ralph")
583
574
  ...plannerModelConfig,
584
575
  });
585
576
  finalPlan = planner.text;
586
- const specPath = await writeSpecFile(defaultSpecPath(prompt), planner.text);
587
- finalPlanPath = specPath;
577
+ await writeFile(goalContractPath, planner.text.endsWith("\n") ? planner.text : `${planner.text}\n`, {
578
+ encoding: "utf8",
579
+ flag: "w",
580
+ });
581
+ finalPlanPath = goalContractPath;
588
582
 
589
583
  const orchestrator = await ctx.task(`orchestrator-${iteration}`, {
590
584
  prompt: taggedPrompt([
@@ -596,24 +590,41 @@ export default defineWorkflow("ralph")
596
590
  "objective",
597
591
  `Implement iteration ${iteration}/${maxLoops} for the task: ${prompt}`,
598
592
  ],
593
+ ["goal_framework", GOAL_METHOD_REFERENCE],
599
594
  [
600
- "spec_file",
595
+ "goal_contract_file",
601
596
  [
602
- `The technical specification for this iteration was written to: ${specPath}`,
603
- "Read this file before delegating or implementing anything.",
604
- "Do not rely on an inline planner transcript; the spec file is the authoritative plan for this iteration.",
597
+ `The goal contract for this iteration was written to: ${goalContractPath}`,
598
+ "Read this file before delegating or implementing anything, especially the outcome, scope, verification oracle, work surface, and proof sections.",
599
+ "Do not rely on an inline planner transcript; the goal contract file is the authoritative supporting plan for this iteration.",
600
+ "The goal contract is not the finish line: the finish line is receipt-backed proof that the verification oracle is satisfied.",
605
601
  ].join("\n"),
606
602
  ],
607
603
  [
608
604
  "implementation_notes",
609
605
  [
610
606
  `Keep a running Markdown implementation notes file at this OS temp directory path: ${implementationNotesPath}`,
611
- "The file has already been initialized for this workflow run; update it while you implement the spec.",
612
- "Record decisions you had to make that were not in the spec, things you had to change from the spec, tradeoffs you had to make, blockers, validation outcomes, and anything else the user should know.",
613
- "Ask delegated subagents to report any notes-worthy decisions or tradeoffs back to you, then consolidate them into this file before your final report.",
607
+ "The file has already been initialized for this workflow run; update it while you implement the goal contract.",
608
+ "Maintain the Goal Charter, Work Surface State, and Receipts sections while you implement.",
609
+ "Record active work, blocked work, completed work, verification status, decisions you had to make that were not in the goal contract, things you had to change from the goal contract, tradeoffs you had to make, blockers, validation outcomes, and anything else the user should know.",
610
+ "Ask delegated subagents to report receipts and any notes-worthy decisions or tradeoffs back to you, then consolidate them into this file before your final report.",
614
611
  "Do not include secrets, credentials, tokens, or unrelated environment details in the notes file.",
615
612
  ].join("\n"),
616
613
  ],
614
+ [
615
+ "project_initialization_preflight",
616
+ [
617
+ "Before normal implementation delegation, determine whether this checkout appears initialized for its actual language, framework, and build system.",
618
+ "Do not rely on hard-coded assumptions about JavaScript, TypeScript, Python, Rust, Go, Java, mobile, or any other ecosystem. Infer the project type and setup requirements from repository evidence.",
619
+ "Inspect source layout, setup docs, package/build manifests, lockfiles, toolchain files, generated-artifact conventions, CI workflows, workflow configuration, and package scripts or equivalent task definitions.",
620
+ "Look for evidence that dependencies, generated files, local toolchains, submodules, codegen outputs, or other project-specific initialization artifacts are missing for this checkout.",
621
+ "When repository evidence shows missing initialization, run or delegate the appropriate documented setup command before implementation work.",
622
+ "You are responsible for initializing the checkout when setup commands are documented; missing dependencies, generated files, or local toolchains are setup work, not user handoff work.",
623
+ "Once setup succeeds, continue normal implementation orchestration. Do not treat missing dependencies or generated setup artifacts in a fresh worktree as implementation failures.",
624
+ "If setup requirements cannot be determined confidently, delegate a focused discovery task before implementation instead of guessing.",
625
+ "If setup remains blocked after evidence-based discovery and setup attempts, report the blocker with commands tried and the exact evidence needed to continue.",
626
+ ].join("\n"),
627
+ ],
617
628
  [
618
629
  "delegation_policy",
619
630
  [
@@ -621,7 +632,8 @@ export default defineWorkflow("ralph")
621
632
  "All non-trivial operations must be delegated to subagents via the `subagent` tool before you claim progress.",
622
633
  "Delegate codebase understanding, impact analysis, and implementation research to codebase-locator, codebase-analyzer, and pattern-finder style subagents when available.",
623
634
  "Delegate shell-heavy work — especially commands likely to produce lots of output, log digging, CLI investigation, and broad grep/find exploration — to subagents that can run those commands rather than doing it in this orchestrator context.",
624
- "Delegate implementation edits to a focused subagent with clear files, constraints, and validation expectations; do not merely describe the edits yourself.",
635
+ "Delegate implementation edits to a focused subagent with clear files, constraints, validation expectations, and the receipts it must return; do not merely describe the edits yourself.",
636
+ "Choose the largest safe useful slice for each write delegation: safe means bounded, explicit, verified, and reversible, not tiny.",
625
637
  "Use separate subagents for separate tasks, and launch independent subagents in parallel when useful.",
626
638
  "Do not split highly overlapping tasks across multiple subagents; consolidate overlapping work into one focused delegation to avoid duplicate effort.",
627
639
  "If a subagent takes a long time, do not attempt to do its assigned job yourself while waiting. Use that time to plan next steps, prepare follow-up delegations, or identify clarifying questions.",
@@ -631,9 +643,9 @@ export default defineWorkflow("ralph")
631
643
  "execution_contract",
632
644
  [
633
645
  "The required output format is a completion report, not the task itself.",
634
- "Do not jump straight to the report. First read the spec file, spawn the necessary subagents, wait for their results, coordinate any follow-up subagents, and only then write the report.",
635
- "A valid response must be grounded in actual subagent work: name the delegated work, summarize what each subagent did, and distinguish completed changes from recommendations or blockers.",
636
- "If you cannot read the spec file, spawn subagents, or use subagents, treat that as a blocker and report it honestly instead of pretending the requested work was done.",
646
+ "Do not jump straight to the report. First read the goal contract file, spawn the necessary subagents, wait for their results, coordinate any follow-up subagents, and only then write the report.",
647
+ "A valid response must be grounded in actual subagent work: name the delegated work, summarize what each subagent did, preserve its receipt, and distinguish completed changes from recommendations or blockers.",
648
+ "If you cannot read the goal contract file, spawn subagents, or use subagents, treat that as a blocker and report it honestly instead of pretending the requested work was done.",
637
649
  ].join("\n"),
638
650
  ],
639
651
  [
@@ -641,22 +653,23 @@ export default defineWorkflow("ralph")
641
653
  [
642
654
  "Use the `todo` tool as your active control ledger for subagent work.",
643
655
  "Before launching subagents, create todo items for each delegated task with enough detail to identify owner, purpose, and expected output.",
644
- "Mark todo items in_progress when the corresponding subagent starts, append progress/results as subagents report back, and close them only after you have incorporated or explicitly rejected their result.",
645
- "Keep pending, in_progress, blocked, and completed work accurate so you do not lose track of parallel subagents or unresolved follow-ups.",
656
+ "Mark todo items in_progress when the corresponding subagent starts, append progress/results/receipts as subagents report back, and close them only after you have incorporated or explicitly rejected their result.",
657
+ "Keep pending, in_progress, blocked, completed, and verification status accurate so you do not lose track of parallel subagents or unresolved follow-ups.",
646
658
  "Before writing the final report, review the todo list and resolve every pending/in_progress item as completed, blocked, or deferred with an explanation.",
647
659
  ].join("\n"),
648
660
  ],
649
661
  [
650
662
  "instructions",
651
663
  [
652
- `Start by reading the spec file at ${specPath}.`,
653
- "Decompose the work into delegated subagent tasks based on that spec file.",
654
- "Pass each subagent the relevant task, constraints, files, validation expectations, any prior review findings from the spec, and instructions to report implementation-note-worthy decisions or tradeoffs.",
655
- "Coordinate subagent results into the smallest coherent set of changes that satisfies the spec.",
656
- "Preserve existing architecture and repository conventions unless the spec explicitly justifies a change.",
664
+ `Start by reading the goal contract file at ${goalContractPath}.`,
665
+ "Perform the project_initialization_preflight before decomposing implementation work; complete or delegate required setup before implementation delegation when the checkout appears uninitialized.",
666
+ "Decompose the work into delegated subagent tasks based on that goal contract file.",
667
+ "Pass each subagent the relevant task, constraints, files, validation expectations, verification oracle, any prior review findings from the goal contract, and instructions to return a receipt: changed files, checks run, artifacts, decisions, blockers, residual risks, and what remains to verify.",
668
+ "Coordinate subagent results into the largest safe useful slice that advances the owner outcome and remains reversible and verifiable.",
669
+ "Preserve existing architecture and repository conventions unless the goal contract explicitly justifies a change.",
657
670
  "Run or delegate the most relevant validation commands available in the repository.",
658
- `Before your final report, update the running implementation notes file at ${implementationNotesPath} with decisions, spec deviations, tradeoffs, blockers, and validation outcomes from this iteration.`,
659
- "If blocked, describe the blocker and the safest partial state instead of inventing success.",
671
+ `Before your final report, update the running implementation notes file at ${implementationNotesPath} with the current Goal Charter, Work Surface State, receipts, decisions, goal-contract deviations, tradeoffs, blockers, and validation outcomes from this iteration.`,
672
+ "If a specific slice is blocked, record that blocker and continue adjacent safe local work that advances the full goal when possible; do not treat one blocked slice as a completed goal.",
660
673
  "Do not hide failures; reviewers need accurate status.",
661
674
  ].join("\n"),
662
675
  ],
@@ -664,17 +677,20 @@ export default defineWorkflow("ralph")
664
677
  "output_format",
665
678
  [
666
679
  "After subagents have done the work, return Markdown with headings:",
667
- "1. Spec file — the path you read",
668
- "2. Delegations performed — subagents spawned and what each completed",
669
- "3. Changes made — concrete changes from subagent work, not intentions",
670
- "4. Files touched",
671
- "5. Validation run / recommended",
672
- "6. Deferred work or blockers",
673
- "7. Implementation notes — confirm the OS temp notes path was updated",
680
+ "1. Goal contract file — the path you read",
681
+ "2. Goal contract — the inferred outcome, scope, verification oracle, and proof loop used",
682
+ "3. Work surface state — active, blocked, completed, and verification status",
683
+ "4. Delegations performed — subagents spawned and what each completed",
684
+ "5. Receipts — concrete evidence from each stage, including changed files, checks, artifacts, decisions, and risks",
685
+ "6. Changes made — concrete changes from subagent work, not intentions",
686
+ "7. Files touched",
687
+ "8. Validation run / recommended — map each check to the verification oracle",
688
+ "9. Deferred work or blockers",
689
+ "10. Implementation notes — confirm the OS temp notes path was updated",
674
690
  ].join("\n"),
675
691
  ],
676
692
  ]),
677
- reads: [specPath, implementationNotesPath],
693
+ reads: [goalContractPath, implementationNotesPath],
678
694
  ...orchestratorModelConfig,
679
695
  });
680
696
  finalResult = orchestrator.text;
@@ -691,8 +707,9 @@ export default defineWorkflow("ralph")
691
707
  ],
692
708
  [
693
709
  "objective",
694
- `Refine recently modified code for this task while preserving exact behavior: ${prompt}`,
710
+ `Refine recently modified code for this task while preserving exact behavior and the verification oracle: ${prompt}`,
695
711
  ],
712
+ ["goal_framework", GOAL_METHOD_REFERENCE],
696
713
  ["current_iteration_context", "{previous}"],
697
714
  [
698
715
  "functionality_preservation",
@@ -749,7 +766,10 @@ export default defineWorkflow("ralph")
749
766
  ],
750
767
  [
751
768
  "handoff_expectations",
752
- "In the final report, distinguish edits actually applied from observations only. Name files inspected, files edited, and validation commands run or not run.",
769
+ [
770
+ "In the final report, distinguish edits actually applied from observations only. Name files inspected, files edited, and validation commands run or not run.",
771
+ "Produce a receipt that maps simplifications and validation back to the verification oracle or explicitly says no oracle-relevant simplification was needed.",
772
+ ].join("\n"),
753
773
  ],
754
774
  [
755
775
  "process",
@@ -766,9 +786,10 @@ export default defineWorkflow("ralph")
766
786
  [
767
787
  "Markdown with headings:",
768
788
  "1. Simplifications applied",
769
- "2. Behavior-preservation notes",
770
- "3. Validation run / recommended",
771
- "4. Skipped risky simplifications",
789
+ "2. Receipt — files inspected/edited, checks run, artifacts, and oracle relevance",
790
+ "3. Behavior-preservation notes",
791
+ "4. Validation run / recommended",
792
+ "5. Skipped risky simplifications",
772
793
  ].join("\n"),
773
794
  ],
774
795
  ]),
@@ -776,130 +797,6 @@ export default defineWorkflow("ralph")
776
797
  ...simplifierModelConfig,
777
798
  });
778
799
 
779
- const discovery = await ctx.parallel(
780
- [
781
- {
782
- name: `infra-locate-${iteration}`,
783
- task: taggedPrompt([
784
- [
785
- "role",
786
- "You locate project infrastructure needed for patch review.",
787
- ],
788
- [
789
- "objective",
790
- `Find review-relevant infrastructure for the task: ${prompt}`,
791
- ],
792
- [
793
- "stage_contract",
794
- [
795
- "This is a repository-discovery stage. Do not answer from assumptions or common project layouts.",
796
- "Before output, inspect the repository for each infrastructure category: package scripts, test configs, CI workflows, generated artifacts, lint/typecheck setup, and release gates.",
797
- "The table is a compact handoff after discovery, not a substitute for discovery.",
798
- ].join("\n"),
799
- ],
800
- [
801
- "instructions",
802
- [
803
- "Locate package scripts, test configs, CI workflows, generated artifacts, lint/typecheck setup, and release gates.",
804
- "Search/read relevant files such as package manifests, CI workflow directories, test configs, lint/typecheck configs, build scripts, release configs, and generated-artifact markers.",
805
- "Prefer exact file paths and commands.",
806
- "Explain how each item should influence review or validation.",
807
- "If a category does not exist, report `not found` and briefly name the paths or patterns checked.",
808
- ].join("\n"),
809
- ],
810
- [
811
- "output_format",
812
- "Markdown table: Area | Path/command | Why it matters | Confidence.",
813
- ],
814
- ]),
815
- ...explorerModelConfig,
816
- },
817
- {
818
- name: `infra-analyze-${iteration}`,
819
- task: taggedPrompt([
820
- [
821
- "role",
822
- "You analyze integration risks in project infrastructure.",
823
- ],
824
- [
825
- "objective",
826
- `Assess infrastructure and changed-code risks for the task: ${prompt}`,
827
- ],
828
- [
829
- "stage_contract",
830
- [
831
- "This stage analyzes actual repository coupling, not generic integration risks.",
832
- "Before output, inspect the changed-code context plus relevant infrastructure/configuration files discovered or inferable from the repo.",
833
- "Classify a risk as confirmed only when repository evidence shows the coupling; otherwise mark it speculative.",
834
- ].join("\n"),
835
- ],
836
- [
837
- "instructions",
838
- [
839
- "Identify hidden coupling with build, tests, linting, runtime config, release automation, or generated files.",
840
- "Name the exact validations that would most efficiently detect regressions.",
841
- "Separate confirmed risks from speculative risks.",
842
- "Do not repeat generic review advice; ground findings in repository evidence.",
843
- "Copy validation commands from actual repository scripts/configs when available; do not invent commands that are not supported by the repo.",
844
- ].join("\n"),
845
- ],
846
- [
847
- "evidence_expectations",
848
- "Each confirmed risk must include concrete evidence: path, command, symbol, config key, script name, or file relationship.",
849
- ],
850
- [
851
- "output_format",
852
- "Markdown with sections: Confirmed risks, Speculative risks, Validation commands, Evidence.",
853
- ],
854
- ]),
855
- ...explorerModelConfig,
856
- },
857
- {
858
- name: `infra-patterns-${iteration}`,
859
- task: taggedPrompt([
860
- [
861
- "role",
862
- "You find repository patterns that a patch must follow.",
863
- ],
864
- [
865
- "objective",
866
- `Extract conventions relevant to reviewing this task: ${prompt}`,
867
- ],
868
- [
869
- "stage_contract",
870
- [
871
- "This is an evidence-gathering stage for repository conventions. Do not describe generic best practices.",
872
- "Before output, find concrete examples in the repository that demonstrate conventions relevant to this task.",
873
- "Read enough of each example to understand the convention before reporting it.",
874
- ].join("\n"),
875
- ],
876
- [
877
- "instructions",
878
- [
879
- "Find examples of build/test/style/release/architecture patterns the patch should mirror.",
880
- "Search for nearby or analogous implementations, tests, configs, scripts, and docs.",
881
- "Use concrete paths, commands, or symbols as evidence.",
882
- "Highlight conventions that commonly cause subtle review failures.",
883
- "If examples conflict, describe the conflict instead of forcing a single rule.",
884
- "If no relevant example exists, state what was searched and that no pattern was found.",
885
- ].join("\n"),
886
- ],
887
- [
888
- "handoff_expectations",
889
- "For every required convention or useful example, include the supporting path, command, symbol, or file relationship so reviewers can verify it quickly.",
890
- ],
891
- [
892
- "output_format",
893
- "Markdown with sections: Required conventions, Useful examples, Exceptions, Review implications.",
894
- ],
895
- ]),
896
- ...explorerModelConfig,
897
- },
898
- ],
899
- { task: prompt },
900
- );
901
-
902
- const discoveryContext = formatDiscovery(discovery);
903
800
  const reviewPrompt = taggedPrompt([
904
801
  [
905
802
  "role",
@@ -913,6 +810,17 @@ export default defineWorkflow("ralph")
913
810
  "objective",
914
811
  `Review the current code delta for the task: ${prompt}`,
915
812
  ],
813
+ ["goal_framework", GOAL_METHOD_REFERENCE],
814
+ ["receipt_expectations", RECEIPT_EXPECTATIONS],
815
+ [
816
+ "goal_context_files",
817
+ [
818
+ `Planner/supporting goal contract path: ${goalContractPath}`,
819
+ `Implementation notes and receipts path: ${implementationNotesPath}`,
820
+ "Read these files to recover the goal charter, verification oracle, work surface state, receipts, and verification claims before approving anything.",
821
+ "Review success is whether current evidence and receipts satisfy the verification oracle, not whether the supporting goal contract looks complete.",
822
+ ].join("\n"),
823
+ ],
916
824
  [
917
825
  "comparison_baseline",
918
826
  [
@@ -921,11 +829,12 @@ export default defineWorkflow("ralph")
921
829
  `Start with \`git status --short\`, then use working-tree-aware commands such as \`git diff ${comparisonBaseBranch}\` and \`git diff --cached ${comparisonBaseBranch}\` to identify changed tracked files; inspect untracked files from status directly.`,
922
830
  ].join("\n"),
923
831
  ],
924
- ["infrastructure_discovery", discoveryContext],
925
832
  [
926
833
  "project_guidance",
927
834
  [
928
835
  "Use the repository's AGENTS.md and/or CLAUDE.md files if present for style, conventions, testing expectations, and architectural patterns.",
836
+ "Inspect the codebase for testing, linting, typecheck, build, generated-artifact, and CI patterns that should shape review; prefer commands and conventions copied from actual repository scripts/configs over invented checks.",
837
+ "When changed files touch an area with established test or lint patterns, compare the patch against nearby tests, package scripts, config files, and CI workflows before approving.",
929
838
  "Project-level norms override these general instructions when they are more specific.",
930
839
  "Flag deviations only when they affect correctness, security, performance, or maintainability — not personal preference.",
931
840
  "If validation requires dependencies or tools that are missing, download or install them using the repository-approved package manager/commands rather than bypassing, mocking, or skipping the verification solely because dependencies are absent.",
@@ -935,6 +844,9 @@ export default defineWorkflow("ralph")
935
844
  "validation_expectations",
936
845
  [
937
846
  "Inspect the actual diff/repository state rather than trusting stage summaries.",
847
+ "Identify the smallest relevant validation set from repository evidence: targeted tests, lint, typecheck, build, generated-artifact checks, CI-equivalent scripts, or user-flow proof.",
848
+ "When practical, include an end-to-end QA check that exercises the app the way a user would: use the tmux skill for terminal app environments and playwright-cli for web app environments.",
849
+ "For web app environments, capture a screenshot as a certificate of correct completion when the UI state proves the oracle; for terminal app environments, capture the terminal window/output that shows proof of correctness.",
938
850
  "Run or delegate focused validation when it is necessary to distinguish a real bug from a hunch.",
939
851
  "If tests or typechecks fail because dependencies are missing, install/download the missing dependencies with the repo's documented package manager instead of bypassing the check.",
940
852
  "If validation cannot be completed after reasonable recovery, record the limitation in overall_explanation and reviewer_error; do not use missing dependencies as a reason to approve.",
@@ -953,7 +865,7 @@ export default defineWorkflow("ralph")
953
865
  "Speculation is insufficient: identify the code path, scenario, environment, or input that is provably affected.",
954
866
  "Do not flag intentional behavior changes as bugs unless they clearly violate the task or documented contract.",
955
867
  "Ignore trivial style unless it obscures meaning or violates documented standards in a way that affects correctness/security/maintainability.",
956
- "If no finding clears this bar, return an empty findings array, mark the patch correct, and set stop_review_loop true.",
868
+ "If no finding clears this bar and receipts prove the verification oracle, return an empty findings array, mark the patch correct, set goal_oracle_satisfied true, and set stop_review_loop true.",
957
869
  ].join("\n"),
958
870
  ],
959
871
  [
@@ -973,7 +885,7 @@ export default defineWorkflow("ralph")
973
885
  "how_many_findings",
974
886
  [
975
887
  "Return all findings the original author would definitely want to fix.",
976
- "If no such findings exist, return an empty findings array and mark the patch correct.",
888
+ "If no such findings exist, return an empty findings array and mark the patch correct only when receipt-backed evidence also satisfies the verification oracle.",
977
889
  "Do not stop after the first qualifying finding; continue until every qualifying finding is listed.",
978
890
  ].join("\n"),
979
891
  ],
@@ -982,6 +894,8 @@ export default defineWorkflow("ralph")
982
894
  [
983
895
  "The structured review decision is only valid after you inspect the actual repository state and compare it against the stated baseline branch.",
984
896
  "Do not approve based solely on workflow stage summaries or prior agent reasoning.",
897
+ "Treat this review as the completion audit for the current iteration: approval means receipts and current evidence prove the original owner outcome against the verification oracle.",
898
+ "Do not approve when proof only shows planning, discovery, task selection, helper documents, or a narrow slice while the broader requested outcome still has safe local work remaining.",
985
899
  "The tool call is the final verdict after review work, not a shortcut around review work.",
986
900
  ].join("\n"),
987
901
  ],
@@ -990,14 +904,18 @@ export default defineWorkflow("ralph")
990
904
  [
991
905
  "1. Identify the changed files or diff under review.",
992
906
  "2. Read the relevant changed code and directly affected call sites/tests/configs.",
993
- "3. Run or delegate focused validation when needed to resolve uncertainty.",
994
- "4. If you cannot inspect or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
907
+ "3. Read the implementation notes receipts and map them to the inferred verification oracle and original owner outcome.",
908
+ "4. Run or delegate focused validation when needed to resolve uncertainty.",
909
+ "5. Decide whether the receipt/evidence map proves completion; if evidence is uncertain, indirect, stale, missing, or narrower than the requested outcome, set goal_oracle_satisfied=false and stop_review_loop=false.",
910
+ "6. If you cannot inspect receipts or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
995
911
  ].join("\n"),
996
912
  ],
997
913
  [
998
914
  "evidence_expectations",
999
915
  [
1000
916
  "The overall_explanation should briefly mention what was inspected and what validation was run or why validation was not completed.",
917
+ "The receipt_assessment should map concrete receipts, files, commands, artifacts, or reviewer checks back to the original owner outcome and verification oracle.",
918
+ "The verification_remaining field should say `none` only when no oracle-relevant verification remains.",
1001
919
  "Every finding must cite a concrete changed location and affected scenario.",
1002
920
  ].join("\n"),
1003
921
  ],
@@ -1007,7 +925,7 @@ export default defineWorkflow("ralph")
1007
925
  "You have a structured-output tool named review_decision. Use it after your investigation and validation attempts.",
1008
926
  "The tool terminates the turn and provides the structured data; do not emit a separate final assistant response after calling it.",
1009
927
  "The review loop decides whether to stop only by parsing the JSON object returned by this tool; invalid JSON, missing fields, reviewer_error, or stop_review_loop=false are treated as not approved for safety.",
1010
- "Set stop_review_loop=true only when findings is empty, overall_correctness is patch is correct, and reviewer_error is null/omitted.",
928
+ "Set stop_review_loop=true only when findings is empty, overall_correctness is patch is correct, goal_oracle_satisfied is true, verification_remaining is `none` or equivalent, and reviewer_error is null/omitted.",
1011
929
  "If you hit a reviewer/tool/validation error, still return the object with stop_review_loop=false and reviewer_error populated instead of pretending the patch is approved.",
1012
930
  "The JSON must match this schema exactly:",
1013
931
  "{",
@@ -1026,6 +944,9 @@ export default defineWorkflow("ralph")
1026
944
  ' "overall_correctness": "patch is correct" | "patch is incorrect",',
1027
945
  ' "overall_explanation": "<1-3 sentence explanation justifying the verdict>",',
1028
946
  ' "overall_confidence_score": <float 0.0-1.0>,',
947
+ ' "goal_oracle_satisfied": <boolean>,',
948
+ ' "receipt_assessment": "<how receipts/current evidence map to the verification oracle>",',
949
+ ' "verification_remaining": "<oracle-relevant verification still missing, or none>",',
1029
950
  ' "stop_review_loop": <boolean>,',
1030
951
  ' "reviewer_error": null | {"kind": "validation_unavailable" | "dependency_unavailable" | "tool_failure" | "reviewer_failure", "message": "<what failed>", "attempted_recovery": "<what you tried>"}',
1031
952
  "}",
@@ -1040,11 +961,13 @@ export default defineWorkflow("ralph")
1040
961
  {
1041
962
  name: "reviewer-a",
1042
963
  task: reviewPrompt,
964
+ reads: [goalContractPath, implementationNotesPath],
1043
965
  ...reviewerModelConfig,
1044
966
  },
1045
967
  {
1046
968
  name: "reviewer-b",
1047
969
  task: reviewPrompt,
970
+ reads: [goalContractPath, implementationNotesPath],
1048
971
  ...reviewerModelConfig,
1049
972
  },
1050
973
  ],
@@ -1078,9 +1001,12 @@ export default defineWorkflow("ralph")
1078
1001
  `Original task: ${prompt}`,
1079
1002
  `Review loop approved: ${approved ? "yes" : "no"}`,
1080
1003
  finalPlanPath
1081
- ? `Planner spec path: ${finalPlanPath}`
1082
- : "Planner spec path: unavailable",
1004
+ ? `Planner goal contract path: ${finalPlanPath}`
1005
+ : "Planner goal contract path: unavailable",
1083
1006
  `Implementation notes path: ${implementationNotesPath}`,
1007
+ reviewReport
1008
+ ? `Latest reviewer decisions:\n${reviewReport}`
1009
+ : "Latest reviewer decisions: unavailable",
1084
1010
  ].join("\n"),
1085
1011
  ],
1086
1012
  [
@@ -1089,7 +1015,8 @@ export default defineWorkflow("ralph")
1089
1015
  "Start by inspecting `git status --short` so unstaged, staged, and untracked changes are all visible.",
1090
1016
  `Review the patch against \`${comparisonBaseBranch}\` with working-tree-aware commands such as \`git diff ${comparisonBaseBranch}\` and \`git diff --cached ${comparisonBaseBranch}\`.`,
1091
1017
  "If untracked files are present, inspect them directly before deciding whether they belong in the PR.",
1092
- "Read the implementation notes file and use its full contents as the body of a PR comment after the pull request exists.",
1018
+ "Read the implementation notes file and latest structured reviewer decisions before deciding whether the PR is ready.",
1019
+ "Use the implementation notes contents as the body of a PR comment after the pull request exists.",
1093
1020
  "Check the local Git identity with `git config user.name` and `git config user.email` so you can prefer the matching GitHub account when multiple accounts are logged in.",
1094
1021
  "Check whether GitHub credentials are available with non-destructive commands such as `gh auth status` and `gh auth status --show-token-scopes` before attempting PR creation.",
1095
1022
  "If multiple GitHub accounts or hosts are logged in, use the git config username/email as a heuristic to choose the most likely identity, but try each available credential/account and use the first one that can read the repository and create the PR.",
@@ -1100,7 +1027,7 @@ export default defineWorkflow("ralph")
1100
1027
  [
1101
1028
  "Create a PR only if there are meaningful changes, a remote/branch target is available, credentials are available, and the current state is suitable for review.",
1102
1029
  "If no logged-in account can access the repository or create the PR, do not fake success; report each credential/account tried, what failed, and provide the command the user can run later.",
1103
- "When you successfully create or update the PR, create a PR comment containing the implementation notes file contents as the last action of this workflow stage.",
1030
+ "When you successfully create or update the PR, create a PR comment containing the implementation notes file contents and latest reviewer approval summary as the last action of this workflow stage.",
1104
1031
  "If PR creation is not possible, do not create a standalone comment elsewhere; include the implementation notes path and summary in your report instead.",
1105
1032
  "If the review loop did not approve, prefer reporting the remaining blockers over creating a PR unless the changes are still intentionally ready for human review.",
1106
1033
  "Do not make unrelated code edits in this phase. Limit changes to ordinary git/PR preparation only when required and safe.",
@@ -1112,7 +1039,7 @@ export default defineWorkflow("ralph")
1112
1039
  "Return Markdown with headings:",
1113
1040
  "1. Change review — summary of files and diff scope inspected",
1114
1041
  "2. PR status — created PR URL, or why no PR was created",
1115
- "3. Implementation notes comment — whether the PR comment was created as the last action, or why it could not be created",
1042
+ "3. Implementation notes and reviewer approval comment — whether the PR comment was created as the last action, or why it could not be created",
1116
1043
  "4. Commands run — include exit status or clear outcome",
1117
1044
  "5. Follow-up for the user — exact next steps if credentials or repository state blocked PR creation",
1118
1045
  ].join("\n"),