@bastani/atomic 0.8.26-alpha.2 → 0.8.26-alpha.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/builtin/intercom/CHANGELOG.md +12 -0
- package/dist/builtin/intercom/package.json +1 -1
- package/dist/builtin/mcp/CHANGELOG.md +12 -0
- package/dist/builtin/mcp/package.json +1 -1
- package/dist/builtin/subagents/CHANGELOG.md +12 -0
- package/dist/builtin/subagents/package.json +1 -1
- package/dist/builtin/web-access/CHANGELOG.md +12 -0
- package/dist/builtin/web-access/package.json +1 -1
- package/dist/builtin/workflows/CHANGELOG.md +13 -0
- package/dist/builtin/workflows/README.md +10 -8
- package/dist/builtin/workflows/builtin/deep-research-codebase.ts +1 -1
- package/dist/builtin/workflows/builtin/goal.ts +4 -4
- package/dist/builtin/workflows/builtin/index.d.ts +2 -0
- package/dist/builtin/workflows/builtin/open-claude-design.ts +3 -3
- package/dist/builtin/workflows/builtin/ralph.d.ts +2 -0
- package/dist/builtin/workflows/builtin/ralph.ts +305 -95
- package/dist/builtin/workflows/package.json +1 -1
- package/dist/builtin/workflows/skills/create-spec/SKILL.md +14 -0
- package/dist/core/atomic-guide-command.d.ts.map +1 -1
- package/dist/core/atomic-guide-command.js +10 -8
- package/dist/core/atomic-guide-command.js.map +1 -1
- package/docs/quickstart.md +4 -4
- package/docs/workflows.md +16 -14
- package/package.json +1 -1
|
@@ -158,69 +158,231 @@ const PLANNER_RFC_TEMPLATE = `
|
|
|
158
158
|
|
|
159
159
|
| Document Metadata | Details |
|
|
160
160
|
| ---------------------- | ------------------------------------------------------------------------------ |
|
|
161
|
-
| Author(s) |
|
|
161
|
+
| Author(s) | Run \`git config user.name\` and insert the result. |
|
|
162
162
|
| Status | Draft (WIP) / In Review (RFC) / Approved / Implemented / Deprecated / Rejected |
|
|
163
163
|
| Team / Owner | |
|
|
164
164
|
| Created / Last Updated | |
|
|
165
165
|
|
|
166
166
|
## 1. Executive Summary
|
|
167
167
|
|
|
168
|
+
_Instruction: A "TL;DR" of the document. Assume the reader is a VP or an engineer from another team who has 2 minutes. Summarize the Context (Problem), the Solution (Proposal), and the Impact (Value). Name the one or two **doors** at the heart of the change. Keep it under 200 words._
|
|
169
|
+
|
|
170
|
+
> **Example:** This RFC proposes replacing our current nightly batch billing system with an event-driven architecture. Currently, billing delays cause a 5% increase in customer support tickets. The proposed solution introduces two money doors — \`authorize_charge\` (reversible hold) and \`settle_payment\` (irreversible capture) — as the single chokepoint for outbound money, reducing billing latency from 24 hours to <5 minutes while making double-charges structurally impossible.
|
|
171
|
+
|
|
168
172
|
## 2. Context and Motivation
|
|
169
173
|
|
|
174
|
+
_Instruction: Why are we doing this? Why now? Link to the Product Requirement Document (PRD) and cite the relevant \`research/\` documents._
|
|
175
|
+
|
|
170
176
|
### 2.1 Current State
|
|
171
177
|
|
|
178
|
+
_Instruction: Describe the existing architecture. Use a "Context Diagram" if possible. Be honest about the flaws — including which existing doors **leak** (named for tools, dishonest compression, scattered danger)._
|
|
179
|
+
|
|
180
|
+
- **Architecture:** Currently, Service A communicates with Service B via a shared SQL database.
|
|
181
|
+
- **Limitations:** This creates a tight coupling; when Service A locks the table, Service B times out.
|
|
182
|
+
- **Leaking doors (today):** e.g. \`chargeCard(token, cents)\` is reachable from checkout, the retry job, *and* the admin panel — no one owns "charge exactly once." \`processPayment(...) -> bool\` collapses a declined card, a network failure, and a duplicate submission into the same \`false\`.
|
|
183
|
+
|
|
172
184
|
### 2.2 The Problem
|
|
173
185
|
|
|
186
|
+
_Instruction: What is the specific pain point?_
|
|
187
|
+
|
|
188
|
+
- **User Impact:** Customers cannot download receipts during the nightly batch window.
|
|
189
|
+
- **Business Impact:** We are losing $X/month in churn due to billing errors.
|
|
190
|
+
- **Technical Debt:** Danger is scattered; the boundary is misplaced, with defensive code deep inside the core instead of at the door.
|
|
191
|
+
|
|
174
192
|
## 3. Goals and Non-Goals
|
|
175
193
|
|
|
194
|
+
_Instruction: This is the contract / Definition of Success. Be precise._
|
|
195
|
+
|
|
176
196
|
### 3.1 Functional Goals
|
|
177
197
|
|
|
198
|
+
- [ ] Users must be able to export data in CSV format.
|
|
199
|
+
- [ ] System must support multi-tenant data isolation.
|
|
200
|
+
|
|
178
201
|
### 3.2 Non-Goals (Out of Scope)
|
|
179
202
|
|
|
203
|
+
_Instruction: Explicitly state what you are NOT doing. Remember: **intent lives in what the door refuses** — the doors you deliberately do not build are as much a statement of purpose as the ones you do. This prevents scope creep._
|
|
204
|
+
|
|
205
|
+
- [ ] We will NOT support PDF export in this version (CSV only).
|
|
206
|
+
- [ ] We will NOT migrate data older than 3 years.
|
|
207
|
+
- [ ] We will NOT expose a second path to move money; \`settle_payment\` remains the only chokepoint.
|
|
208
|
+
|
|
180
209
|
## 4. Proposed Solution (High-Level Design)
|
|
181
210
|
|
|
211
|
+
_Instruction: The "Big Picture." Diagrams are mandatory here._
|
|
212
|
+
|
|
182
213
|
### 4.1 System Architecture Diagram
|
|
183
214
|
|
|
184
|
-
|
|
215
|
+
_Instruction: Insert a C4 System Context or Container diagram. Show the "Black Boxes" and mark where the **airlock** sits (the single edge where untrusted network becomes a trusted request)._
|
|
216
|
+
|
|
217
|
+
\`\`\`mermaid
|
|
218
|
+
%%{init: {'theme':'base', 'themeVariables': { 'primaryColor':'#f8f9fa','primaryTextColor':'#2c3e50','primaryBorderColor':'#4a5568','lineColor':'#4a90e2','secondaryColor':'#ffffff','tertiaryColor':'#e9ecef','clusterBkg':'#ffffff','clusterBorder':'#cbd5e0'}}}%%
|
|
219
|
+
flowchart TB
|
|
220
|
+
classDef person fill:#5a67d8,stroke:#4c51bf,stroke-width:3px,color:#fff,font-weight:600
|
|
221
|
+
classDef core fill:#4a90e2,stroke:#357abd,stroke-width:2.5px,color:#fff,font-weight:600
|
|
222
|
+
classDef support fill:#667eea,stroke:#5a67d8,stroke-width:2.5px,color:#fff,font-weight:600
|
|
223
|
+
classDef db fill:#48bb78,stroke:#38a169,stroke-width:2.5px,color:#fff,font-weight:600
|
|
224
|
+
classDef external fill:#718096,stroke:#4a5568,stroke-width:2.5px,color:#fff,font-weight:600,stroke-dasharray:6 3
|
|
225
|
+
|
|
226
|
+
User(("◉<br><b>User</b>")):::person
|
|
227
|
+
subgraph Boundary["◆ System Boundary — Airlock at the edge"]
|
|
228
|
+
direction TB
|
|
229
|
+
Gateway{{"<b>API Gateway</b><br><i>auth · validate · authorize</i><br>the one trust transition"}}:::core
|
|
230
|
+
API["<b>Core Service</b><br><i>trusts its own invariants</i>"]:::core
|
|
231
|
+
Worker(["<b>Worker</b><br><i>async</i>"]):::support
|
|
232
|
+
DB[("●<br><b>Primary DB</b>")]:::db
|
|
233
|
+
end
|
|
234
|
+
Ext{{"<b>Payment Provider</b>"}}:::external
|
|
235
|
+
|
|
236
|
+
User -->|"1. HTTPS (untrusted)"| Gateway
|
|
237
|
+
Gateway -->|"2. trusted request"| API
|
|
238
|
+
API -->|"3. persist (txn)"| DB
|
|
239
|
+
API -.->|"4. enqueue"| Worker
|
|
240
|
+
Worker -.->|"5. settle (irreversible)"| Ext
|
|
241
|
+
style Boundary fill:#fff,stroke:#cbd5e0,stroke-width:2px,stroke-dasharray:8 4
|
|
242
|
+
\`\`\`
|
|
185
243
|
|
|
186
244
|
### 4.2 Architectural Pattern
|
|
187
245
|
|
|
246
|
+
_Instruction: Name the pattern (e.g., "Event Sourcing", "BFF — Backend for Frontend", "Publisher-Subscriber")._
|
|
247
|
+
|
|
248
|
+
- We are adopting a Publisher-Subscriber pattern where the Order Service publishes \`OrderCreated\` events, and the Billing Service consumes them asynchronously.
|
|
249
|
+
|
|
188
250
|
### 4.3 Key Components
|
|
189
251
|
|
|
190
|
-
| Component
|
|
191
|
-
|
|
|
252
|
+
| Component | Responsibility | Technology Stack | Justification |
|
|
253
|
+
| ----------------- | --------------------------- | ----------------- | -------------------------------------------- |
|
|
254
|
+
| Ingestion Service | Validates incoming webhooks | Go, Gin Framework | High concurrency performance needed. |
|
|
255
|
+
| Event Bus | Decouples services | Kafka | Durable log, replay capability. |
|
|
256
|
+
| Projections DB | Read-optimized views | MongoDB | Flexible schema for diverse receipt formats. |
|
|
257
|
+
|
|
258
|
+
### 4.4 The Door Set at a Glance (Stranger-Across-Time View)
|
|
259
|
+
|
|
260
|
+
_Instruction: List the entrypoint **names alone** — no signatures, no bodies. A competent stranger should reconstruct the system's purpose from this list. If they cannot, intent has leaked into the mechanism; return to §5 and rename until they can. Mark every door that guards an irreversible effect with ⚠._
|
|
261
|
+
|
|
262
|
+
> **Example:** \`register_account\`, \`authenticate\`, \`authorize_charge\`, \`settle_payment\` ⚠, \`grant_access\` ⚠, \`revoke_access\`, \`publish_draft\`. Reading these alone tells you who the system lets in, that money moves in exactly two steps and only those two, who may hand out access, and what it means for work to go live.
|
|
192
263
|
|
|
193
264
|
## 5. Detailed Design
|
|
194
265
|
|
|
195
|
-
|
|
266
|
+
_Instruction: The "Meat" of the document. Sufficient detail for an engineer to start coding. Lead with the **doors** — they are the load-bearing part of the spec — then describe the mechanism behind them._
|
|
267
|
+
|
|
268
|
+
### 5.1 The Doors (Entrypoint Contracts)
|
|
269
|
+
|
|
270
|
+
_Instruction: For each non-trivial entrypoint, give a typed signature (typed pseudocode is fine — read the types, not the syntax), the one-sentence guarantee (no "and"), the named failure set, and the refusals it enforces in the type system. Then record the rubric result. Make illegal states **unrepresentable**, not merely checked. Cite the \`research/\` doc that establishes each joint._
|
|
271
|
+
|
|
272
|
+
\`\`\`
|
|
273
|
+
// — Money. Two doors, and there is no third way to move a cent. —
|
|
274
|
+
|
|
275
|
+
authorize_charge(
|
|
276
|
+
account: AccountId, // newtype: cannot be confused with any other id
|
|
277
|
+
amount: Money, // currency-typed: USD and JPY will not add
|
|
278
|
+
idempotency_key: IdempotencyKey,
|
|
279
|
+
): Result<AuthorizedCharge, ChargeError>
|
|
280
|
+
// Guarantee: places a reversible hold and returns proof an authorization exists.
|
|
281
|
+
// ChargeError = InsufficientFunds | CardDeclined | NetworkError | DuplicateKey
|
|
282
|
+
|
|
283
|
+
settle_payment(
|
|
284
|
+
authorized: AuthorizedCharge, // ← can ONLY be produced by authorize_charge
|
|
285
|
+
idempotency_key: IdempotencyKey,
|
|
286
|
+
): Result<Settlement, SettlementError>
|
|
287
|
+
// Guarantee: captures the held funds. IRREVERSIBLE. The single chokepoint for outbound money.
|
|
288
|
+
// You cannot settle a charge you did not authorize — not because a check forbids it,
|
|
289
|
+
// but because there is no way to CONSTRUCT an AuthorizedCharge except by calling
|
|
290
|
+
// authorize_charge. The illegal state is unrepresentable. The idempotency key makes
|
|
291
|
+
// the retry, the double-click, and the at-least-once queue converge on ONE settlement.
|
|
292
|
+
\`\`\`
|
|
293
|
+
|
|
294
|
+
**Per-door audit (run the rubric):**
|
|
295
|
+
|
|
296
|
+
| Door | (1) Joint | (2) One sentence, no "and" | (3) Honest name | (5) Every exit | (6) Refusals real | (7) Trust transition | (8) One chokepoint |
|
|
297
|
+
| ------------------ | --------------- | ---------------------------- | ------------------------------- | ------------------------------------------------ | ----------------------------------------- | -------------------- | ------------------------------ |
|
|
298
|
+
| \`authorize_charge\` | ✅ business verb | ✅ "places a reversible hold" | ✅ | retry → \`DuplicateKey\`; timeout → \`NetworkError\` | currency mismatch unrepresentable | n/a | reversible, not the chokepoint |
|
|
299
|
+
| \`settle_payment\` ⚠ | ✅ business verb | ✅ "captures held funds" | ✅ irreversibility in doc + type | replay converges via key | cannot settle un-authorized charge (type) | n/a | ✅ the sole outbound-money door |
|
|
196
300
|
|
|
197
|
-
### 5.2
|
|
301
|
+
### 5.2 API Interfaces — The Same Doors on the Wire
|
|
198
302
|
|
|
199
|
-
|
|
303
|
+
_Instruction: A web service's real boundary is its transport surface. The URL names the joint, the HTTP verb declares its safety class, the status code is the door's honest exit. Never \`200 OK\` wrapping an error. The wire door MUST carry the same name as its in-process twin (§5.1)._
|
|
304
|
+
|
|
305
|
+
\`\`\`
|
|
306
|
+
# Identity — the one trust transition, at the edge
|
|
307
|
+
POST /v1/sessions 201 Created # = authenticate; 401 on bad credentials
|
|
308
|
+
DELETE /v1/sessions/current 204 No Content # = log out
|
|
309
|
+
|
|
310
|
+
# Money — two doors, one chokepoint, idempotent under retry
|
|
311
|
+
POST /v1/payment_intents 201 Idempotency-Key: <key> # = authorize_charge (reversible)
|
|
312
|
+
POST /v1/payment_intents/{id}/capture 200 Idempotency-Key: <key> # = settle_payment (IRREVERSIBLE)
|
|
313
|
+
# 409 Conflict if the key is replayed with a different body
|
|
314
|
+
# 422 Unprocessable if the intent was never authorized
|
|
315
|
+
|
|
316
|
+
# Access — authority demanded by the route, destructive door made idempotent
|
|
317
|
+
POST /v1/accounts/{id}/grants 201 (admin scope required) # = grant_access
|
|
318
|
+
DELETE /v1/grants/{id} 204 (204 even if already revoked) # = revoke_access
|
|
319
|
+
|
|
320
|
+
# Publishing — the domain's own verb, refusing to clobber a concurrent edit
|
|
321
|
+
POST /v1/drafts/{id}/publish 200 If-Match: <etag> # = publish_draft
|
|
322
|
+
# 412 Precondition Failed if the draft moved under you — the wire's --force-with-lease
|
|
323
|
+
\`\`\`
|
|
324
|
+
|
|
325
|
+
_If using gRPC, define the same joints in the \`.proto\`; the typed request message is the airlock by construction. Use honest status codes (\`INVALID_ARGUMENT\`, \`PERMISSION_DENIED\`, \`NOT_FOUND\`, \`ALREADY_EXISTS\`, \`FAILED_PRECONDITION\`, retryable \`ABORTED\`/\`UNAVAILABLE\`) — never a lone \`OK\` carrying an error field._
|
|
326
|
+
|
|
327
|
+
### 5.3 Data Model / Schema
|
|
328
|
+
|
|
329
|
+
_Instruction: Provide ERDs or JSON schemas. Discuss normalization vs. denormalization. Prefer schemas that make illegal states unrepresentable (sum-type status columns over independent boolean flags)._
|
|
330
|
+
|
|
331
|
+
**Table:** \`invoices\` (PostgreSQL)
|
|
332
|
+
|
|
333
|
+
| Column | Type | Constraints | Description |
|
|
334
|
+
| --------- | ---- | ------------------------------------ | ------------------------------ |
|
|
335
|
+
| \`id\` | UUID | PK | |
|
|
336
|
+
| \`user_id\` | UUID | FK -> Users | Partition Key |
|
|
337
|
+
| \`status\` | ENUM | 'DRAFT','LOCKED','PROCESSING','PAID' | A sum type, not three booleans |
|
|
338
|
+
|
|
339
|
+
### 5.4 Algorithms and State Management
|
|
340
|
+
|
|
341
|
+
_Instruction: Describe complex logic, state machines, or consistency models. Tie each state transition to the door that performs it._
|
|
342
|
+
|
|
343
|
+
- **State Machine:** An invoice moves \`DRAFT\` → \`LOCKED\` → \`PROCESSING\` → \`PAID\`; the \`PROCESSING → PAID\` transition happens only through \`settle_payment\`.
|
|
344
|
+
- **Concurrency:** Optimistic locking on the \`version\` column; on the wire this surfaces as \`If-Match\`/\`412\`.
|
|
200
345
|
|
|
201
346
|
## 6. Alternatives Considered
|
|
202
347
|
|
|
203
|
-
|
|
204
|
-
|
|
348
|
+
_Instruction: Prove you thought about trade-offs — including alternative **door sets** (e.g., one god endpoint vs. distinct joints). Why is your boundary better than the others?_
|
|
349
|
+
|
|
350
|
+
| Option | Pros | Cons | Reason for Rejection |
|
|
351
|
+
| ------------------------------------------- | ------------------------------------------- | ------------------------------------------------------ | ------------------------------------------------------------------------------ |
|
|
352
|
+
| Option A: Single \`POST /execute {action}\` | One route, flexible | God door; intent hidden in payload; danger un-funneled | Fails "joint, not tool" and "few dangerous doors." |
|
|
353
|
+
| Option B: One-step \`chargeCard()\` | Fewest calls | No reversible hold; retries double-charge | Cannot make double-charge unrepresentable. |
|
|
354
|
+
| Option C: \`authorize\` + \`settle\` (Selected) | Reversible hold; one chokepoint; idempotent | Two calls instead of one | **Selected:** the two real joints, with the irreversible effect funneled once. |
|
|
205
355
|
|
|
206
356
|
## 7. Cross-Cutting Concerns
|
|
207
357
|
|
|
208
358
|
### 7.1 Security and Privacy
|
|
209
359
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
### 7.3 Scalability and Capacity Planning
|
|
360
|
+
_Instruction: This is where "keep the dangerous doors few and honest" and "the airlock at the boundary" become concrete._
|
|
213
361
|
|
|
214
|
-
|
|
362
|
+
- **The trust transition is singular:** untrusted callers become trusted only at \`POST /v1/sessions\` / the gateway. No other door promotes an anonymous caller. (Rubric #7.)
|
|
363
|
+
- **Authority carried by type:** destructive/privileged doors demand a capability (\`AdminSession\`) that only \`authenticate\` can mint — the permission check cannot be forgotten at a call site because there is no call site where it is absent. (Rubric #6.)
|
|
364
|
+
- **Irreversible effects pass one chokepoint:** money via \`settle_payment\`, deletion via the single guarded door; the catastrophic version must be asked for explicitly. (Rubric #8.)
|
|
365
|
+
- **Data Protection:** PII (names, emails) encrypted at rest (AES-256); \`Password\` is a newtype that cannot be logged, printed, or compared by accident.
|
|
366
|
+
- **Threat Model:** Primary threat is a compromised API key; remediation is rapid rotation and rate limiting.
|
|
215
367
|
|
|
216
|
-
|
|
368
|
+
## 8. Test Plan
|
|
217
369
|
|
|
218
|
-
|
|
370
|
+
_Instruction: Test the doors at their promises and their refusals — not just the happy path. Every exit in rubric #5 deserves a test. The interactive verification is what lets a human or another agent confirm the feature is correct without reading the bodies — the stranger-across-time test, made executable._
|
|
219
371
|
|
|
220
|
-
|
|
372
|
+
- **Unit Tests:** each door's named failure variants; the *refusals* (e.g., a type/construction test proving \`settle_payment\` cannot accept anything but an \`AuthorizedCharge\`).
|
|
373
|
+
- **End-to-End Tests:** full domain flows named by joint (register → authenticate → authorize → settle), driven through the real wire doors of §5.2.
|
|
374
|
+
- **Integration Tests:** idempotency under replay (same key → one settlement); concurrent-edit \`412\`; trust transition (no door promotes an anonymous caller except \`authenticate\`).
|
|
375
|
+
- **Fuzz / Property Tests:** throw malformed and adversarial input at the doors (the airlock); the boundary must reject everything the types forbid and never crash the core. Assert invariants over random inputs (e.g., \`settle_payment\` converges on one settlement under any interleaving of retries; no input sequence reaches a money move except through the chokepoint).
|
|
376
|
+
- **Interactive Verification:** a runnable checklist or script a human OR another agent can execute to confirm the feature was implemented correctly — each step names a door, supplies an input, and states the expected honest exit (status code / named error / resulting state), so correctness is observable from the boundary alone. Include the exact commands or requests to run and the pass/fail condition for each.
|
|
221
377
|
|
|
222
378
|
## 9. Open Questions / Unresolved Issues
|
|
223
|
-
|
|
379
|
+
|
|
380
|
+
_Instruction: List known unknowns. These must be resolved before the doc is marked "Approved." Include any door whose rubric could not be answered cleanly — especially undefined guarantees (rubric #2, the most dangerous case) and any irreversible effect not yet funneled to a single chokepoint (rubric #8). Resolve these with the user via contrastive clarification._
|
|
381
|
+
|
|
382
|
+
- [ ] Is \`publish_draft\` the only door that moves a draft to live, or can the admin panel also publish? (If the latter, the effect is not yet funneled — rubric #8.)
|
|
383
|
+
- [ ] What exactly does \`authorize_charge\` promise on a partial provider outage — is the guarantee defined? (rubric #2.)
|
|
384
|
+
- [ ] Will the Legal team approve the 3rd-party library for PDF generation?
|
|
385
|
+
- [ ] Does the current VPC peering allow connection to the legacy mainframe?`.trim();
|
|
224
386
|
|
|
225
387
|
type PromptSection = readonly [tag: string, content: string];
|
|
226
388
|
|
|
@@ -230,6 +392,18 @@ function taggedPrompt(sections: readonly PromptSection[]): string {
|
|
|
230
392
|
.join("\n\n");
|
|
231
393
|
}
|
|
232
394
|
|
|
395
|
+
function workflowCwdContextSection(workflowCwd: string): PromptSection {
|
|
396
|
+
return [
|
|
397
|
+
"workflow_cwd_context",
|
|
398
|
+
[
|
|
399
|
+
`Current working directory: ${workflowCwd}`,
|
|
400
|
+
"Use this as the starting directory for repository work in this stage.",
|
|
401
|
+
"Shell commands and relative file paths should be relative to this directory unless you intentionally pass an explicit cwd override.",
|
|
402
|
+
"When delegating, pass along that this is the current working directory for the workflow.",
|
|
403
|
+
].join("\n"),
|
|
404
|
+
];
|
|
405
|
+
}
|
|
406
|
+
|
|
233
407
|
function positiveInteger(value: number | undefined, fallback: number): number {
|
|
234
408
|
return typeof value === "number" && Number.isFinite(value) && value > 0
|
|
235
409
|
? Math.floor(value)
|
|
@@ -390,6 +564,7 @@ type RalphInputs = {
|
|
|
390
564
|
readonly max_loops?: number;
|
|
391
565
|
readonly base_branch?: string;
|
|
392
566
|
readonly git_worktree_dir?: string;
|
|
567
|
+
readonly create_pr?: boolean;
|
|
393
568
|
};
|
|
394
569
|
|
|
395
570
|
type RalphWorkflowOptions = {
|
|
@@ -397,6 +572,7 @@ type RalphWorkflowOptions = {
|
|
|
397
572
|
readonly maxLoops: number;
|
|
398
573
|
readonly comparisonBaseBranch: string;
|
|
399
574
|
readonly workflowStartCwd: string;
|
|
575
|
+
readonly createPr: boolean;
|
|
400
576
|
};
|
|
401
577
|
|
|
402
578
|
type RalphWorkflowResult = {
|
|
@@ -404,7 +580,7 @@ type RalphWorkflowResult = {
|
|
|
404
580
|
readonly plan: string;
|
|
405
581
|
readonly plan_path: string;
|
|
406
582
|
readonly implementation_notes_path: string;
|
|
407
|
-
readonly pr_report
|
|
583
|
+
readonly pr_report?: string;
|
|
408
584
|
readonly approved: boolean;
|
|
409
585
|
readonly iterations_completed: number;
|
|
410
586
|
readonly review_report: string;
|
|
@@ -415,19 +591,26 @@ async function runRalphWorkflow(
|
|
|
415
591
|
ctx: WorkflowRunContext<RalphInputs>,
|
|
416
592
|
options: RalphWorkflowOptions,
|
|
417
593
|
): Promise<RalphWorkflowResult> {
|
|
418
|
-
const {
|
|
594
|
+
const {
|
|
595
|
+
prompt,
|
|
596
|
+
maxLoops,
|
|
597
|
+
comparisonBaseBranch,
|
|
598
|
+
workflowStartCwd,
|
|
599
|
+
createPr,
|
|
600
|
+
} = options;
|
|
419
601
|
|
|
420
602
|
let latestReviewReportPath: string | undefined;
|
|
421
603
|
let finalPlan = "";
|
|
422
604
|
let finalPlanPath = "";
|
|
423
605
|
let finalResult = "";
|
|
424
|
-
let finalPrReport
|
|
606
|
+
let finalPrReport: string | undefined;
|
|
425
607
|
// Keep generated specs under the workflow runtime cwd. When Ralph is invoked
|
|
426
608
|
// with git_worktree_dir, the executor defaults ctx.cwd to the matching
|
|
427
609
|
// worktree cwd so specs and stage writes land in the same checkout.
|
|
428
610
|
const workflowSpecPath = resolve(workflowStartCwd, defaultSpecPath(prompt));
|
|
429
611
|
const implementationNotesPath = await createImplementationNotesFile(prompt);
|
|
430
612
|
const artifactDir = await mkdtemp(join(tmpdir(), "atomic-ralph-run-"));
|
|
613
|
+
const workflowCwdContext = workflowCwdContextSection(workflowStartCwd);
|
|
431
614
|
let approved = false;
|
|
432
615
|
let iterationsCompleted = 0;
|
|
433
616
|
|
|
@@ -437,7 +620,7 @@ async function runRalphWorkflow(
|
|
|
437
620
|
"openai-codex/gpt-5.5:xhigh",
|
|
438
621
|
"github-copilot/gpt-5.5:xhigh",
|
|
439
622
|
"anthropic/claude-opus-4-8:xhigh",
|
|
440
|
-
"github-copilot/claude-opus-4.8:
|
|
623
|
+
"github-copilot/claude-opus-4.8:xhigh",
|
|
441
624
|
],
|
|
442
625
|
excludedTools: ["ask_user_question"],
|
|
443
626
|
};
|
|
@@ -447,8 +630,8 @@ async function runRalphWorkflow(
|
|
|
447
630
|
fallbackModels: [
|
|
448
631
|
"openai-codex/gpt-5.5:medium",
|
|
449
632
|
"github-copilot/gpt-5.5:medium",
|
|
450
|
-
"anthropic/claude-
|
|
451
|
-
"github-copilot/claude-
|
|
633
|
+
"anthropic/claude-opus-4-8:medium",
|
|
634
|
+
"github-copilot/claude-opus-4.8:medium",
|
|
452
635
|
],
|
|
453
636
|
excludedTools: ["ask_user_question"],
|
|
454
637
|
};
|
|
@@ -458,8 +641,8 @@ async function runRalphWorkflow(
|
|
|
458
641
|
fallbackModels: [
|
|
459
642
|
"openai-codex/gpt-5.5:medium",
|
|
460
643
|
"github-copilot/gpt-5.5:medium",
|
|
461
|
-
"anthropic/claude-
|
|
462
|
-
"github-copilot/claude-
|
|
644
|
+
"anthropic/claude-opus-4-8:medium",
|
|
645
|
+
"github-copilot/claude-opus-4.8:medium",
|
|
463
646
|
],
|
|
464
647
|
excludedTools: ["ask_user_question"],
|
|
465
648
|
};
|
|
@@ -470,7 +653,7 @@ async function runRalphWorkflow(
|
|
|
470
653
|
"openai-codex/gpt-5.5:xhigh",
|
|
471
654
|
"github-copilot/gpt-5.5:xhigh",
|
|
472
655
|
"anthropic/claude-opus-4-8:xhigh",
|
|
473
|
-
"github-copilot/claude-opus-4.8:
|
|
656
|
+
"github-copilot/claude-opus-4.8:xhigh",
|
|
474
657
|
],
|
|
475
658
|
excludedTools: ["ask_user_question"],
|
|
476
659
|
customTools: [reviewDecisionTool],
|
|
@@ -497,6 +680,7 @@ async function runRalphWorkflow(
|
|
|
497
680
|
"task",
|
|
498
681
|
`Plan iteration ${iteration}/${maxLoops} for this user specification:\n${prompt}`,
|
|
499
682
|
],
|
|
683
|
+
workflowCwdContext,
|
|
500
684
|
[
|
|
501
685
|
"latest_review_artifact",
|
|
502
686
|
latestReviewReportPath === undefined
|
|
@@ -547,6 +731,13 @@ async function runRalphWorkflow(
|
|
|
547
731
|
"Surface open questions in Section 9 with owner placeholders such as `[OWNER: infra team]`; do not paper over uncertainty.",
|
|
548
732
|
"Match depth to stakes: a small refactor can be concise, but every template section header must remain present.",
|
|
549
733
|
"If prior review findings are present, explicitly address each finding or explain why it is obsolete.",
|
|
734
|
+
"Determine the compatibility posture:",
|
|
735
|
+
"- Before decomposing the spec creation request, identify whether this project must preserve backward compatibility for real downstream users.",
|
|
736
|
+
"- If the user explicitly allows breaking changes, public API changes, cleanup, or says there are no real users/downstream dependencies, allow breaking changes.",
|
|
737
|
+
"- If the user mentions production users, published APIs, downstream consumers, migration safety, or compatibility requirements, disallow breaking changes.",
|
|
738
|
+
"- Carry this posture into the spec creation plan, the final spec frontmatter, and a `## Backwards Compatibility` section in the final spec.",
|
|
739
|
+
"- When allowing breaking changes, document existing legacy behavior, compatibility shims, optional flags, and public APIs as current state, not as constraints future specs must preserve unless the user explicitly asks for preservation.",
|
|
740
|
+
"- When not allowing breaking changes, document public APIs, compatibility-sensitive surfaces, downstream callers, migration constraints, and behavior that future work must preserve."
|
|
550
741
|
].join("\n"),
|
|
551
742
|
],
|
|
552
743
|
[
|
|
@@ -598,6 +789,7 @@ async function runRalphWorkflow(
|
|
|
598
789
|
"objective",
|
|
599
790
|
`Implement iteration ${iteration}/${maxLoops} for the task: ${prompt}`,
|
|
600
791
|
],
|
|
792
|
+
workflowCwdContext,
|
|
601
793
|
[
|
|
602
794
|
"spec_file",
|
|
603
795
|
[
|
|
@@ -626,6 +818,7 @@ async function runRalphWorkflow(
|
|
|
626
818
|
"Delegate codebase understanding, impact analysis, and implementation research to codebase-locator, codebase-analyzer, and pattern-finder style subagents when available.",
|
|
627
819
|
"Delegate shell-heavy work — especially commands likely to produce lots of output, log digging, CLI investigation, and broad grep/find exploration — to subagents that can run those commands rather than doing it in this orchestrator context.",
|
|
628
820
|
"Delegate implementation edits to a focused subagent with clear files, constraints, and validation expectations; do not merely describe the edits yourself.",
|
|
821
|
+
"Keep delegated work focused on implementation, tests, docs, validation evidence, and implementation notes.",
|
|
629
822
|
"Use separate subagents for separate tasks, and launch independent subagents in parallel when useful.",
|
|
630
823
|
"Do not split highly overlapping tasks across multiple subagents; consolidate overlapping work into one focused delegation to avoid duplicate effort.",
|
|
631
824
|
"If a subagent takes a long time, do not attempt to do its assigned job yourself while waiting. Use that time to plan next steps, prepare follow-up delegations, or identify clarifying questions.",
|
|
@@ -700,6 +893,7 @@ async function runRalphWorkflow(
|
|
|
700
893
|
"objective",
|
|
701
894
|
`Refine recently modified code for this task while preserving exact behavior: ${prompt}`,
|
|
702
895
|
],
|
|
896
|
+
workflowCwdContext,
|
|
703
897
|
[
|
|
704
898
|
"artifact_handoff",
|
|
705
899
|
[
|
|
@@ -803,6 +997,7 @@ async function runRalphWorkflow(
|
|
|
803
997
|
].join("\n"),
|
|
804
998
|
],
|
|
805
999
|
["objective", `Review the current code delta for the task: ${prompt}`],
|
|
1000
|
+
workflowCwdContext,
|
|
806
1001
|
[
|
|
807
1002
|
"comparison_baseline",
|
|
808
1003
|
[
|
|
@@ -864,7 +1059,7 @@ async function runRalphWorkflow(
|
|
|
864
1059
|
"Use a matter-of-fact, non-accusatory tone. Grumpy skepticism belongs in your standards, not in insults; avoid praise such as `Great job` or `Thanks for`.",
|
|
865
1060
|
"Keep code_location ranges as short as possible, ideally one line and never longer than 5-10 lines unless unavoidable.",
|
|
866
1061
|
"The code_location must overlap the diff/change under review.",
|
|
867
|
-
"Use one finding per distinct issue. Do not generate a
|
|
1062
|
+
"Use one finding per distinct issue. Do not generate or apply a fix patch.",
|
|
868
1063
|
"Use suggestion blocks only for concrete replacement code and preserve exact leading whitespace if you include one.",
|
|
869
1064
|
].join("\n"),
|
|
870
1065
|
],
|
|
@@ -962,7 +1157,10 @@ async function runRalphWorkflow(
|
|
|
962
1157
|
...reviewerModelConfig,
|
|
963
1158
|
},
|
|
964
1159
|
],
|
|
965
|
-
{
|
|
1160
|
+
{
|
|
1161
|
+
task: prompt,
|
|
1162
|
+
failFast: false,
|
|
1163
|
+
},
|
|
966
1164
|
);
|
|
967
1165
|
} catch (err) {
|
|
968
1166
|
const message = err instanceof Error ? err.message : String(err);
|
|
@@ -995,82 +1193,87 @@ async function runRalphWorkflow(
|
|
|
995
1193
|
if (approved) break;
|
|
996
1194
|
}
|
|
997
1195
|
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
[
|
|
1001
|
-
"role",
|
|
1002
|
-
"You are a careful release engineer preparing a pull request from the current workspace state.",
|
|
1003
|
-
],
|
|
1004
|
-
[
|
|
1005
|
-
"objective",
|
|
1006
|
-
`Review the changes since the base branch \`${comparisonBaseBranch}\` and create a pull request if possible and credentials are available.`,
|
|
1007
|
-
],
|
|
1008
|
-
[
|
|
1009
|
-
"workflow_context",
|
|
1196
|
+
if (createPr === true) {
|
|
1197
|
+
const prResult = await ctx.task("pull-request", {
|
|
1198
|
+
prompt: taggedPrompt([
|
|
1010
1199
|
[
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
? `Planner spec path: ${finalPlanPath}`
|
|
1015
|
-
: "Planner spec path: unavailable",
|
|
1016
|
-
`Implementation notes path: ${implementationNotesPath}`,
|
|
1017
|
-
latestReviewReportPath === undefined
|
|
1018
|
-
? "Latest review artifact: unavailable"
|
|
1019
|
-
: `Latest review artifact: ${latestReviewReportPath}`,
|
|
1020
|
-
].join("\n"),
|
|
1021
|
-
],
|
|
1022
|
-
[
|
|
1023
|
-
"required_checks",
|
|
1200
|
+
"role",
|
|
1201
|
+
"You are a careful release engineer preparing a provider-appropriate pull request, merge request, or code-review handoff from the current workspace state.",
|
|
1202
|
+
],
|
|
1024
1203
|
[
|
|
1025
|
-
"
|
|
1026
|
-
`Review the
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
"Check the local Git identity with `git config user.name` and `git config user.email` so you can prefer the matching GitHub account when multiple accounts are logged in.",
|
|
1030
|
-
"Check whether GitHub credentials are available with non-destructive commands such as `gh auth status` and `gh auth status --show-token-scopes` before attempting PR creation.",
|
|
1031
|
-
"If multiple GitHub accounts or hosts are logged in, use the git config username/email as a heuristic to choose the most likely identity, but try each available credential/account and use the first one that can read the repository and create the PR.",
|
|
1032
|
-
].join("\n"),
|
|
1033
|
-
],
|
|
1034
|
-
[
|
|
1035
|
-
"pr_policy",
|
|
1204
|
+
"objective",
|
|
1205
|
+
`Review the changes since the base branch \`${comparisonBaseBranch}\` and create a provider-appropriate pull request, merge request, or code-review handoff if possible and credentials are available. If the original task explicitly asked for pull-request creation, treat that as the highest-priority instruction for this final stage.`,
|
|
1206
|
+
],
|
|
1207
|
+
workflowCwdContext,
|
|
1036
1208
|
[
|
|
1037
|
-
"
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1209
|
+
"workflow_context",
|
|
1210
|
+
[
|
|
1211
|
+
`Original task: ${prompt}`,
|
|
1212
|
+
`Review loop approved: ${approved ? "yes" : "no"}`,
|
|
1213
|
+
finalPlanPath
|
|
1214
|
+
? `Planner spec path: ${finalPlanPath}`
|
|
1215
|
+
: "Planner spec path: unavailable",
|
|
1216
|
+
`Implementation notes path: ${implementationNotesPath}`,
|
|
1217
|
+
latestReviewReportPath === undefined
|
|
1218
|
+
? "Latest review artifact: unavailable"
|
|
1219
|
+
: `Latest review artifact: ${latestReviewReportPath}`,
|
|
1220
|
+
].join("\n"),
|
|
1221
|
+
],
|
|
1049
1222
|
[
|
|
1050
|
-
"
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1223
|
+
"required_checks",
|
|
1224
|
+
[
|
|
1225
|
+
"Start by inspecting `git status --short` so unstaged, staged, and untracked changes are all visible.",
|
|
1226
|
+
`Review the patch against \`${comparisonBaseBranch}\` with working-tree-aware commands such as \`git diff ${comparisonBaseBranch}\` and \`git diff --cached ${comparisonBaseBranch}\`.`,
|
|
1227
|
+
"If untracked files are present, inspect them directly before deciding whether they belong in the PR.",
|
|
1228
|
+
"Read the implementation notes file and use its full contents as the body of a provider-appropriate PR/review comment after the pull request, merge request, or review exists.",
|
|
1229
|
+
"Detect the source-control and code-review provider from `git remote -v`, repository hosting URLs, configured CLI auth, and repository metadata before choosing a creation tool.",
|
|
1230
|
+
"Use the provider-appropriate tool for the detected remote: GitHub `gh pr create`, Azure DevOps/Azure Repos `az repos pr create`, GitLab `glab mr create` when available, Bitbucket's configured CLI/API workflow, or Sapling/Phabricator `sl`/Phabricator/Differential tooling used by the repository.",
|
|
1231
|
+
"Check the local Git identity with `git config user.name` and `git config user.email` so you can prefer the matching account when multiple provider accounts are logged in.",
|
|
1232
|
+
"Check provider credentials with non-destructive commands before attempting PR/review creation, such as `gh auth status`, `az account show`, `az repos pr list`, `glab auth status`, `sl` status/config commands, or the repository's documented Phabricator/Differential checks.",
|
|
1233
|
+
"If multiple accounts, hosts, or providers are available, use the remote URL and git config username/email as heuristics to choose the most likely identity, but try each available credential/account that can read the repository and create the provider-appropriate review request.",
|
|
1234
|
+
].join("\n"),
|
|
1235
|
+
],
|
|
1236
|
+
[
|
|
1237
|
+
"pr_policy",
|
|
1238
|
+
[
|
|
1239
|
+
"Create a provider-appropriate PR/MR/review request only if there are meaningful changes, a remote/branch target is available, credentials are available, and the current state is suitable for review.",
|
|
1240
|
+
"If no logged-in account can access the repository or create the review request, do not fake success; report each provider, credential/account, and tool tried, what failed, and provide the command the user can run later.",
|
|
1241
|
+
"When you successfully create or update the review request, create a provider-appropriate comment containing the implementation notes file contents as the last action of this workflow stage.",
|
|
1242
|
+
"Ralph-created worktrees are detached HEAD checkouts. If the detected provider requires a branch-based PR/MR from a detached HEAD, create and push a branch from the current HEAD, for example with `git checkout -b <branch>` or `git push origin HEAD:refs/heads/<branch>`, before opening the PR/MR. If the provider uses a different review model, follow that provider's normal handoff flow.",
|
|
1243
|
+
"Ralph does not remove git_worktree_dir automatically. Leave the worktree intact for retries or user recovery.",
|
|
1244
|
+
"If PR/MR/review creation is not possible, do not create a standalone comment elsewhere; include the implementation notes path and summary in your report instead.",
|
|
1245
|
+
"If the review loop did not approve, prefer reporting the remaining blockers over creating a PR/MR/review unless the changes are still intentionally ready for human review.",
|
|
1246
|
+
"Do not make unrelated code edits in this phase. Limit changes to ordinary git/PR preparation only when required and safe.",
|
|
1247
|
+
].join("\n"),
|
|
1248
|
+
],
|
|
1249
|
+
[
|
|
1250
|
+
"output_format",
|
|
1251
|
+
[
|
|
1252
|
+
"Return Markdown with headings:",
|
|
1253
|
+
"1. Change review — summary of files and diff scope inspected",
|
|
1254
|
+
"2. PR/review status — created PR/MR/review URL, or why no review request was created",
|
|
1255
|
+
"3. Implementation notes comment — whether the provider-appropriate comment was created as the last action, or why it could not be created",
|
|
1256
|
+
"4. Commands run — include exit status or clear outcome",
|
|
1257
|
+
"5. Follow-up for the user — exact next steps if credentials or repository state blocked PR creation",
|
|
1258
|
+
].join("\n"),
|
|
1259
|
+
],
|
|
1260
|
+
]),
|
|
1261
|
+
reads: [
|
|
1262
|
+
...(finalPlanPath ? [finalPlanPath] : []),
|
|
1263
|
+
implementationNotesPath,
|
|
1264
|
+
...(latestReviewReportPath === undefined ? [] : [latestReviewReportPath]),
|
|
1057
1265
|
],
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
...(latestReviewReportPath === undefined ? [] : [latestReviewReportPath]),
|
|
1063
|
-
],
|
|
1064
|
-
...orchestratorModelConfig,
|
|
1065
|
-
});
|
|
1066
|
-
finalPrReport = prResult.text;
|
|
1266
|
+
...orchestratorModelConfig,
|
|
1267
|
+
});
|
|
1268
|
+
finalPrReport = prResult.text;
|
|
1269
|
+
}
|
|
1067
1270
|
|
|
1068
1271
|
return {
|
|
1069
1272
|
result: finalResult,
|
|
1070
1273
|
plan: finalPlan,
|
|
1071
1274
|
plan_path: finalPlanPath,
|
|
1072
1275
|
implementation_notes_path: implementationNotesPath,
|
|
1073
|
-
pr_report: finalPrReport,
|
|
1276
|
+
...(finalPrReport === undefined ? {} : { pr_report: finalPrReport }),
|
|
1074
1277
|
approved,
|
|
1075
1278
|
iterations_completed: iterationsCompleted,
|
|
1076
1279
|
review_report: compactReviewReport(latestReviewReportPath),
|
|
@@ -1096,6 +1299,11 @@ export default defineWorkflow("ralph")
|
|
|
1096
1299
|
description:
|
|
1097
1300
|
"Optional Git worktree path. Ralph must start inside a Git repo; absolute paths are used as-is, relative paths resolve from the repo root, existing Git worktrees from the invoking repository are reused/shared as-is, and missing paths are created from base_branch.",
|
|
1098
1301
|
}))
|
|
1302
|
+
.input("create_pr", Type.Boolean({
|
|
1303
|
+
default: false,
|
|
1304
|
+
description:
|
|
1305
|
+
"Whether to run the final pull-request creation stage. Defaults to false; prompt text alone does not opt in. Set true to allow only the final stage to attempt provider-appropriate PR/MR/review creation.",
|
|
1306
|
+
}))
|
|
1099
1307
|
.worktreeFromInputs({
|
|
1100
1308
|
gitWorktreeDir: "git_worktree_dir",
|
|
1101
1309
|
baseBranch: "base_branch",
|
|
@@ -1104,8 +1312,8 @@ export default defineWorkflow("ralph")
|
|
|
1104
1312
|
.output("plan", Type.Optional(Type.String({ description: "Latest RFC-style plan text." })))
|
|
1105
1313
|
.output("plan_path", Type.Optional(Type.String({ description: "Path to the latest generated spec under specs/." })))
|
|
1106
1314
|
.output("implementation_notes_path", Type.Optional(Type.String({ description: "OS-temp notes file containing decisions, deviations, blockers, and validation notes." })))
|
|
1107
|
-
.output("pr_report", Type.Optional(Type.String({ description: "Pull-request
|
|
1108
|
-
.output("approved", Type.Optional(Type.Boolean({ description: "Whether the reviewer loop approved before
|
|
1315
|
+
.output("pr_report", Type.Optional(Type.String({ description: "Pull-request report emitted only when create_pr=true and the final pull-request stage runs." })))
|
|
1316
|
+
.output("approved", Type.Optional(Type.Boolean({ description: "Whether the reviewer loop approved before completion or optional final handoff." })))
|
|
1109
1317
|
.output("iterations_completed", Type.Optional(Type.Number({ description: "Number of plan/orchestrate/review loops completed." })))
|
|
1110
1318
|
.output("review_report", Type.Optional(Type.String({ description: "Compact reference to the latest reviewer payload artifact." })))
|
|
1111
1319
|
.output("review_report_path", Type.Optional(Type.String({ description: "JSON artifact path for the latest Ralph review round." })))
|
|
@@ -1119,11 +1327,13 @@ export default defineWorkflow("ralph")
|
|
|
1119
1327
|
inputs.base_branch,
|
|
1120
1328
|
"origin/main",
|
|
1121
1329
|
);
|
|
1330
|
+
const createPr = inputs.create_pr === true;
|
|
1122
1331
|
return await runRalphWorkflow(workflowCtx, {
|
|
1123
1332
|
prompt,
|
|
1124
1333
|
maxLoops,
|
|
1125
1334
|
comparisonBaseBranch,
|
|
1126
1335
|
workflowStartCwd,
|
|
1336
|
+
createPr,
|
|
1127
1337
|
});
|
|
1128
1338
|
})
|
|
1129
1339
|
.compile();
|