@bastani/atomic 0.5.16 → 0.5.17-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/.agents/skills/workflow-creator/references/agent-sessions.md +3 -1
  2. package/.agents/skills/workflow-creator/references/failure-modes.md +140 -0
  3. package/.claude/settings.json +1 -0
  4. package/dist/sdk/components/header.d.ts.map +1 -1
  5. package/dist/sdk/components/layout.d.ts.map +1 -1
  6. package/dist/sdk/components/node-card.d.ts.map +1 -1
  7. package/dist/sdk/components/orchestrator-panel-store.d.ts +2 -0
  8. package/dist/sdk/components/orchestrator-panel-store.d.ts.map +1 -1
  9. package/dist/sdk/components/orchestrator-panel-types.d.ts +1 -1
  10. package/dist/sdk/components/orchestrator-panel-types.d.ts.map +1 -1
  11. package/dist/sdk/components/orchestrator-panel.d.ts +2 -0
  12. package/dist/sdk/components/orchestrator-panel.d.ts.map +1 -1
  13. package/dist/sdk/components/status-helpers.d.ts.map +1 -1
  14. package/dist/sdk/providers/claude.d.ts +33 -1
  15. package/dist/sdk/providers/claude.d.ts.map +1 -1
  16. package/dist/sdk/runtime/executor.d.ts +88 -0
  17. package/dist/sdk/runtime/executor.d.ts.map +1 -1
  18. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts.map +1 -1
  19. package/dist/sdk/workflows/builtin/ralph/claude/index.d.ts +7 -3
  20. package/dist/sdk/workflows/builtin/ralph/claude/index.d.ts.map +1 -1
  21. package/dist/services/config/definitions.d.ts.map +1 -1
  22. package/package.json +5 -5
  23. package/src/commands/cli/init/onboarding.ts +19 -2
  24. package/src/sdk/components/header.tsx +2 -1
  25. package/src/sdk/components/layout.ts +2 -1
  26. package/src/sdk/components/node-card.tsx +16 -0
  27. package/src/sdk/components/orchestrator-panel-store.test.ts +88 -0
  28. package/src/sdk/components/orchestrator-panel-store.ts +16 -0
  29. package/src/sdk/components/orchestrator-panel-types.ts +1 -1
  30. package/src/sdk/components/orchestrator-panel.tsx +8 -0
  31. package/src/sdk/components/session-graph-panel.tsx +1 -1
  32. package/src/sdk/components/status-helpers.ts +3 -2
  33. package/src/sdk/providers/claude.ts +160 -2
  34. package/src/sdk/runtime/executor.test.ts +144 -0
  35. package/src/sdk/runtime/executor.ts +219 -31
  36. package/src/sdk/workflows/builtin/deep-research-codebase/claude/index.ts +3 -18
  37. package/src/sdk/workflows/builtin/ralph/claude/index.ts +41 -51
  38. package/src/services/config/definitions.ts +5 -0
@@ -487,6 +487,170 @@ function resolveRef(ref: SessionRef): string {
487
487
  return typeof ref === "string" ? ref : ref.name;
488
488
  }
489
489
 
490
/**
 * Minimal Copilot session surface required by `wrapCopilotSend()`.
 *
 * Only `on()` is needed: it registers `handler` for `eventType` and returns a
 * function that removes that registration. The signature is deliberately
 * loose (string event name, `unknown` payload) so that both the real
 * CopilotSession and lightweight test mocks satisfy it.
 */
export interface CopilotSendSessionSurface {
  on(eventType: string, handler: (event: { data?: unknown }) => void): () => void;
}

/**
 * Wraps a Copilot session's `send()` to block until `session.idle` fires.
 *
 * Copilot's `send()` is fire-and-forget — it returns immediately after
 * queuing the message. This wrapper blocks the returned promise until the
 * session emits `session.idle` (turn complete) or `session.error`.
 *
 * Listeners are registered BEFORE `nativeSend` is invoked so a turn that
 * finishes very quickly cannot slip past unobserved, and they are always
 * removed again — on idle, on error, and when `nativeSend` itself rejects.
 *
 * HIL detection for Copilot is handled separately by
 * `watchCopilotSessionForHIL()`, which subscribes to the session's
 * `tool.execution_start` / `tool.execution_complete` events for the
 * `ask_user` built-in tool.
 *
 * Exported for unit testing.
 *
 * @param session - Session whose `session.idle` / `session.error` events
 *   mark the end of a turn.
 * @param nativeSend - The session's original (already bound) `send`.
 * @returns A drop-in replacement for `send` that resolves with
 *   `nativeSend`'s result once the session is idle.
 * @throws Whatever `nativeSend` rejects with, or an `Error` carrying the
 *   `session.error` message (`"Copilot session error"` when none is given).
 */
export function wrapCopilotSend<O, R>(
  session: CopilotSendSessionSurface,
  nativeSend: (options: O) => Promise<R>,
): (options: O) => Promise<R> {
  return async (options: O): Promise<R> => {
    let unsubIdle: (() => void) | undefined;
    let unsubError: (() => void) | undefined;
    let settled = false;

    // Safe to call repeatedly: each handle is cleared once it has been used.
    const cleanup = () => {
      settled = true;
      unsubIdle?.();
      unsubIdle = undefined;
      unsubError?.();
      unsubError = undefined;
    };

    const idle = new Promise<void>((resolve, reject) => {
      unsubIdle = session.on("session.idle", () => {
        cleanup();
        resolve();
      });
      unsubError = session.on("session.error", (event) => {
        cleanup();
        const data = event.data as { message?: string } | undefined;
        reject(new Error(data?.message ?? "Copilot session error"));
      });
      // If an event was delivered synchronously during registration, the
      // handles above were not assigned yet when cleanup() first ran —
      // release them now.
      if (settled) cleanup();
    });

    // `session.error` may fire while `nativeSend` is still in flight, i.e.
    // before `await idle` below attaches a handler. Mark the rejection as
    // observed so the runtime does not report it as unhandled; the error is
    // still surfaced to the caller by `await idle`.
    void idle.catch(() => undefined);

    let result: R;
    try {
      result = await nativeSend(options);
    } catch (err) {
      // The message was never queued, so no idle/error event belongs to this
      // call. Drop the listeners instead of leaking them.
      cleanup();
      throw err;
    }

    await idle;
    return result;
  };
}
543
+
544
/**
 * Minimal shape of an event as produced by the OpenCode v2 SDK event stream.
 * Using a structural interface rather than the SDK's generated union type keeps
 * this helper independently unit-testable with plain objects.
 *
 * `sessionID` is optional because many OpenCode event types (e.g.
 * `file.edited`, `session.compacted`) carry properties without that field.
 * `watchOpencodeStreamForHIL` therefore checks it at runtime.
 */
export interface OpenCodeHILEvent {
  type: string;
  properties: { sessionID?: string; [key: string]: unknown };
}

/**
 * Consume an OpenCode SSE event stream and call `onHIL` whenever the session
 * with `sessionId` enters or exits a human-in-the-loop (HIL) state:
 *
 * - `question.asked`    → `onHIL(true)`  (agent awaiting user input)
 * - `question.replied`  → `onHIL(false)` (user answered, agent resumes)
 * - `question.rejected` → `onHIL(false)` (user dismissed, agent resumes)
 *
 * Events for other sessions, events without a `sessionID`, and frames that
 * lack a `properties` object altogether are silently ignored. The function
 * returns when the stream is exhausted (i.e. the server closes the
 * connection).
 *
 * Exported for unit testing.
 *
 * @param stream - Async iterable of OpenCode events.
 * @param sessionId - Only events whose `properties.sessionID` equals this
 *   value are acted on.
 * @param onHIL - Invoked with `true` when the session starts waiting for the
 *   user and `false` when it resumes.
 */
export async function watchOpencodeStreamForHIL(
  stream: AsyncIterable<OpenCodeHILEvent>,
  sessionId: string,
  onHIL: (waiting: boolean) => void,
): Promise<void> {
  for await (const event of stream) {
    // The stream is external input: the static type says `properties` is
    // always present, but a single frame without it must not throw and end
    // HIL tracking for the rest of the stage.
    if (event?.properties?.sessionID !== sessionId) continue;

    switch (event.type) {
      case "question.asked":
        onHIL(true);
        break;
      case "question.replied":
      case "question.rejected":
        onHIL(false);
        break;
      default:
        // Any other event type is irrelevant to HIL state.
        break;
    }
  }
}
591
+
592
/**
 * Minimal Copilot session surface required by `watchCopilotSessionForHIL()`.
 * A structural `on()` signature keeps this helper independently unit-testable
 * with plain objects and compatible with both the real CopilotSession and
 * test mocks. `on()` returns a function that removes the registered handler.
 */
export interface CopilotHILSessionSurface {
  on(
    eventType: string,
    handler: (event: { data?: unknown }) => void,
  ): () => void;
}

/**
 * Subscribe to a Copilot session's tool-execution events to track HIL state
 * for the `ask_user` built-in tool:
 *
 * - `tool.execution_start` with `toolName === "ask_user"` → `onHIL(true)`
 * - `tool.execution_complete` with matching `toolCallId`  → `onHIL(false)`
 *
 * Overlapping `ask_user` invocations are tracked by `toolCallId`:
 * `onHIL(true)` fires only when the first request becomes active and
 * `onHIL(false)` only after the last active request completes. Start events
 * for other tools, events without a `toolCallId`, and completions for ids
 * that were never tracked are ignored.
 *
 * Exported for unit testing.
 *
 * @param session - Session to observe.
 * @param onHIL - Invoked with `true` when the agent starts waiting on the
 *   user and `false` when it is no longer waiting.
 * @returns An unsubscribe function that removes both listeners. The second
 *   listener is removed even if removing the first one throws.
 * @throws Whatever `session.on` throws during registration; in that case no
 *   listener is left behind.
 */
export function watchCopilotSessionForHIL(
  session: CopilotHILSessionSurface,
  onHIL: (waiting: boolean) => void,
): () => void {
  // toolCallIds of `ask_user` invocations that have started but not completed.
  const active = new Set<string>();

  const handleStart = (event: { data?: unknown }): void => {
    const data = event.data as
      | { toolName?: string; toolCallId?: string }
      | undefined;
    if (data?.toolName === "ask_user" && data.toolCallId) {
      const wasEmpty = active.size === 0;
      active.add(data.toolCallId);
      // Only the transition idle → waiting is reported.
      if (wasEmpty) onHIL(true);
    }
  };

  const handleComplete = (event: { data?: unknown }): void => {
    const data = event.data as { toolCallId?: string } | undefined;
    // `delete` returns false for ids we never tracked (other tools), so those
    // completions cannot produce a spurious "resumed" signal.
    if (
      data?.toolCallId &&
      active.delete(data.toolCallId) &&
      active.size === 0
    ) {
      onHIL(false);
    }
  };

  const unsubStart = session.on("tool.execution_start", handleStart);
  let unsubComplete: () => void;
  try {
    unsubComplete = session.on("tool.execution_complete", handleComplete);
  } catch (err) {
    // Do not leave a half-installed watcher behind.
    unsubStart();
    throw err;
  }

  return () => {
    try {
      unsubStart();
    } finally {
      // Always release the second listener, even if the first removal threw.
      unsubComplete();
    }
  };
}
653
+
490
654
  // ============================================================================
491
655
  // Shared transcript / message readers
492
656
  // ============================================================================
@@ -583,6 +747,7 @@ async function initProviderClientAndSession<A extends AgentType>(
583
747
  clientOpts: StageClientOptions<A>,
584
748
  sessionOpts: StageSessionOptions<A>,
585
749
  headless = false,
750
+ onHIL?: (waiting: boolean) => void,
586
751
  ): Promise<{
587
752
  client: ProviderClient<A>;
588
753
  session: ProviderSession<A>;
@@ -649,11 +814,7 @@ async function initProviderClientAndSession<A extends AgentType>(
649
814
  const claudeSessionOpts = sessionOpts as StageSessionOptions<"claude">;
650
815
  const client = new ClaudeClientWrapper(paneId, claudeClientOpts);
651
816
  await client.start();
652
- const session = new ClaudeSessionWrapper(
653
- paneId,
654
- sessionId,
655
- claudeSessionOpts,
656
- );
817
+ const session = new ClaudeSessionWrapper(paneId, sessionId, claudeSessionOpts, onHIL);
657
818
  return { client, session } as Result;
658
819
  }
659
820
  default:
@@ -911,6 +1072,17 @@ function createSessionRunner(
911
1072
  const transcriptFn = createTranscriptReader(shared.completedRegistry);
912
1073
  const getMessagesFn = createMessagesReader(shared.completedRegistry);
913
1074
 
1075
+ // ── HIL (human-in-the-loop) callback ──
1076
+ // Unified callback passed to provider-specific HIL detection so that any
1077
+ // provider can signal when the agent is waiting for user input or has
1078
+ // resumed processing. Both `name` and `shared.panel` are guaranteed to
1079
+ // be in scope here: `name` is validated above and `shared.panel` is
1080
+ // always present on the shared runner state.
1081
+ const onHIL = (waiting: boolean) => {
1082
+ if (waiting) shared.panel.sessionAwaitingInput(name);
1083
+ else shared.panel.sessionResumed(name);
1084
+ };
1085
+
914
1086
  // ── 12. Auto-create provider client and session ──
915
1087
  const {
916
1088
  client: providerClient,
@@ -924,6 +1096,7 @@ function createSessionRunner(
924
1096
  clientOpts,
925
1097
  sessionOpts,
926
1098
  isHeadless,
1099
+ onHIL,
927
1100
  );
928
1101
 
929
1102
  // ── 12a. Copilot: wrap send() to await session.idle ──
@@ -938,35 +1111,47 @@ function createSessionRunner(
938
1111
  // Compatible with sendAndWait(): the SDK's _dispatchEvent broadcasts
939
1112
  // to all handlers (typed + wildcard), so both this wrapper's listener
940
1113
  // and sendAndWait's internal wildcard handler observe the same event.
1114
+ // Unsubscribe fn for the Copilot HIL event listeners; invoked in the
1115
+ // `finally` block so the handlers are removed when the stage ends.
1116
+ let hilUnsubscribe: (() => void) | undefined;
1117
+
941
1118
  if (shared.agent === "copilot") {
942
1119
  const copilotSession = providerSession as ProviderSession<"copilot">;
943
1120
  const nativeSend = copilotSession.send.bind(copilotSession);
944
- copilotSession.send = async (options) => {
945
- // Register listeners BEFORE sending to avoid a race where the
946
- // agent finishes before the listener is attached. Listen for
947
- // both idle (success) and error (failure) so we never hang if
948
- // the session errors without reaching idle.
949
- const idle = new Promise<void>((resolve, reject) => {
950
- let unsubIdle: (() => void) | undefined;
951
- let unsubError: (() => void) | undefined;
952
- const cleanup = () => {
953
- unsubIdle?.();
954
- unsubError?.();
955
- };
956
- unsubIdle = copilotSession.on("session.idle", () => {
957
- cleanup();
958
- resolve();
959
- });
960
- unsubError = copilotSession.on("session.error", (event) => {
961
- cleanup();
962
- const data = event.data as { message?: string } | undefined;
963
- reject(new Error(data?.message ?? "Copilot session error"));
964
- });
1121
+ copilotSession.send = wrapCopilotSend(copilotSession, nativeSend);
1122
+
1123
+ // Copilot HIL detection via native SDK events.
1124
+ //
1125
+ // `tool.execution_start` / `tool.execution_complete` fire for the
1126
+ // `ask_user` built-in tool regardless of whether `onUserInputRequest`
1127
+ // is registered, so we can detect HIL via the SDK's event stream and
1128
+ // still let the CLI render its native tmux-pane dialog.
1129
+ hilUnsubscribe = watchCopilotSessionForHIL(copilotSession, onHIL);
1130
+ }
1131
+
1132
+ // ── 12b. OpenCode: SSE event stream for HIL detection ──
1133
+ //
1134
+ // `client.event.subscribe()` yields `question.asked`, `question.replied`,
1135
+ // and `question.rejected` events in real time. The subscription is
1136
+ // **awaited** before the stage callback runs so the stream is guaranteed
1137
+ // to be open when the first prompt fires.
1138
+ if (shared.agent === "opencode") {
1139
+ const ocClient = providerClient as ProviderClient<"opencode">;
1140
+ const ocSession = providerSession as ProviderSession<"opencode">;
1141
+ const ocSessionId = ocSession.id;
1142
+
1143
+ try {
1144
+ const { stream } = await ocClient.event.subscribe();
1145
+ watchOpencodeStreamForHIL(stream, ocSessionId, onHIL).catch((err) => {
1146
+ console.warn(
1147
+ `[opencode] HIL event stream disconnected for session ${ocSessionId}: ${errorMessage(err)}`,
1148
+ );
965
1149
  });
966
- const messageId = await nativeSend(options);
967
- await idle;
968
- return messageId;
969
- };
1150
+ } catch (err) {
1151
+ console.warn(
1152
+ `[opencode] HIL event stream failed to subscribe for session ${ocSessionId}: ${errorMessage(err)}`,
1153
+ );
1154
+ }
970
1155
  }
971
1156
 
972
1157
  // ── 13. Construct SessionContext ──
@@ -1017,7 +1202,10 @@ function createSessionRunner(
1017
1202
  if (!isHeadless) shared.panel.sessionError(name, message);
1018
1203
  throw error;
1019
1204
  } finally {
1020
- // ── 14a. Auto-cleanup provider resources ──
1205
+ // ── 14a. Stop background HIL watcher (if any) ──
1206
+ hilUnsubscribe?.();
1207
+
1208
+ // ── 14b. Auto-cleanup provider resources ──
1021
1209
  await cleanupProvider(
1022
1210
  shared.agent,
1023
1211
  providerClient,
@@ -187,12 +187,7 @@ export default defineWorkflow({
187
187
  description:
188
188
  "Surface prior research via research-locator + research-analyzer",
189
189
  },
190
- {
191
- chatFlags: [
192
- "--allow-dangerously-skip-permissions",
193
- "--dangerously-skip-permissions",
194
- ],
195
- },
190
+ {},
196
191
  {},
197
192
  async (s) => {
198
193
  // Dispatches codebase-research-locator → codebase-research-analyzer
@@ -243,12 +238,7 @@ export default defineWorkflow({
243
238
  ", ",
244
239
  )} (${partition.reduce((s, u) => s + u.fileCount, 0)} files)`,
245
240
  },
246
- {
247
- chatFlags: [
248
- "--allow-dangerously-skip-permissions",
249
- "--dangerously-skip-permissions",
250
- ],
251
- },
241
+ {},
252
242
  {},
253
243
  async (s) => {
254
244
  await s.session.query(
@@ -298,12 +288,7 @@ export default defineWorkflow({
298
288
  description:
299
289
  "Synthesize explorer findings + history into final research doc",
300
290
  },
301
- {
302
- chatFlags: [
303
- "--allow-dangerously-skip-permissions",
304
- "--dangerously-skip-permissions",
305
- ],
306
- },
291
+ {},
307
292
  {},
308
293
  async (s) => {
309
294
  await s.session.query(
@@ -7,15 +7,18 @@
7
7
  * - {@link MAX_LOOPS} iterations have completed, OR
8
8
  * - Two parallel reviewer passes both return zero findings.
9
9
  *
10
- * The reviewer stages use the Claude Agent SDK's structured output
11
- * (`outputFormat`) to guarantee the review result matches the
12
- * {@link ReviewResultSchema} no manual JSON parsing required.
10
+ * The reviewer stages run the `reviewer` sub-agent in a visible TUI via the
11
+ * `--agent reviewer` chatFlag, then parse the JSON review out of the
12
+ * assistant text with {@link parseReviewResult}. The prompt enumerates the
13
+ * {@link ReviewResultSchema} fields so the model emits matching JSON. We
14
+ * deliberately avoid invoking the Claude Agent SDK's `query()` from inside a
15
+ * non-headless stage — that would spawn a TUI pane that goes unused while
16
+ * the SDK runs in-process (see workflow-creator skill, failure-modes F17).
13
17
  *
14
18
  * Run: atomic workflow -n ralph -a claude "<your spec>"
15
19
  */
16
20
 
17
21
  import { defineWorkflow, extractAssistantText } from "../../../index.ts";
18
- import { query as claudeSdkQuery } from "@anthropic-ai/claude-agent-sdk";
19
22
 
20
23
  import {
21
24
  buildPlannerPrompt,
@@ -24,10 +27,8 @@ import {
24
27
  buildReviewPrompt,
25
28
  buildDebuggerReportPrompt,
26
29
  extractMarkdownBlock,
27
- filterActionable,
30
+ parseReviewResult,
28
31
  mergeReviewResults,
29
- REVIEW_RESULT_JSON_SCHEMA,
30
- type ReviewResult,
31
32
  type StructuredReviewResult,
32
33
  } from "../helpers/prompts.ts";
33
34
  import { hasActionableFindings } from "../helpers/review.ts";
@@ -41,41 +42,13 @@ const MAX_LOOPS = 10;
41
42
  // timeout is needed.
42
43
 
43
44
  /**
44
- * Run the Claude Agent SDK's `query()` with structured output and collect
45
- * the result. Returns a {@link StructuredReviewResult} with the SDK-validated
46
- * structured output (when available) and the raw text fallback.
45
+ * Extract a {@link StructuredReviewResult} from the reviewer TUI's assistant
46
+ * text. {@link parseReviewResult} tolerates surrounding prose and fenced
47
+ * code blocks; the prompt instructs the model to emit JSON matching
48
+ * {@link ReviewResultSchema}.
47
49
  */
48
- async function queryWithStructuredOutput(
49
- prompt: string,
50
- ): Promise<StructuredReviewResult> {
51
- let structured: ReviewResult | null = null;
52
- let raw = "";
53
-
54
- for await (const msg of claudeSdkQuery({
55
- prompt,
56
- options: {
57
- outputFormat: {
58
- type: "json_schema",
59
- schema: REVIEW_RESULT_JSON_SCHEMA,
60
- },
61
- },
62
- })) {
63
- if (msg.type === "result") {
64
- raw = String((msg as Record<string, unknown>).result ?? "");
65
- if (
66
- msg.subtype === "success" &&
67
- (msg as Record<string, unknown>).structured_output
68
- ) {
69
- structured = (msg as Record<string, unknown>)
70
- .structured_output as ReviewResult;
71
- }
72
- }
73
- }
74
-
75
- return {
76
- structured: structured ? filterActionable(structured) : null,
77
- raw,
78
- };
50
function extractReview(rawText: string): StructuredReviewResult {
  // Keep the untouched assistant text next to whatever could be parsed from it.
  const structured = parseReviewResult(rawText);
  return { structured, raw: rawText };
}
80
53
 
81
54
  export default defineWorkflow({
@@ -206,17 +179,34 @@ export default defineWorkflow({
206
179
  discoveryContext,
207
180
  });
208
181
 
182
+ const reviewerChatFlags = [
183
+ "--agent",
184
+ "reviewer",
185
+ "--allow-dangerously-skip-permissions",
186
+ "--dangerously-skip-permissions",
187
+ ];
188
+
209
189
  const [reviewA, reviewB] = await Promise.all([
210
- ctx.stage({ name: `reviewer-${iteration}-a` }, {}, {}, async (s) => {
211
- const result = await queryWithStructuredOutput(reviewPrompt);
212
- s.save(s.sessionId);
213
- return result;
214
- }),
215
- ctx.stage({ name: `reviewer-${iteration}-b` }, {}, {}, async (s) => {
216
- const result = await queryWithStructuredOutput(reviewPrompt);
217
- s.save(s.sessionId);
218
- return result;
219
- }),
190
+ ctx.stage(
191
+ { name: `reviewer-${iteration}-a` },
192
+ { chatFlags: reviewerChatFlags },
193
+ {},
194
+ async (s) => {
195
+ const result = await s.session.query(reviewPrompt);
196
+ s.save(s.sessionId);
197
+ return extractReview(extractAssistantText(result, 0));
198
+ },
199
+ ),
200
+ ctx.stage(
201
+ { name: `reviewer-${iteration}-b` },
202
+ { chatFlags: reviewerChatFlags },
203
+ {},
204
+ async (s) => {
205
+ const result = await s.session.query(reviewPrompt);
206
+ s.save(s.sessionId);
207
+ return extractReview(extractAssistantText(result, 0));
208
+ },
209
+ ),
220
210
  ]);
221
211
 
222
212
  const merged = mergeReviewResults(reviewA.result, reviewB.result);
@@ -54,6 +54,11 @@ export const AGENT_CONFIG: Record<AgentKey, AgentConfig> = {
54
54
  destination: ".claude/settings.json",
55
55
  merge: true,
56
56
  },
57
+ {
58
+ source: ".claude/settings.json",
59
+ destination: "~/.claude/settings.json",
60
+ merge: true,
61
+ },
57
62
  ],
58
63
  },
59
64
  opencode: {