@bastani/atomic 0.8.31-alpha.3 → 0.8.31-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/CHANGELOG.md +13 -0
  2. package/dist/builtin/cursor/CHANGELOG.md +1 -1
  3. package/dist/builtin/cursor/package.json +2 -2
  4. package/dist/builtin/intercom/package.json +1 -1
  5. package/dist/builtin/mcp/CHANGELOG.md +5 -0
  6. package/dist/builtin/mcp/direct-tools.ts +4 -2
  7. package/dist/builtin/mcp/package.json +1 -1
  8. package/dist/builtin/mcp/proxy-modes.ts +4 -2
  9. package/dist/builtin/mcp/utils.ts +25 -0
  10. package/dist/builtin/subagents/package.json +1 -1
  11. package/dist/builtin/web-access/package.json +1 -1
  12. package/dist/builtin/workflows/CHANGELOG.md +9 -0
  13. package/dist/builtin/workflows/builtin/ralph-review-gate.ts +89 -0
  14. package/dist/builtin/workflows/builtin/ralph.ts +16 -51
  15. package/dist/builtin/workflows/package.json +1 -1
  16. package/dist/builtin/workflows/src/extension/dispatcher.ts +3 -0
  17. package/dist/builtin/workflows/src/extension/index.ts +15 -0
  18. package/dist/builtin/workflows/src/extension/runtime.ts +7 -0
  19. package/dist/builtin/workflows/src/runs/foreground/executor.ts +103 -7
  20. package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +133 -10
  21. package/dist/builtin/workflows/src/shared/persistence-restore.ts +2 -0
  22. package/dist/core/agent-session.d.ts +25 -0
  23. package/dist/core/agent-session.d.ts.map +1 -1
  24. package/dist/core/agent-session.js +124 -8
  25. package/dist/core/agent-session.js.map +1 -1
  26. package/dist/core/auth-guidance.d.ts +12 -0
  27. package/dist/core/auth-guidance.d.ts.map +1 -1
  28. package/dist/core/auth-guidance.js +24 -0
  29. package/dist/core/auth-guidance.js.map +1 -1
  30. package/dist/core/auth-storage.d.ts +42 -0
  31. package/dist/core/auth-storage.d.ts.map +1 -1
  32. package/dist/core/auth-storage.js +71 -10
  33. package/dist/core/auth-storage.js.map +1 -1
  34. package/dist/core/copilot-gemini-payload-sanitizer.d.ts +72 -0
  35. package/dist/core/copilot-gemini-payload-sanitizer.d.ts.map +1 -0
  36. package/dist/core/copilot-gemini-payload-sanitizer.js +296 -0
  37. package/dist/core/copilot-gemini-payload-sanitizer.js.map +1 -0
  38. package/dist/core/copilot-gemini-reasoning.d.ts +118 -0
  39. package/dist/core/copilot-gemini-reasoning.d.ts.map +1 -0
  40. package/dist/core/copilot-gemini-reasoning.js +260 -0
  41. package/dist/core/copilot-gemini-reasoning.js.map +1 -0
  42. package/dist/core/copilot-gemini-tool-arguments.d.ts +42 -0
  43. package/dist/core/copilot-gemini-tool-arguments.d.ts.map +1 -0
  44. package/dist/core/copilot-gemini-tool-arguments.js +179 -0
  45. package/dist/core/copilot-gemini-tool-arguments.js.map +1 -0
  46. package/dist/core/flattened-tool-arguments.d.ts +41 -0
  47. package/dist/core/flattened-tool-arguments.d.ts.map +1 -0
  48. package/dist/core/flattened-tool-arguments.js +136 -0
  49. package/dist/core/flattened-tool-arguments.js.map +1 -0
  50. package/dist/core/http-dispatcher.d.ts.map +1 -1
  51. package/dist/core/http-dispatcher.js +5 -0
  52. package/dist/core/http-dispatcher.js.map +1 -1
  53. package/dist/core/sdk.d.ts.map +1 -1
  54. package/dist/core/sdk.js +38 -8
  55. package/dist/core/sdk.js.map +1 -1
  56. package/dist/core/session-manager.d.ts +1 -1
  57. package/dist/core/session-manager.d.ts.map +1 -1
  58. package/dist/core/session-manager.js.map +1 -1
  59. package/dist/index.d.ts +1 -0
  60. package/dist/index.d.ts.map +1 -1
  61. package/dist/index.js +1 -0
  62. package/dist/index.js.map +1 -1
  63. package/docs/providers.md +1 -0
  64. package/docs/sessions.md +4 -0
  65. package/docs/workflows.md +7 -1
  66. package/examples/extensions/gondolin/package-lock.json +183 -183
  67. package/package.json +2 -2
@@ -121,6 +121,8 @@ export interface RunOpts extends Omit<AuthoringContract.RunOpts, "adapters" | "s
121
121
  ui?: WorkflowUIAdapter;
122
122
  /** Runtime execution mode. Controls child session policy metadata. */
123
123
  executionMode?: WorkflowExecutionMode;
124
+ /** Host-resolved non-default session directory inherited by stages without explicit sessionDir. */
125
+ defaultSessionDir?: string;
124
126
  /** Internal detached-run mode: surface ctx.ui.* as node-local workflow prompt stages. */
125
127
  usePromptNodesForUi?: boolean;
126
128
  /**
@@ -1003,6 +1005,24 @@ export function toolResultHasChatAnswer(result: unknown): boolean {
1003
1005
  );
1004
1006
  }
1005
1007
 
1008
+ // ---------------------------------------------------------------------------
1009
+ // Resume continuation hook (#1407)
1010
+ // ---------------------------------------------------------------------------
1011
+ // When an interactive paused stage is resumed with a user message, the resumed
1012
+ // answer turn should be followed by one deterministic same-session nudge so the
1013
+ // model returns to the interrupted work without showing the readiness gate for
1014
+ // the resume-answer turn itself.
1015
+
1016
+ export const RESUME_CONTINUATION_PROMPT = "Continue where you left off.";
1017
+
1018
+ export function shouldInjectResumeContinuation(state: {
1019
+ readonly resumeOccurred: boolean;
1020
+ readonly gateEnabled: boolean;
1021
+ readonly aborted: boolean;
1022
+ }): boolean {
1023
+ return state.resumeOccurred && state.gateEnabled && !state.aborted;
1024
+ }
1025
+
1006
1026
  let cachedReadinessGateTool: ReturnType<typeof createAskUserQuestionToolDefinition> | undefined;
1007
1027
  function readinessGateTool(): ReturnType<typeof createAskUserQuestionToolDefinition> {
1008
1028
  return (cachedReadinessGateTool ??= createAskUserQuestionToolDefinition());
@@ -4199,6 +4219,7 @@ export async function run<TInputs extends WorkflowInputValues>(
4199
4219
  __requestPause: async () => rejectReplayMutation("pause"),
4200
4220
  __resume: async () => rejectReplayMutation("resume"),
4201
4221
  __isPaused: () => false,
4222
+ __structuredOutputFinalized: () => false,
4202
4223
  };
4203
4224
  return replayContext;
4204
4225
  }
@@ -4233,6 +4254,7 @@ export async function run<TInputs extends WorkflowInputValues>(
4233
4254
  stageOptions: stageOptionsForContext,
4234
4255
  models: opts.models,
4235
4256
  executionMode: opts.executionMode,
4257
+ defaultSessionDir: opts.defaultSessionDir,
4236
4258
  onModelFallbackMetaChange(meta) {
4237
4259
  applyModelFallbackMeta(meta);
4238
4260
  if (stageSnapshot.status === "running") {
@@ -4251,6 +4273,12 @@ export async function run<TInputs extends WorkflowInputValues>(
4251
4273
  // When true the readiness gate is bypassed — the stage stays in the
4252
4274
  // composer without showing an extra confirmation UI (#1264).
4253
4275
  let chatAnswerObservedThisTurn = false;
4276
+ // Saturated one-slot marker for the latest real pause->resume(message)
4277
+ // transition that still needs the deterministic same-session continuation
4278
+ // prompt (#1407). Later paused resumes before the same post-turn drain
4279
+ // supersede earlier unfinished resumes; the slot is consumed before
4280
+ // prompting so a pause/resume of the continuation turn can set it again.
4281
+ let resumeContinuationPending = false;
4254
4282
  const hasActiveAskUserQuestion = (): boolean =>
4255
4283
  activeAskUserQuestionCalls.size > 0 || activeAskUserQuestionAnonymousCalls > 0;
4256
4284
  const unsubscribeAskUserQuestionWatcher = innerCtx.subscribe((event) => {
@@ -4421,6 +4449,18 @@ export async function run<TInputs extends WorkflowInputValues>(
4421
4449
  if (changed) {
4422
4450
  ensureReleaseBarrier(stageId);
4423
4451
  await cascadePauseFrom(stageId);
4452
+ // Mark the run paused once no stage is still actively running,
4453
+ // mirroring pauseRun() (runs/background/status.ts). This keeps a
4454
+ // manual TUI/Escape pause updating run-level status — and therefore
4455
+ // the main-chat status widget and `/workflow status` — identically
4456
+ // to the `workflow` tool and `/workflow pause`. recordRunPaused is
4457
+ // idempotent, so double-recording from the tool/slash path or from
4458
+ // cascade re-entry is safe.
4459
+ const run = activeStore.runs().find((candidate) => candidate.id === runId);
4460
+ const stillActive = run?.stages.some(
4461
+ (s) => s.status === "running" && s.id !== stageId,
4462
+ ) ?? false;
4463
+ if (!stillActive) activeStore.recordRunPaused(runId);
4424
4464
  }
4425
4465
  if (statusBeforePause === "pending" || statusBeforePause === "running" || innerCtx.isStreaming) {
4426
4466
  await innerCtx.__requestPause();
@@ -4429,13 +4469,30 @@ export async function run<TInputs extends WorkflowInputValues>(
4429
4469
  async resume(message?: string) {
4430
4470
  throwIfStageMutationBlocked();
4431
4471
  await ensureMessagingSession();
4432
- const changed = activeStore.recordStageResumed(runId, stageId);
4433
- if (changed) {
4434
- releaseStageBarrier(stageId);
4435
- await cascadeResumeFrom(stageId);
4472
+ const wasPausedBeforeResume = innerCtx.__isPaused();
4473
+ const hasResumeContinuationMessage = typeof message === "string" && message.trim().length > 0;
4474
+ const previousResumeContinuationPending = resumeContinuationPending;
4475
+ const queuedResumeContinuation = wasPausedBeforeResume && hasResumeContinuationMessage;
4476
+ if (queuedResumeContinuation) {
4477
+ resumeContinuationPending = true;
4436
4478
  }
4437
4479
  try {
4480
+ const changed = activeStore.recordStageResumed(runId, stageId);
4481
+ if (changed) {
4482
+ releaseStageBarrier(stageId);
4483
+ await cascadeResumeFrom(stageId);
4484
+ // Restore run-level status so a manual resume updates the main chat
4485
+ // like the `workflow` tool / `/workflow resume`. recordRunResumed is
4486
+ // a no-op when the run is not paused, so this is safe under cascade
4487
+ // and the tool/slash path.
4488
+ activeStore.recordRunResumed(runId);
4489
+ }
4438
4490
  await innerCtx.__resume(message);
4491
+ } catch (err) {
4492
+ if (queuedResumeContinuation) {
4493
+ resumeContinuationPending = previousResumeContinuationPending;
4494
+ }
4495
+ throw err;
4439
4496
  } finally {
4440
4497
  captureStageSessionMeta();
4441
4498
  }
@@ -4584,7 +4641,44 @@ export async function run<TInputs extends WorkflowInputValues>(
4584
4641
  }
4585
4642
  };
4586
4643
 
4587
- const runTrackedStageCall = async (call: () => Promise<string>, eagerSession = false): Promise<string> => {
4644
+ const suppressReadinessForCurrentTurn = (): void => {
4645
+ askUserQuestionObservedThisTurn = false;
4646
+ chatAnswerObservedThisTurn = false;
4647
+ };
4648
+
4649
+ const skipResumeContinuationInjection = (): boolean => {
4650
+ if (stageFinalized) return true;
4651
+ if (skippedForParallelFailFast) return true;
4652
+ if (stageSnapshot.status === "skipped" && stageSnapshot.skippedReason === "fail-fast") return true;
4653
+ if (isTerminalStage(stageSnapshot)) return true;
4654
+ if (stageFailFastScope?.failed === true && stageFailFastScope.activeStages.has(stageId)) return true;
4655
+ // A schema-backed stage can finalize during the resumed answer turn by
4656
+ // calling structured_output. That consumes the resume slot and
4657
+ // suppresses readiness for the resume-answer turn, but a second prompt
4658
+ // would violate the one-prompt schema contract.
4659
+ if (innerCtx.__structuredOutputFinalized()) return true;
4660
+ return false;
4661
+ };
4662
+
4663
+ const drainResumeContinuations = async <T>(currentResult: T): Promise<T> => {
4664
+ let result = currentResult;
4665
+ while (resumeContinuationPending) {
4666
+ resumeContinuationPending = false;
4667
+ suppressReadinessForCurrentTurn();
4668
+ if (!shouldInjectResumeContinuation({
4669
+ resumeOccurred: true,
4670
+ gateEnabled: readinessGateEnabled,
4671
+ aborted: ownController.signal.aborted,
4672
+ })) {
4673
+ continue;
4674
+ }
4675
+ if (skipResumeContinuationInjection()) continue;
4676
+ result = await raceAbort(innerCtx.prompt(RESUME_CONTINUATION_PROMPT), ownController.signal) as T;
4677
+ }
4678
+ return result;
4679
+ };
4680
+
4681
+ const runTrackedStageCall = async <T>(call: () => Promise<T>, eagerSession = false): Promise<T> => {
4588
4682
  throwIfWorkflowExitSelected();
4589
4683
  await waitForStageRelease();
4590
4684
  if (stageFinalized) {
@@ -4661,12 +4755,13 @@ export async function run<TInputs extends WorkflowInputValues>(
4661
4755
  };
4662
4756
  if (ownController.signal.aborted) abortSession();
4663
4757
  else ownController.signal.addEventListener("abort", abortSession, { once: true });
4664
- let result = "";
4758
+ let result: T;
4665
4759
  try {
4666
4760
  // Run the stage's initial agent turn.
4667
4761
  askUserQuestionObservedThisTurn = false;
4668
4762
  chatAnswerObservedThisTurn = false;
4669
4763
  result = await raceAbort(call(), ownController.signal);
4764
+ result = await drainResumeContinuations(result);
4670
4765
 
4671
4766
  // Per-turn readiness gate (#1099). When an agent turn ENDS (control
4672
4767
  // returns to the user): if the turn issued no ask_user_question
@@ -4706,7 +4801,8 @@ export async function run<TInputs extends WorkflowInputValues>(
4706
4801
  ownController.signal,
4707
4802
  );
4708
4803
  if (ownController.signal.aborted) break;
4709
- result = innerCtx.__getLastAssistantText() ?? result;
4804
+ result = (innerCtx.__getLastAssistantText() ?? result) as T;
4805
+ result = await drainResumeContinuations(result);
4710
4806
  }
4711
4807
  } finally {
4712
4808
  resolveNextTurnEnd = null;
@@ -124,6 +124,8 @@ export interface StageRunnerOpts {
124
124
  models?: WorkflowModelCatalogPort;
125
125
  /** Runtime execution mode forwarded to stage session adapters. */
126
126
  executionMode?: WorkflowExecutionMode;
127
+ /** Host-resolved non-default session directory inherited by stages without explicit sessionDir. */
128
+ defaultSessionDir?: string;
127
129
  /** Internal: notifies the executor when an in-flight fallback changes model/fast metadata. */
128
130
  onModelFallbackMetaChange?: (meta: StageModelFallbackMeta) => void;
129
131
  }
@@ -168,10 +170,16 @@ export interface InternalStageContext extends StageContext {
168
170
  __resume(message?: string): Promise<void>;
169
171
  /** Internal: true while a controlled pause is in flight. */
170
172
  __isPaused(): boolean;
173
+ /** Internal: true once a schema-backed prompt captured its final structured output. */
174
+ __structuredOutputFinalized(): boolean;
171
175
  }
172
176
 
173
- function stripWorkflowOnlyOptions(options: StageOptions | undefined): CreateAgentSessionOptions {
174
- if (!options) return {};
177
+ function stripWorkflowOnlyOptions(options: StageOptions | undefined, defaultSessionDir?: string): CreateAgentSessionOptions {
178
+ if (!options) {
179
+ return defaultSessionDir === undefined
180
+ ? {}
181
+ : { sessionManager: SessionManager.create(process.cwd(), defaultSessionDir) };
182
+ }
175
183
  const {
176
184
  schema: _schema,
177
185
  mcp: _mcp,
@@ -186,10 +194,11 @@ function stripWorkflowOnlyOptions(options: StageOptions | undefined): CreateAgen
186
194
  } = options;
187
195
  if (sessionOptions.sessionManager === undefined) {
188
196
  const cwd = sessionOptions.cwd ?? process.cwd();
197
+ const effectiveSessionDir = sessionDir ?? defaultSessionDir;
189
198
  if (context === "fork" && forkFromSessionFile !== undefined) {
190
- sessionOptions.sessionManager = SessionManager.forkFrom(forkFromSessionFile, cwd, sessionDir);
191
- } else if (sessionDir !== undefined) {
192
- sessionOptions.sessionManager = SessionManager.create(cwd, sessionDir);
199
+ sessionOptions.sessionManager = SessionManager.forkFrom(forkFromSessionFile, cwd, effectiveSessionDir);
200
+ } else if (effectiveSessionDir !== undefined) {
201
+ sessionOptions.sessionManager = SessionManager.create(cwd, effectiveSessionDir);
193
202
  }
194
203
  }
195
204
  return sessionOptions as CreateAgentSessionOptions;
@@ -645,7 +654,7 @@ async function finalizePromptOutput(
645
654
  }
646
655
 
647
656
  export function createStageContext(opts: StageRunnerOpts): InternalStageContext {
648
- const { stageId, stageName, adapters, runId, signal, stageOptions, executionMode } = opts;
657
+ const { stageId, stageName, adapters, runId, signal, stageOptions, executionMode, defaultSessionDir } = opts;
649
658
  const structuredOutputCapture = stageOptions?.schema ? createStructuredOutputCapture<unknown>() : undefined;
650
659
  const effectiveStageOptions = stageOptionsWithStructuredOutput(stageOptions, structuredOutputCapture);
651
660
  const meta: StageExecutionMeta = { runId, stageId, stageName, signal, stageOptions: effectiveStageOptions, executionMode };
@@ -723,6 +732,18 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
723
732
  let candidatesPromise: Promise<WorkflowResolvedModelCandidate[]> | undefined;
724
733
  let activeCandidateIndex: number | undefined;
725
734
  let selectedModel: string | undefined;
735
+ // A single ModelRegistry (carrying its AuthStorage) reused across every model
736
+ // fallback candidate in this stage. Captured from the first created session
737
+ // and threaded into subsequent candidate sessions so fallback does not rebuild
738
+ // auth/model state per candidate — which can misreport configured providers as
739
+ // "No API key found" under auth.json lock contention (issue #1431).
740
+ let sharedModelRegistry: CreateAgentSessionOptions["modelRegistry"];
741
+ // When true, the next promptWithFallback() call first retries the model the
742
+ // session last settled on (a post-completion follow-up, a subsequent turn, or
743
+ // a reattached session) before replaying the chain from the primary. Set on
744
+ // every successful attempt and by ensureSession()'s reattach branch; cleared
745
+ // when the current session is disposed.
746
+ let resumeCurrentSession = false;
726
747
  const modelAttempts: WorkflowModelAttempt[] = [];
727
748
  const modelWarnings: string[] = [];
728
749
  const pendingFallbackWarnings: string[] = [];
@@ -748,7 +769,10 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
748
769
  return candidatesPromise;
749
770
  }
750
771
 
751
- function stageOptionsForCandidate(candidate: WorkflowResolvedModelCandidate | undefined): StageOptions | undefined {
772
+ function stageOptionsForCandidate(
773
+ candidate: WorkflowResolvedModelCandidate | undefined,
774
+ resumeOptions?: { restoreSavedModel?: boolean },
775
+ ): StageOptions | undefined {
752
776
  const optionsForCandidate: StageOptions = candidate === undefined
753
777
  ? { ...(effectiveStageOptions ?? {}) }
754
778
  : {
@@ -763,6 +787,12 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
763
787
  fallbackModels: undefined,
764
788
  fallbackThinkingLevels: undefined,
765
789
  };
790
+ // When resuming a reattached session (a post-completion follow-up), drop any
791
+ // model override so the SDK restores the model the session last used — the
792
+ // one that actually worked — instead of forcing the primary/candidate model.
793
+ if (resumeOptions?.restoreSavedModel) {
794
+ delete optionsForCandidate.model;
795
+ }
766
796
  if (reattachSessionFile !== undefined && optionsForCandidate.sessionManager === undefined) {
767
797
  const cwd = optionsForCandidate.cwd ?? process.cwd();
768
798
  optionsForCandidate.sessionManager = SessionManager.open(
@@ -773,6 +803,11 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
773
803
  optionsForCandidate.context = undefined;
774
804
  optionsForCandidate.forkFromSessionFile = undefined;
775
805
  }
806
+ // Reuse the registry captured from the first session for later fallback
807
+ // candidates. A caller-supplied modelRegistry is preserved (issue #1431).
808
+ if (sharedModelRegistry !== undefined && optionsForCandidate.modelRegistry === undefined) {
809
+ optionsForCandidate.modelRegistry = sharedModelRegistry;
810
+ }
776
811
  return Object.keys(optionsForCandidate).length === 0 ? undefined : optionsForCandidate;
777
812
  }
778
813
 
@@ -829,6 +864,16 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
829
864
  function attachSession(created: StageSessionRuntime | StageSessionCreateResult): StageSessionRuntime {
830
865
  const result = normalizeSessionCreateResult(created);
831
866
  session = result.session;
867
+ // Capture the SDK ModelRegistry from the first real session so subsequent
868
+ // fallback candidates reuse the same already-loaded auth/model state instead
869
+ // of re-creating it per candidate (issue #1431). The test stub session has
870
+ // no modelRegistry, so capture is simply skipped there.
871
+ if (sharedModelRegistry === undefined) {
872
+ const withRegistry = result.session as Partial<Pick<AgentSession, "modelRegistry">>;
873
+ if (withRegistry.modelRegistry !== undefined) {
874
+ sharedModelRegistry = withRegistry.modelRegistry;
875
+ }
876
+ }
832
877
  sessionSettingsManager = result.settingsManager ?? result.session.settingsManager;
833
878
  if (pendingThinkingLevel !== undefined) {
834
879
  result.session.setThinkingLevel(pendingThinkingLevel);
@@ -851,12 +896,13 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
851
896
  async function createSession(
852
897
  candidate: WorkflowResolvedModelCandidate | undefined,
853
898
  consumer: AgentSessionConsumer,
899
+ resumeOptions?: { restoreSavedModel?: boolean },
854
900
  ): Promise<StageSessionRuntime> {
855
901
  applyCandidateThinking(candidate);
856
902
  const created = adapters.agentSession
857
- ? await adapters.agentSession.create(stripWorkflowOnlyOptions(stageOptionsForCandidate(candidate)) as StageSessionCreateOptions, {
903
+ ? await adapters.agentSession.create(stripWorkflowOnlyOptions(stageOptionsForCandidate(candidate, resumeOptions), defaultSessionDir) as StageSessionCreateOptions, {
858
904
  ...meta,
859
- stageOptions: stageOptionsForCandidate(candidate),
905
+ stageOptions: stageOptionsForCandidate(candidate, resumeOptions),
860
906
  })
861
907
  : missingAdapter(consumer);
862
908
  return attachSession(created);
@@ -864,12 +910,37 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
864
910
 
865
911
  async function ensureSession(consumer: AgentSessionConsumer = "prompt"): Promise<StageSessionRuntime> {
866
912
  if (disposed) throw new Error(`atomic-workflows: stage "${stageName}" session has been disposed`);
913
+ // Reuse an already-attached session. After model fallback settles, `session`
914
+ // is set but `sessionPromise` is left undefined; without this guard a
915
+ // follow-up's ensureSession() (via ctx.followUp / ctx.steer / __ensureSession)
916
+ // would create a brand-new session from the primary candidate and discard the
917
+ // working fallback session (issue #1431 follow-up).
918
+ if (session !== undefined) return session;
867
919
  if (!sessionPromise) {
868
920
  sessionPromise = (async () => {
869
921
  if (!hasExplicitModelFallbackConfig) return createSession(undefined, consumer);
870
922
  const candidates = await modelCandidates();
871
923
  const first = candidates[0];
872
924
  if (first === undefined) return createSession(undefined, consumer);
925
+
926
+ // Reattaching a previously-run session (e.g. a post-completion
927
+ // follow-up after the session was disposed): resume on the model the
928
+ // session last settled on — the one that actually worked — instead of
929
+ // replaying the fallback chain from an unavailable primary.
930
+ // promptWithFallback retries that model first; if it fails again it
931
+ // restarts the full chain from the primary.
932
+ if (reattachSessionFile !== undefined) {
933
+ const resumed = await createSession(undefined, consumer, { restoreSavedModel: true });
934
+ const restoredId = workflowModelId(resumed.model);
935
+ const restoredIndex = restoredId === undefined
936
+ ? -1
937
+ : candidates.findIndex((entry) => entry.id === restoredId);
938
+ activeCandidateIndex = restoredIndex >= 0 ? restoredIndex : undefined;
939
+ selectedModel = restoredId ?? first.id;
940
+ resumeCurrentSession = true;
941
+ return resumed;
942
+ }
943
+
873
944
  activeCandidateIndex = 0;
874
945
  selectedModel = first.id;
875
946
  return createSession(first, consumer);
@@ -889,6 +960,7 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
889
960
  session = undefined;
890
961
  sessionPromise = undefined;
891
962
  sessionSettingsManager = undefined;
963
+ resumeCurrentSession = false;
892
964
  for (const unsubscribe of listenerUnsubscribes.values()) unsubscribe();
893
965
  listenerUnsubscribes.clear();
894
966
  unsubscribeTerminateWatcher?.();
@@ -956,14 +1028,61 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
956
1028
  return;
957
1029
  }
958
1030
 
959
- let index = activeCandidateIndex ?? 0;
960
1031
  const capturedStructuredOutputForAttempt = (): boolean =>
961
1032
  structuredOutputCapture?.called === true && signal?.aborted !== true;
962
1033
  const recordSuccessfulAttempt = (candidate: WorkflowResolvedModelCandidate): void => {
963
1034
  modelAttempts.push({ model: candidate.id, success: true, ...modelAttemptReasoning(candidate) });
964
1035
  pendingFallbackWarnings.length = 0;
1036
+ // The session settled on a working model; a later follow-up/turn should
1037
+ // resume on it rather than replaying the chain from the primary.
1038
+ resumeCurrentSession = true;
965
1039
  };
966
1040
 
1041
+ // Resume preamble: when the stage already settled on a working model (a
1042
+ // post-completion follow-up, a subsequent turn, or a reattached session),
1043
+ // retry that model first instead of replaying the chain from an unavailable
1044
+ // primary. If that model now fails retryably, restart the full chain from
1045
+ // the primary.
1046
+ if (resumeCurrentSession && session !== undefined) {
1047
+ resumeCurrentSession = false;
1048
+ const resumedSession = session;
1049
+ const resumedLabel = selectedModel ?? workflowModelId(resumedSession.model) ?? candidates[0]!.id;
1050
+ notifyModelFallbackMetaChange();
1051
+ try {
1052
+ const { terminalScanStartIndex } = await promptWithPauseResume(resumedSession, text, sdkOptions);
1053
+ const terminalFailure = latestTerminalAssistantFailureSince(resumedSession.messages, terminalScanStartIndex);
1054
+ if (terminalFailure === undefined || capturedStructuredOutputForAttempt()) {
1055
+ modelAttempts.push({ model: resumedLabel, success: true });
1056
+ pendingFallbackWarnings.length = 0;
1057
+ resumeCurrentSession = true;
1058
+ return;
1059
+ }
1060
+ throw new WorkflowPromptModelFailure(terminalFailure);
1061
+ } catch (err) {
1062
+ if (capturedStructuredOutputForAttempt() && isRetryableModelFailure(err)) {
1063
+ modelAttempts.push({ model: resumedLabel, success: true });
1064
+ pendingFallbackWarnings.length = 0;
1065
+ resumeCurrentSession = true;
1066
+ return;
1067
+ }
1068
+ const message = errorMessage(err);
1069
+ modelAttempts.push({ model: resumedLabel, success: false, error: message });
1070
+ if (signal?.aborted || !isRetryableModelFailure(err)) {
1071
+ modelWarnings.push(...pendingFallbackWarnings);
1072
+ pendingFallbackWarnings.length = 0;
1073
+ notifyModelFallbackMetaChange();
1074
+ throw err;
1075
+ }
1076
+ // The resumed model failed retryably: restart the whole fallback chain
1077
+ // from the primary. disposeCurrentSession clears resumeCurrentSession.
1078
+ pendingFallbackWarnings.push(`[fallback] resume on ${resumedLabel} failed: ${message}. Restarting fallback from ${candidateLabel(candidates[0]!)}.`);
1079
+ await disposeCurrentSession();
1080
+ activeCandidateIndex = undefined;
1081
+ }
1082
+ }
1083
+
1084
+ let index = activeCandidateIndex ?? 0;
1085
+
967
1086
  while (index < candidates.length) {
968
1087
  const candidate = candidates[index]!;
969
1088
  const activeSession = session && activeCandidateIndex === index
@@ -1228,6 +1347,10 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
1228
1347
  __isPaused() {
1229
1348
  return pauseRequest !== null;
1230
1349
  },
1350
+
1351
+ __structuredOutputFinalized() {
1352
+ return structuredOutputCapture?.called === true;
1353
+ },
1231
1354
  };
1232
1355
  }
1233
1356
 
@@ -47,6 +47,8 @@ export interface SessionEntry {
47
47
  /** Structural type for pi's sessionManager (optional — degrades gracefully). */
48
48
  export interface SessionManager {
49
49
  getEntries?: () => SessionEntry[] | readonly SessionEntry[];
50
+ getSessionDir?: () => string;
51
+ usesDefaultSessionDir?: () => boolean;
50
52
  }
51
53
 
52
54
  // ---------------------------------------------------------------------------
@@ -563,6 +563,31 @@ export declare class AgentSession {
563
563
  * Context overflow errors are NOT retryable (handled by compaction instead).
564
564
  */
565
565
  private _isRetryableError;
566
+ /**
567
+ * For GitHub Copilot Gemini, reconstruct flattened tool-call arguments
568
+ * (for example `edits[0].newText`) into the nested arrays/objects Gemini
569
+ * produced before the assistant message is persisted, so saved transcripts
570
+ * never carry the flattened CAPI wire shape and replays loaded from disk match
571
+ * the structure Gemini signed. In-place, gated to Copilot Gemini, and a no-op
572
+ * for well-formed arguments or any other provider/model. The outbound replay
573
+ * normalizer still heals already-persisted (legacy) sessions on the wire.
574
+ */
575
+ private _normalizePersistedGeminiToolArgs;
576
+ /**
577
+ * Detect a degenerate empty completion: the provider ended the stream with no
578
+ * usable content and zero output tokens. Seen with github-copilot Gemini models
579
+ * that emit finish_reason "stop" (or a tool-use stop) with an empty content array
580
+ * and 0 output tokens, leaving the turn dead instead of producing the next step.
581
+ *
582
+ * These are treated as retryable so the harness re-issues the request rather than
583
+ * silently stopping mid-task. Guarded tightly (no text, no tool call, no thinking,
584
+ * and output === 0) so legitimate non-empty turns are never matched.
585
+ *
586
+ * Intentionally provider-agnostic (not gated to Copilot Gemini): a degenerate
587
+ * empty turn is a transient failure for any provider. It is bounded by
588
+ * `maxRetries` and falls through to normal handling on exhaustion.
589
+ */
590
+ private _isEmptyCompletion;
566
591
  private _handleRetryableError;
567
592
  /**
568
593
  * Cancel in-progress retry.