@bastani/atomic 0.8.31-alpha.2 → 0.8.31-alpha.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -3
- package/dist/builtin/cursor/CHANGELOG.md +1 -1
- package/dist/builtin/cursor/package.json +2 -2
- package/dist/builtin/intercom/package.json +1 -1
- package/dist/builtin/mcp/CHANGELOG.md +5 -0
- package/dist/builtin/mcp/direct-tools.ts +4 -2
- package/dist/builtin/mcp/package.json +1 -1
- package/dist/builtin/mcp/proxy-modes.ts +4 -2
- package/dist/builtin/mcp/utils.ts +25 -0
- package/dist/builtin/subagents/package.json +1 -1
- package/dist/builtin/web-access/package.json +1 -1
- package/dist/builtin/workflows/CHANGELOG.md +5 -0
- package/dist/builtin/workflows/builtin/ralph.ts +1 -0
- package/dist/builtin/workflows/package.json +1 -1
- package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +114 -4
- package/dist/core/agent-session.d.ts +25 -0
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +135 -11
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/auth-guidance.d.ts +12 -0
- package/dist/core/auth-guidance.d.ts.map +1 -1
- package/dist/core/auth-guidance.js +24 -0
- package/dist/core/auth-guidance.js.map +1 -1
- package/dist/core/auth-storage.d.ts +42 -0
- package/dist/core/auth-storage.d.ts.map +1 -1
- package/dist/core/auth-storage.js +71 -10
- package/dist/core/auth-storage.js.map +1 -1
- package/dist/core/context-window.d.ts +15 -0
- package/dist/core/context-window.d.ts.map +1 -1
- package/dist/core/context-window.js +11 -0
- package/dist/core/context-window.js.map +1 -1
- package/dist/core/copilot-gemini-payload-sanitizer.d.ts +72 -0
- package/dist/core/copilot-gemini-payload-sanitizer.d.ts.map +1 -0
- package/dist/core/copilot-gemini-payload-sanitizer.js +296 -0
- package/dist/core/copilot-gemini-payload-sanitizer.js.map +1 -0
- package/dist/core/copilot-gemini-reasoning.d.ts +118 -0
- package/dist/core/copilot-gemini-reasoning.d.ts.map +1 -0
- package/dist/core/copilot-gemini-reasoning.js +260 -0
- package/dist/core/copilot-gemini-reasoning.js.map +1 -0
- package/dist/core/copilot-gemini-tool-arguments.d.ts +42 -0
- package/dist/core/copilot-gemini-tool-arguments.d.ts.map +1 -0
- package/dist/core/copilot-gemini-tool-arguments.js +179 -0
- package/dist/core/copilot-gemini-tool-arguments.js.map +1 -0
- package/dist/core/copilot-model-catalog.d.ts +26 -11
- package/dist/core/copilot-model-catalog.d.ts.map +1 -1
- package/dist/core/copilot-model-catalog.js +34 -9
- package/dist/core/copilot-model-catalog.js.map +1 -1
- package/dist/core/flattened-tool-arguments.d.ts +41 -0
- package/dist/core/flattened-tool-arguments.d.ts.map +1 -0
- package/dist/core/flattened-tool-arguments.js +136 -0
- package/dist/core/flattened-tool-arguments.js.map +1 -0
- package/dist/core/http-dispatcher.d.ts.map +1 -1
- package/dist/core/http-dispatcher.js +5 -0
- package/dist/core/http-dispatcher.js.map +1 -1
- package/dist/core/model-registry.d.ts.map +1 -1
- package/dist/core/model-registry.js +6 -4
- package/dist/core/model-registry.js.map +1 -1
- package/dist/core/sdk.d.ts.map +1 -1
- package/dist/core/sdk.js +38 -8
- package/dist/core/sdk.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/docs/providers.md +4 -3
- package/docs/workflows.md +2 -0
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
### Added
|
|
6
6
|
|
|
7
|
-
- Added configurable context-window support for models that declare `contextWindowOptions`, including explicit `--context-window` CLI/settings control, a GitHub Copilot CLI-style `/model`-flow picker (numbered `Default`/`Long context` tiers with token counts), session replay, SDK/runtime/RPC APIs, and docs while preserving each model's scalar default context window. For GitHub Copilot, context windows are measured in **input (prompt) tokens** (consistent with every other provider) and derived **dynamically from GitHub's live CAPI model catalog** (`GET /models`) instead of a hardcoded model list: Atomic resolves each model's input budget as `max_prompt_tokens || max_context_window_tokens || 128_000` and, for tiered models, exposes
|
|
7
|
+
- Added configurable context-window support for models that declare `contextWindowOptions`, including explicit `--context-window` CLI/settings control, a GitHub Copilot CLI-style `/model`-flow picker (numbered `Default`/`Long context` tiers with token counts), session replay, SDK/runtime/RPC APIs, and docs while preserving each model's scalar default context window. For GitHub Copilot, context windows are measured in **input (prompt) tokens** (consistent with every other provider) and derived **dynamically from GitHub's live CAPI model catalog** (`GET /models`) instead of a hardcoded model list: Atomic resolves each model's input budget as `max_prompt_tokens || max_context_window_tokens || 128_000` and, for tiered models, exposes a selectable default window (`token_prices.default.context_max`) plus a long window set to the model's full `max_context_window_tokens` (retaining `max_prompt_tokens` as the internal effective compaction/overflow budget) — gated on the user actually having the GitHub Copilot provider and cached on disk for 30 minutes (for example `github-copilot/gpt-5.5` exposes `272k` default / `1.05m` long, and the Claude/Gemini long-context models `200k` default / `1m` long). Atomic raises the local budget and sends `X-GitHub-Api-Version: 2026-06-01`, while GitHub applies the long-context billing tier server-side by prompt token count. Long-context Copilot requests consume more AI credits and require Copilot long-context/usage-based billing entitlement; offline, unauthenticated, or non-Copilot sessions leave the built-in window untouched and show no picker; custom providers and explicit model overrides can still expose their own selectable windows ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
8
8
|
- Exported context-window helper functions and types from the package root, including parser/formatter/normalizer/selection utilities and the `Model<Api>` augmentation for `contextWindowOptions`/`defaultContextWindow`, so SDK consumers can use the public API without importing internal source paths ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
9
9
|
- Added RPC mode runtime context-window commands so headless clients can read supported token budgets with `get_available_context_windows` and select the active runtime budget with `set_context_window` without persisting context-window settings ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
10
10
|
- Added upstream pi v0.79.7 automatic theme mode support so `/settings` can choose separate light and dark themes and follow terminal color-scheme changes.
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
|
|
13
13
|
### Changed
|
|
14
14
|
|
|
15
|
-
- Changed
|
|
15
|
+
- Changed the GitHub Copilot **long-context tier to advertise the model's full context window** (`max_context_window_tokens`, for example `github-copilot/gpt-5.5` `1.05m`, and `github-copilot/claude-opus-4.8`/`github-copilot/gemini-3.1-pro-preview` `1m`) instead of GitHub's prompt-token cap, so Copilot models report and display the same window as the native `openai/*` and `anthropic/*` providers (the chat footer denominator now shows the full window). GitHub's lower server-side input cap (`max_prompt_tokens`, e.g. `922k`/`936k`, which equals `max_context_window_tokens − max_output_tokens`) is now parsed and carried as an internal effective input budget (`Model.maxInputTokens`, exposed via the new `getEffectiveInputBudget()` helper): auto-compaction thresholds and the Copilot overflow-recovery guard run against that budget while the picker/footer show the full window. As a result, a prompt that reaches the real prompt cap is now compacted-and-retried automatically (previously the long window equalled the cap), and the friendly “enable long-context/usage-based billing / server-cap” hint fires only when GitHub rejects a prompt *below* the cap (a genuine entitlement/tier drop) rather than at the cap. Sparse catalog payloads without `max_context_window_tokens` still fall back to the long-context prompt threshold, and the on-disk Copilot catalog cache schema version was bumped so existing caches refetch the new windows ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
16
|
+
- Changed built-in GitHub Copilot context windows to be measured in **input (prompt) tokens** (matching every other provider) and derived from GitHub's live CAPI model catalog (`GET /models`, cached 30 minutes, gated on the Copilot provider) instead of a hardcoded long-context model list, so newly added/removed Copilot models and retiered windows are reflected automatically without shipping a stale snapshot. Each model's window now resolves to `max_prompt_tokens || max_context_window_tokens || 128_000`, and tiered models expose a selectable default window (`token_prices.default.context_max`) plus a long window set to the model's full `max_context_window_tokens` (e.g. `gpt-5.5` 272k/1.05m, Claude/Gemini 200k/1m), with `max_prompt_tokens` retained as the internal effective compaction/overflow budget — while preserving custom provider entries and explicit `models.json` overrides and relying on GitHub's API-version header and server-side tier selection rather than payload fields or model-id variants ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
16
17
|
- Bumped the bundled upstream pi runtime libraries `@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui` from `^0.79.4` to `^0.79.6` so Atomic's installed pi runtime packages pick up upstream v0.79.5/v0.79.6 provider, model, thinking-payload, and shared TUI compatibility fixes; no Atomic coding-agent source changes were made for upstream coding-agent-only marked export or fetch-override behavior in this dependency sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
|
|
17
18
|
- Synced Atomic's coding-agent fork with upstream pi v0.79.7, including the new self-only default for bare `atomic update` (`atomic update --all` restores the previous all-packages behavior), automatic light/dark theme settings, configured project config directory labels, extension example updates, model-search parity, tree navigator horizontal panning, and the latest user-facing docs.
|
|
18
19
|
- Bumped the bundled upstream pi runtime libraries `@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui` from `^0.79.6` to `^0.79.7` so Atomic inherits upstream v0.79.7 TUI color-scheme, Warp image, generated model catalog, and agent-core fixes.
|
|
@@ -22,14 +23,26 @@
|
|
|
22
23
|
|
|
23
24
|
### Fixed
|
|
24
25
|
|
|
26
|
+
- Fixed `github-copilot/*` Gemini models (for example `github-copilot/gemini-3.1-pro-preview` and `github-copilot/gemini-3.5-flash`) failing **every** chat turn with `Error: 400 invalid request body`. These models are served through GitHub's Copilot API (CAPI), which translates the OpenAI chat-completions request into a Google GenAI `GenerateContent` request and forwards tool/function JSON Schema `anyOf`/`oneOf` verbatim into Gemini's `FunctionDeclaration` schema. Gemini rejects a union whose branch is a complex **object** schema, so Google returned HTTP 400 and CAPI relabelled it `{"error":{"code":"invalid_request_body"}}`. Because Atomic's bundled `workflow` tool — and any tool using the TypeBox `Type.Union([Type.Object(...), Type.String()])` pattern for fields such as `task`, `chain`, and `parallel` — is present in normal chat turns, the request failed before the model ever ran (it was previously masked only when a fallback model existed). Atomic now sanitizes outbound tool JSON Schemas for GitHub Copilot Gemini models into the subset CAPI/Gemini honors: it resolves object/array-bearing `anyOf`/`oneOf` to their most expressive branch, converts `const`/literal unions to `enum`, collapses nullable unions to `nullable`, prunes `required` to existing properties, and drops non-portable keywords (`additionalProperties`, `patternProperties`, `$schema`, `format`, `pattern`, numeric/length bounds, `default`, `title`, etc.). The transform is gated to `github-copilot` Gemini `openai-completions` models and runs last in the provider-payload pipeline (so it also covers extension/SDK-injected tools), leaving every other provider/model payload unchanged.
|
|
27
|
+
- Fixed `github-copilot/*` Gemini models getting stuck in an infinite tool-call retry loop (most visibly on the workflow `structured_output` tool). Capturing the raw CAPI stream confirmed that Gemini serializes array/object function-call arguments as **flattened indexed keys** on the wire — for example `{ keywords: ["a", "b"] }` arrives as `{ "keywords[0]": "a", "keywords[1]": "b" }` — so schema validation failed (`keywords: must have required properties keywords` and `root: must not have additional properties`) and the model re-emitted the same shape forever. Atomic now reconstructs flattened tool-call arguments (`name[i]`, `name[i].sub`, `parent.child`) back into proper arrays/objects in each tool's `prepareArguments` step, before validation runs. Gated to GitHub Copilot Gemini models at call time and a no-op for well-formed arguments, so it covers built-in, extension, SDK, and MCP tools without affecting any other provider/model.
|
|
28
|
+
- Fixed `github-copilot/*` Gemini models (for example `github-copilot/gemini-3.1-pro-preview`) silently dying mid-task instead of continuing the turn. Inspecting the affected sessions and confirming against GitHub's Copilot API (CAPI) source showed two distinct degenerate stream endings that Atomic was not recovering from: (1) CAPI's `getAzureFinishReason` maps several Gemini finish reasons — `MALFORMED_FUNCTION_CALL`, `OTHER`, `LANGUAGE`, and `UNEXPECTED_TOOL_CALL` — to a bare OpenAI `finish_reason: "error"`, which `pi-ai` surfaces as `"Provider finish_reason: error"`; the auto-retry classifier's regex did not match it, so the turn ended with an empty assistant message and no retry; and (2) Gemini intermittently ends the stream with `finish_reason: "stop"`, an **empty content array**, and **0 output tokens**, which Atomic treated as a successful (if empty) turn and stopped. Atomic now treats bare `finish_reason: error`/`content_filter` as retryable and detects degenerate empty completions (no text/tool-call/thinking content **and** zero output tokens on a `stop`/`toolUse` turn) as retryable, re-issuing the request with the existing exponential-backoff path. Empty `stop` completions also no longer reset the auto-retry counter, so repeated empties stay bounded by `maxRetries` instead of retrying forever.
|
|
29
|
+
- Fixed the **root cause** behind `github-copilot/*` Gemini (for example `github-copilot/gemini-3.1-pro-preview`) returning repeated empty completions and "stopping to respond" after its first tool call. Gemini is a thinking model: each function/tool call it emits comes with an opaque **thought signature** that must be replayed, verbatim, on the next request or Gemini refuses to continue the reasoning chain. As confirmed against GitHub's Copilot API (CAPI) source, CAPI carries that signature in a non-standard `reasoning_opaque` field on the assistant message / streamed delta and reads the same `reasoning_opaque` back off the assistant message on replay to re-attach the signature to each Gemini function-call part (keyed by `tool_call.id`). The bundled `pi-ai` OpenAI-completions client never captured or replayed `reasoning_opaque` (it only round-trips the OpenRouter-style `reasoning_details: [{ type: "reasoning.encrypted", id, data }]` shape, which CAPI does not emit), so the real Gemini thought signature was dropped inbound and never sent back. With it missing, CAPI substitutes the sentinel `skip_thought_signature_validator` on the first replayed function call and Gemini responds with an empty candidate / `finish_reason: "stop"` and zero output tokens — which the empty-completion retry above then re-issued against the same signature-less history until `maxRetries` was exhausted. Atomic now bridges `reasoning_opaque` to the mechanism the client already round-trips: a `globalThis.fetch` interceptor scoped to `*.githubcopilot.com` event streams rewrites each CAPI Gemini SSE delta that carries both `reasoning_opaque` and a `tool_calls[].id` to add a matching `reasoning_details` entry (captured by the client as the tool call's `thoughtSignature`), and a provider-payload (`onPayload`) transform converts the `reasoning_details` the client re-emits on replayed assistant messages back into the single `reasoning_opaque` field CAPI reads. Both transforms are gated to GitHub Copilot Gemini `openai-completions` models and are no-ops for every other provider/model and for Gemini turns that carry no thought signature; the thinking text round-trips inside the same opaque blob, so combined think-then-tool-call turns keep their signatures across session save/load.
|
|
30
|
+
- Fixed a second `github-copilot/*` Gemini multi-turn failure that surfaced once thought signatures were preserved: a turn after any **array/object tool call** (most visibly `edit`) ended with a bare `finish_reason: "error"` and then retried to exhaustion. CAPI delivers Gemini's array/object function-call arguments as **flattened indexed keys** (for example an `edit` call arrives as `{ "edits[0].newText": "...", "edits[0].oldText": "...", "path": "..." }`), and Atomic only reconstructed them at tool **execution** time — the persisted assistant message kept the raw flattened keys. On the next turn that message was replayed verbatim, CAPI parsed those literal keys straight into the Gemini `FunctionCall.Args`, and the resulting call no longer matched the tool's declared schema (nor the structure Gemini originally signed), so Gemini ended the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER` — all of which CAPI maps to a bare OpenAI `finish_reason: "error"`. Atomic now also reconstructs flattened tool-call arguments on the **outbound replay payload** for GitHub Copilot Gemini: each replayed assistant `tool_calls[].function.arguments` is unflattened (reusing the same `unflattenGeminiToolArguments` logic with the tool's own parameter schema, looked up from the request `tools`) back into the nested arrays/objects Gemini produced, before the request reaches CAPI. This runs in the provider-payload pipeline after schema sanitization and alongside the `reasoning_opaque` restore, is gated to GitHub Copilot Gemini `openai-completions` models, fails open on non-JSON arguments, and is a no-op for already well-formed arguments — healing both new sessions and already-persisted transcripts that contain flattened Gemini tool calls.
|
|
31
|
+
- Reduced `github-copilot/*` Gemini `MALFORMED_FUNCTION_CALL` failures (surfaced as `finish_reason: "error"`) by emitting tool/function JSON Schemas in the shape Gemini resolves most reliably. The Gemini schema sanitizer now infers an explicit `type` on container nodes that omit one (`properties`/`required` ⇒ `object`, `items` ⇒ `array`) and collapses a tuple-form `items` array — which Gemini's single-`items` function-declaration schema rejects — into a single (most expressive object/array) schema. Gated to `github-copilot` Gemini `openai-completions` models and applied last in the provider-payload pipeline, so every other provider/model payload is unchanged.
|
|
32
|
+
- Fixed `github-copilot/*` Gemini tool calls with **nested object arguments but no arrays** still failing validation and looping. CAPI flattens such arguments to purely dotted keys (for example `{ "metadata.confidence": 0.5 }` with no bracket index anywhere), which the previous reconstruction — gated on the presence of a `name[<digit>]` bracket key — skipped, so the nested-object call never validated. Atomic now also reconstructs purely dotted keys, disambiguated by the tool's own parameter schema: a dotted key is split into a nested path only when its head segment names an object/array container property (including container branches of an `anyOf`/`oneOf` union), so legitimate argument keys that happen to contain a dot are left intact. Bracket-indexed reconstruction is unchanged, and the transform remains gated to GitHub Copilot Gemini models and a no-op for well-formed arguments.
|
|
33
|
+
- Hardened the GitHub Copilot Gemini tool-argument reconstruction against prototype pollution. `unflattenGeminiToolArguments` previously walked model-emitted key paths into a fresh object without guarding `__proto__`/`constructor`/`prototype`, so a steered Gemini tool call mixing a bracket key with e.g. `__proto__.polluted` could reach and mutate `Object.prototype` process-wide. Reconstruction now drops any key whose parsed path contains one of those segments (at any position, including the final segment and a literal plain key). The parse/assign/compact reconstruction (and this single guard) lives in one canonical module shared with the `@bastani/mcp` `callTool` normalizer, so the two implementations can no longer diverge on the fix.
|
|
34
|
+
- Scoped the GitHub Copilot Gemini `content_filter` retry. The earlier finish-reason retry change treated `finish_reason: "content_filter"` as retryable for **every** provider/model; a genuine `content_filter` safety block on a non-Gemini provider would therefore be re-issued up to `maxRetries` times before its inevitable failure. `content_filter` is now retried only for GitHub Copilot Gemini models (where CAPI maps spurious Gemini RECITATION/safety blocks to it); a bare `finish_reason: "error"` remains retryable for all providers as a generic transient failure.
|
|
25
35
|
- Fixed RPC unknown-command errors to include the request id so RPC clients do not hang waiting for a response.
|
|
26
36
|
- Fixed `/model` autocomplete and model-selection searches to match provider/model queries regardless of whether the provider or model token is typed first.
|
|
27
37
|
- Fixed the tree navigator to horizontally pan deep entries so the selected item remains readable.
|
|
28
|
-
- Fixed long-context selection for GitHub Copilot's rounded 1M model names: requesting `1m`
|
|
38
|
+
- Fixed long-context selection for GitHub Copilot's rounded 1M model names: requesting `1m` now selects the advertised full context window when the catalog exposes it, and otherwise resolves to the largest advertised long-context window at or below the request (for example `936k` for sparse catalog payloads) instead of falling back to the short `200k` tier. Interactive/context-picker persistence now writes the effective selected budget to per-model `defaultContextWindows["provider/modelId"]` settings instead of the global `defaultContextWindow` fallback, so Copilot-specific prompt caps such as `936k`/`922k` do not leak into Anthropic, Cursor, or other providers on restart. Legacy/stale global `defaultContextWindow` values from earlier builds are now treated as optional fallbacks and ignored without warning when unsupported by the active model.
|
|
29
39
|
- Fixed a GitHub Copilot context-window warning on restart: after selecting a long-context window (e.g. `claude-opus-4.8` → `936k`) and reopening Atomic, startup validated the persisted selection before the (async, auth-gated) Copilot catalog loaded, so the model still looked limited to its default window and Atomic warned “Context window 936k is not supported… Supported values: 200k” and reset the choice. The model registry now seeds the Copilot context-window catalog synchronously from its on-disk cache at construction (ignoring the refresh TTL, gated on a `github-copilot` credential), so a returning user's selection is recognized immediately while the live refresh still runs in the background ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
30
40
|
- Fixed context-window startup, session-switch, settings, and RPC edge cases: unknown provider fallback models no longer inherit selectable context-window options from provider defaults, fatal startup diagnostics no longer persist context-window settings, `AgentSession.setModel()` preserves an incoming target model's explicit selected context window, model-switch paths that change effective context windows now notify listeners via `context_window_changed`, the interactive context-window picker keys selection on raw token counts so colliding formatted labels never change which window is selected, RPC `set_model` returns the effective post-switch session model, and explicit startup `contextWindow` selections are journaled even when they equal the model scalar default ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
31
41
|
- Fixed `AgentSession.setContextWindow()` so bare SDK/runtime calls update the active session, append `context_window_change`, and emit `context_window_changed` without persisting settings; callers must pass `{ persistDefault: true }` to update the active model's `defaultContextWindows["provider/modelId"]` setting ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
32
42
|
- Fixed `packages/coding-agent` source-CLI subprocess tests (`session-id-readonly`, `startup-session-name`, `stdout-cleanliness`) crashing with `ERR_MODULE_NOT_FOUND` (for example `src/core/tools/oversized-tool-result.js`) when the Vitest worker pool runs under Node. They now launch the TypeScript source CLI with Bun explicitly via a `bunExecutable()` helper (matching `context-window-cli`/`rpc-context-window`) instead of assuming `process.execPath` is Bun, so the package test suite is portable across environments. The repo-wide `.js`->`.ts` source-import convention and shipped `dist/` are unchanged ([#1419](https://github.com/bastani-inc/atomic/issues/1419)).
|
|
43
|
+
- Fixed a credential-store **load failure being misreported as `No API key found`**. When a fresh `AuthStorage` could not read `auth.json` (for example it was briefly locked by a concurrent process, surfacing an `ELOCKED` error), `reload()` recorded the error but left an empty in-memory credential set, and the prompt preflight then threw `No API key found for <provider>` — even though the credentials existed on disk. `AuthStorage` now exposes `getLoadError()`, and the prompt preflight surfaces the real load failure (`Could not load stored credentials for <provider>: …`, with the original error attached as `cause`) instead of claiming the key is absent, so a transient store-read failure is no longer indistinguishable from genuinely missing credentials. The message intentionally still reads as a recoverable auth failure so model fallback keeps retrying ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
44
|
+
- Fixed `createAgentSession()` constructing a throwaway `AuthStorage` even when a `modelRegistry` was supplied. Because `AuthStorage` eagerly calls `reload()` in its constructor — taking the `auth.json` file lock — building one only to discard it added redundant lock contention on every session creation. `createAgentSession()` now only creates an `AuthStorage` when neither a `modelRegistry` nor an `authStorage` is provided, so callers that reuse one registry across sessions (such as workflow stage model fallback) no longer trigger an extra contended credential reload per session ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
45
|
+
- Fixed the remaining `auth.json` **lock-contention hard failure** under many concurrent sessions (for example a workflow that fans out parallel stages through model fallback). `AuthStorage.reload()` previously acquired the exclusive `proper-lockfile` write lock just to *read* `auth.json`, and its sync acquisition (`acquireLockSyncWithRetry`) used a 200 ms **event-loop-blocking busy-wait**; when one stage held the lock across an async OAuth token refresh, sibling stages busy-waited (starving the very event loop the holder needed to release), gave up with `ELOCKED`, and recorded a credential load failure. With the #1431 message fix in place this was no longer misreported as `No API key found`, but it could still burn a stage's configured fallback candidates (each skipped as a recoverable auth error) until the chain was exhausted and the stage hard-failed. Pure reads are now **lock-free**: `AuthStorageBackend` gains an optional `read()` method (built-in backends implement it; custom backends that omit it fall back to the previous locked read, so the released interface stays compatible) and `reload()` uses it without taking any lock, while writers persist `auth.json` **atomically** (sibling temp file + `rename`) so a lock-free reader always observes a complete previous-or-next snapshot, never a torn one. The exclusive lock is retained only for read-modify-write paths (credential `set`/`remove` and locked OAuth refresh), and file permissions stay `0600`. Concurrent session creation no longer contends on or is starved by the credential store ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
33
46
|
|
|
34
47
|
## [0.8.30] - 2026-06-17
|
|
35
48
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
### Changed
|
|
6
6
|
|
|
7
|
-
- Published a synchronized Atomic 0.8.31-alpha.
|
|
7
|
+
- Published a synchronized Atomic 0.8.31-alpha.3 prerelease; no functional Cursor provider changes were made after 0.8.30.
|
|
8
8
|
|
|
9
9
|
## [0.8.30] - 2026-06-17
|
|
10
10
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/cursor",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.4",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Experimental first-party Atomic extension for Cursor OAuth, model discovery, and streaming provider registration.",
|
|
6
6
|
"contributors": [
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
}
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
|
-
"@bastani/atomic-natives": "0.8.31-alpha.
|
|
43
|
+
"@bastani/atomic-natives": "0.8.31-alpha.4",
|
|
44
44
|
"@bufbuild/protobuf": "^2.0.0"
|
|
45
45
|
}
|
|
46
46
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/intercom",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.4",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension providing a private coordination channel between parent and child agent sessions. Fork of: https://github.com/nicobailon/pi-intercom",
|
|
6
6
|
"contributors": [
|
|
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- Hardened `unflattenToolArguments` against prototype pollution: a flattened key whose path walks through `__proto__`, `constructor`, or `prototype` (at any position, including the final segment and a literal plain key) is now dropped instead of being written, so a model-emitted key such as `__proto__.polluted` can no longer reach and mutate `Object.prototype`. The reconstruction logic (parse/assign/compact plus this guard) is now imported from a single canonical implementation in `@bastani/atomic` (`reconstructFlattenedKeys`) instead of being duplicated in `packages/mcp/utils.ts`, so the host-runtime and MCP `callTool` paths can no longer drift (the previous near-duplicate copies had already diverged on the security guard). Behavior for well-formed and ordinary flattened arguments is unchanged.
|
|
13
|
+
- Fixed MCP tool calls failing under GitHub Copilot Gemini models (e.g. `github-copilot/gemini-3.1-pro-preview`). Gemini, served through Copilot's CAPI/GenAI gateway, serializes array/object function-call arguments as flattened indexed keys on the wire — for example `{ keywords: ["a", "b"] }` arrives as `{ "keywords[0]": "a", "keywords[1]": "b" }` — which MCP servers reject as invalid arguments. The extension now normalizes arguments at the `callTool` boundary (both direct-tool and proxy/gateway paths) via `unflattenToolArguments`, reconstructing `name[i]`, `name[i].sub`, and `parent.child` keys back into proper arrays/objects before they reach the server. The normalizer is provider-agnostic and self-gating (a no-op unless flattened keys are present), so well-formed arguments — including those already normalized by the host runtime — pass through untouched.
|
|
14
|
+
|
|
10
15
|
### Changed
|
|
11
16
|
|
|
12
17
|
- Aligned the MCP extension peer dependencies with upstream pi AI/TUI `^0.79.7` so MCP-backed sessions can use the host's latest provider catalog, model-search, theme/color-scheme, Warp image capability, and shared TUI compatibility fixes; no MCP extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
|
|
@@ -10,7 +10,7 @@ import { maybeStartUiSession, type UiSessionRuntime } from "./ui-session.ts";
|
|
|
10
10
|
import { formatToolName, isToolExcluded } from "./types.ts";
|
|
11
11
|
import { resourceNameToToolName } from "./resource-tools.ts";
|
|
12
12
|
import { authenticate, supportsOAuth } from "./mcp-auth-flow.ts";
|
|
13
|
-
import { formatAuthRequiredMessage } from "./utils.ts";
|
|
13
|
+
import { formatAuthRequiredMessage, unflattenToolArguments } from "./utils.ts";
|
|
14
14
|
|
|
15
15
|
const BUILTIN_NAMES = new Set(["read", "bash", "edit", "write", "grep", "find", "ls", "mcp"]);
|
|
16
16
|
|
|
@@ -369,7 +369,9 @@ export function createDirectToolExecutor(
|
|
|
369
369
|
|
|
370
370
|
const resultPromise = connection.client.callTool({
|
|
371
371
|
name: spec.originalName,
|
|
372
|
-
arguments: params,
|
|
372
|
+
// Normalize provider-flattened argument keys (e.g. Gemini's `keywords[0]`)
|
|
373
|
+
// back into arrays/objects before the MCP server validates them.
|
|
374
|
+
arguments: unflattenToolArguments(params),
|
|
373
375
|
_meta: uiSession?.requestMeta,
|
|
374
376
|
});
|
|
375
377
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/mcp",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.4",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension that adapts MCP (Model Context Protocol) servers into the coding agent. Fork of: https://github.com/nicobailon/pi-mcp-adapter",
|
|
6
6
|
"contributors": [
|
|
@@ -6,7 +6,7 @@ import { lazyConnect, updateServerMetadata, updateMetadataCache, getFailureAgeSe
|
|
|
6
6
|
import { buildToolMetadata, getToolNames, findToolByName, formatSchema } from "./tool-metadata.ts";
|
|
7
7
|
import { transformMcpContent } from "./tool-registrar.ts";
|
|
8
8
|
import { maybeStartUiSession, type UiSessionRuntime } from "./ui-session.ts";
|
|
9
|
-
import { formatAuthRequiredMessage, truncateAtWord } from "./utils.ts";
|
|
9
|
+
import { formatAuthRequiredMessage, truncateAtWord, unflattenToolArguments } from "./utils.ts";
|
|
10
10
|
import { authenticate, supportsOAuth } from "./mcp-auth-flow.ts";
|
|
11
11
|
|
|
12
12
|
type ProxyToolResult = AgentToolResult<Record<string, unknown>>;
|
|
@@ -718,7 +718,9 @@ export async function executeCall(
|
|
|
718
718
|
|
|
719
719
|
const resultPromise = connection.client.callTool({
|
|
720
720
|
name: toolMeta.originalName,
|
|
721
|
-
arguments: args,
|
|
721
|
+
// Normalize provider-flattened argument keys (e.g. Gemini's `keywords[0]`)
|
|
722
|
+
// back into arrays/objects before the MCP server validates them.
|
|
723
|
+
arguments: unflattenToolArguments(args),
|
|
722
724
|
_meta: uiSession?.requestMeta,
|
|
723
725
|
});
|
|
724
726
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ExtensionAPI } from "@bastani/atomic";
|
|
2
|
+
import { reconstructFlattenedKeys } from "@bastani/atomic";
|
|
2
3
|
import { homedir, platform } from "node:os";
|
|
3
4
|
import { join } from "node:path";
|
|
4
5
|
import type { McpConfig, ServerEntry } from "./types.ts";
|
|
@@ -127,3 +128,27 @@ export function extractToolUiStreamMode(toolMeta: Record<string, unknown> | unde
|
|
|
127
128
|
}
|
|
128
129
|
return undefined;
|
|
129
130
|
}
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Reconstruct flattened tool-call arguments into proper nested arrays/objects.
|
|
134
|
+
*
|
|
135
|
+
* Some upstream providers — notably GitHub Copilot Gemini models proxied through
|
|
136
|
+
* Google's GenAI API — serialize array/object function-call arguments as
|
|
137
|
+
* flattened, indexed keys on the wire. For example a tool called with
|
|
138
|
+
* `{ keywords: ["a", "b"] }` arrives as `{ "keywords[0]": "a", "keywords[1]": "b" }`,
|
|
139
|
+
* which an MCP server then rejects as invalid arguments.
|
|
140
|
+
*
|
|
141
|
+
* This normalizer runs at the MCP `callTool` boundary so arguments are correct
|
|
142
|
+
* regardless of how the model/provider serialized them. It is provider-agnostic
|
|
143
|
+
* and **self-gating**: it is a no-op unless at least one bracket-indexed key
|
|
144
|
+
* (`name[<digit>]`) is present, so well-formed arguments pass through untouched
|
|
145
|
+
* (including arguments already normalized upstream by the host runtime).
|
|
146
|
+
*/
|
|
147
|
+
export function unflattenToolArguments(
|
|
148
|
+
args: Record<string, unknown> | null | undefined,
|
|
149
|
+
): Record<string, unknown> {
|
|
150
|
+
if (args === null || args === undefined) return {};
|
|
151
|
+
const keys = Object.keys(args);
|
|
152
|
+
if (!keys.some((key) => /\[\d+\]/.test(key))) return args;
|
|
153
|
+
return reconstructFlattenedKeys(args, () => true);
|
|
154
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/subagents",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.4",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension for delegating tasks to subagents with chains, parallel execution, and TUI clarification. Fork of: https://github.com/nicobailon/pi-subagents",
|
|
6
6
|
"contributors": [
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/web-access",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.4",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension for web search, URL fetching, GitHub repo cloning, PDF/video extraction. Fork of: https://github.com/nicobailon/pi-web-access",
|
|
6
6
|
"contributors": [
|
|
@@ -22,6 +22,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
|
22
22
|
- Changed the builtin `deep-research-codebase`, `goal`, `ralph`, and `open-claude-design` workflows to run their GitHub Copilot `claude-opus-4.8` fallbacks at the model's largest advertised long-context (~1M/936K) window via the new `(1m)` token, automatically degrading to the 200K short window when Copilot's long-context tier is unavailable. Other models in each fallback chain are unaffected.
|
|
23
23
|
- Aligned the workflows extension peer dependency with upstream pi TUI `^0.79.7` so workflow graph, custom UI, and prompt-broker integrations consume the latest shared TUI color-scheme, Warp image capability, and compatibility fixes; no workflows extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
|
|
24
24
|
|
|
25
|
+
### Fixed
|
|
26
|
+
|
|
27
|
+
- Fixed workflow stage **model fallback misreporting configured providers as `No API key found`**. Each fallback candidate session was created with a fresh `AuthStorage`/`ModelRegistry`, so after a primary model failed (for example the Ralph `reviewer-a` chain hitting an unavailable `anthropic/claude-fable-5` and getting a real provider 404), every fallback candidate re-read `auth.json` from scratch. Under concurrent reviewer stages and OAuth token refreshes holding the `auth.json` lock, that fresh synchronous reload could fail and silently fall back to an empty credential set, reporting `No API key found` for `anthropic`/`openai-codex`/`github-copilot` even while sibling reviewer stages used those exact providers successfully. A stage now captures the `ModelRegistry` (and its already-loaded `AuthStorage`) from its first session and threads it into every subsequent fallback candidate, so a successfully-loaded credential store is reused across the whole fallback chain instead of being discarded and re-loaded per candidate. Combined with the coding-agent change that surfaces a real credential-store load failure instead of `No API key found`, a transient store-read failure remains a recoverable/retryable auth failure ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
28
|
+
- Fixed post-completion workflow follow-ups replaying the entire model-fallback chain from an unavailable primary instead of resuming on the model the stage settled on. After model fallback succeeded, the stage kept its working `session` but left `sessionPromise` undefined, and `ensureSession()` only checked `sessionPromise` — so a follow-up (`ctx.followUp`/`ctx.steer`/`ensureAttached`, and post-completion `workflow send`/TUI prompts) created a brand-new session from `candidates[0]` (the primary), discarding the working fallback session. For a chain whose primary 404s (e.g. `anthropic/claude-fable-5`), every follow-up re-ran `primary -> 404 -> ... -> working model` and could leave the stage stuck on the unavailable primary. `ensureSession()` now reuses an already-attached session, and `promptWithFallback()` retries the last-settled model first (for both live retained sessions and disk-reattached sessions), restarting the full chain from the primary only if that model fails again retryably ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
29
|
+
|
|
25
30
|
## [0.8.30] - 2026-06-17
|
|
26
31
|
|
|
27
32
|
### Changed
|
|
@@ -554,6 +554,7 @@ async function runRalphWorkflow(
|
|
|
554
554
|
model: "github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
555
555
|
fallbackModels: [
|
|
556
556
|
"google/gemini-3.1-pro-preview:high",
|
|
557
|
+
"google-vertex/gemini-3.1-pro-preview:high",
|
|
557
558
|
"openai-codex/gpt-5.5:xhigh",
|
|
558
559
|
"github-copilot/gpt-5.5:xhigh",
|
|
559
560
|
"openai/gpt-5.5:xhigh",
|
|
@@ -723,6 +723,18 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
|
|
|
723
723
|
let candidatesPromise: Promise<WorkflowResolvedModelCandidate[]> | undefined;
|
|
724
724
|
let activeCandidateIndex: number | undefined;
|
|
725
725
|
let selectedModel: string | undefined;
|
|
726
|
+
// A single ModelRegistry (carrying its AuthStorage) reused across every model
|
|
727
|
+
// fallback candidate in this stage. Captured from the first created session
|
|
728
|
+
// and threaded into subsequent candidate sessions so fallback does not rebuild
|
|
729
|
+
// auth/model state per candidate — which can misreport configured providers as
|
|
730
|
+
// "No API key found" under auth.json lock contention (issue #1431).
|
|
731
|
+
let sharedModelRegistry: CreateAgentSessionOptions["modelRegistry"];
|
|
732
|
+
// When true, the next promptWithFallback() call first retries the model the
|
|
733
|
+
// session last settled on (a post-completion follow-up, a subsequent turn, or
|
|
734
|
+
// a reattached session) before replaying the chain from the primary. Set on
|
|
735
|
+
// every successful attempt and by ensureSession()'s reattach branch; cleared
|
|
736
|
+
// when the current session is disposed.
|
|
737
|
+
let resumeCurrentSession = false;
|
|
726
738
|
const modelAttempts: WorkflowModelAttempt[] = [];
|
|
727
739
|
const modelWarnings: string[] = [];
|
|
728
740
|
const pendingFallbackWarnings: string[] = [];
|
|
@@ -748,7 +760,10 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
|
|
|
748
760
|
return candidatesPromise;
|
|
749
761
|
}
|
|
750
762
|
|
|
751
|
-
function stageOptionsForCandidate(candidate: WorkflowResolvedModelCandidate | undefined): StageOptions | undefined {
|
|
763
|
+
function stageOptionsForCandidate(
|
|
764
|
+
candidate: WorkflowResolvedModelCandidate | undefined,
|
|
765
|
+
resumeOptions?: { restoreSavedModel?: boolean },
|
|
766
|
+
): StageOptions | undefined {
|
|
752
767
|
const optionsForCandidate: StageOptions = candidate === undefined
|
|
753
768
|
? { ...(effectiveStageOptions ?? {}) }
|
|
754
769
|
: {
|
|
@@ -763,6 +778,12 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
|
|
|
763
778
|
fallbackModels: undefined,
|
|
764
779
|
fallbackThinkingLevels: undefined,
|
|
765
780
|
};
|
|
781
|
+
// When resuming a reattached session (a post-completion follow-up), drop any
|
|
782
|
+
// model override so the SDK restores the model the session last used — the
|
|
783
|
+
// one that actually worked — instead of forcing the primary/candidate model.
|
|
784
|
+
if (resumeOptions?.restoreSavedModel) {
|
|
785
|
+
delete optionsForCandidate.model;
|
|
786
|
+
}
|
|
766
787
|
if (reattachSessionFile !== undefined && optionsForCandidate.sessionManager === undefined) {
|
|
767
788
|
const cwd = optionsForCandidate.cwd ?? process.cwd();
|
|
768
789
|
optionsForCandidate.sessionManager = SessionManager.open(
|
|
@@ -773,6 +794,11 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
|
|
|
773
794
|
optionsForCandidate.context = undefined;
|
|
774
795
|
optionsForCandidate.forkFromSessionFile = undefined;
|
|
775
796
|
}
|
|
797
|
+
// Reuse the registry captured from the first session for later fallback
|
|
798
|
+
// candidates. A caller-supplied modelRegistry is preserved (issue #1431).
|
|
799
|
+
if (sharedModelRegistry !== undefined && optionsForCandidate.modelRegistry === undefined) {
|
|
800
|
+
optionsForCandidate.modelRegistry = sharedModelRegistry;
|
|
801
|
+
}
|
|
776
802
|
return Object.keys(optionsForCandidate).length === 0 ? undefined : optionsForCandidate;
|
|
777
803
|
}
|
|
778
804
|
|
|
@@ -829,6 +855,16 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
|
|
|
829
855
|
function attachSession(created: StageSessionRuntime | StageSessionCreateResult): StageSessionRuntime {
|
|
830
856
|
const result = normalizeSessionCreateResult(created);
|
|
831
857
|
session = result.session;
|
|
858
|
+
// Capture the SDK ModelRegistry from the first real session so subsequent
|
|
859
|
+
// fallback candidates reuse the same already-loaded auth/model state instead
|
|
860
|
+
// of re-creating it per candidate (issue #1431). The test stub session has
|
|
861
|
+
// no modelRegistry, so capture is simply skipped there.
|
|
862
|
+
if (sharedModelRegistry === undefined) {
|
|
863
|
+
const withRegistry = result.session as Partial<Pick<AgentSession, "modelRegistry">>;
|
|
864
|
+
if (withRegistry.modelRegistry !== undefined) {
|
|
865
|
+
sharedModelRegistry = withRegistry.modelRegistry;
|
|
866
|
+
}
|
|
867
|
+
}
|
|
832
868
|
sessionSettingsManager = result.settingsManager ?? result.session.settingsManager;
|
|
833
869
|
if (pendingThinkingLevel !== undefined) {
|
|
834
870
|
result.session.setThinkingLevel(pendingThinkingLevel);
|
|
@@ -851,12 +887,13 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
|
|
|
851
887
|
async function createSession(
|
|
852
888
|
candidate: WorkflowResolvedModelCandidate | undefined,
|
|
853
889
|
consumer: AgentSessionConsumer,
|
|
890
|
+
resumeOptions?: { restoreSavedModel?: boolean },
|
|
854
891
|
): Promise<StageSessionRuntime> {
|
|
855
892
|
applyCandidateThinking(candidate);
|
|
856
893
|
const created = adapters.agentSession
|
|
857
|
-
? await adapters.agentSession.create(stripWorkflowOnlyOptions(stageOptionsForCandidate(candidate)) as StageSessionCreateOptions, {
|
|
894
|
+
? await adapters.agentSession.create(stripWorkflowOnlyOptions(stageOptionsForCandidate(candidate, resumeOptions)) as StageSessionCreateOptions, {
|
|
858
895
|
...meta,
|
|
859
|
-
stageOptions: stageOptionsForCandidate(candidate),
|
|
896
|
+
stageOptions: stageOptionsForCandidate(candidate, resumeOptions),
|
|
860
897
|
})
|
|
861
898
|
: missingAdapter(consumer);
|
|
862
899
|
return attachSession(created);
|
|
@@ -864,12 +901,37 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
|
|
|
864
901
|
|
|
865
902
|
async function ensureSession(consumer: AgentSessionConsumer = "prompt"): Promise<StageSessionRuntime> {
|
|
866
903
|
if (disposed) throw new Error(`atomic-workflows: stage "${stageName}" session has been disposed`);
|
|
904
|
+
// Reuse an already-attached session. After model fallback settles, `session`
|
|
905
|
+
// is set but `sessionPromise` is left undefined; without this guard a
|
|
906
|
+
// follow-up's ensureSession() (via ctx.followUp / ctx.steer / __ensureSession)
|
|
907
|
+
// would create a brand-new session from the primary candidate and discard the
|
|
908
|
+
// working fallback session (issue #1431 follow-up).
|
|
909
|
+
if (session !== undefined) return session;
|
|
867
910
|
if (!sessionPromise) {
|
|
868
911
|
sessionPromise = (async () => {
|
|
869
912
|
if (!hasExplicitModelFallbackConfig) return createSession(undefined, consumer);
|
|
870
913
|
const candidates = await modelCandidates();
|
|
871
914
|
const first = candidates[0];
|
|
872
915
|
if (first === undefined) return createSession(undefined, consumer);
|
|
916
|
+
|
|
917
|
+
// Reattaching a previously-run session (e.g. a post-completion
|
|
918
|
+
// follow-up after the session was disposed): resume on the model the
|
|
919
|
+
// session last settled on — the one that actually worked — instead of
|
|
920
|
+
// replaying the fallback chain from an unavailable primary.
|
|
921
|
+
// promptWithFallback retries that model first; if it fails again it
|
|
922
|
+
// restarts the full chain from the primary.
|
|
923
|
+
if (reattachSessionFile !== undefined) {
|
|
924
|
+
const resumed = await createSession(undefined, consumer, { restoreSavedModel: true });
|
|
925
|
+
const restoredId = workflowModelId(resumed.model);
|
|
926
|
+
const restoredIndex = restoredId === undefined
|
|
927
|
+
? -1
|
|
928
|
+
: candidates.findIndex((entry) => entry.id === restoredId);
|
|
929
|
+
activeCandidateIndex = restoredIndex >= 0 ? restoredIndex : undefined;
|
|
930
|
+
selectedModel = restoredId ?? first.id;
|
|
931
|
+
resumeCurrentSession = true;
|
|
932
|
+
return resumed;
|
|
933
|
+
}
|
|
934
|
+
|
|
873
935
|
activeCandidateIndex = 0;
|
|
874
936
|
selectedModel = first.id;
|
|
875
937
|
return createSession(first, consumer);
|
|
@@ -889,6 +951,7 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
|
|
|
889
951
|
session = undefined;
|
|
890
952
|
sessionPromise = undefined;
|
|
891
953
|
sessionSettingsManager = undefined;
|
|
954
|
+
resumeCurrentSession = false;
|
|
892
955
|
for (const unsubscribe of listenerUnsubscribes.values()) unsubscribe();
|
|
893
956
|
listenerUnsubscribes.clear();
|
|
894
957
|
unsubscribeTerminateWatcher?.();
|
|
@@ -956,14 +1019,61 @@ export function createStageContext(opts: StageRunnerOpts): InternalStageContext
|
|
|
956
1019
|
return;
|
|
957
1020
|
}
|
|
958
1021
|
|
|
959
|
-
let index = activeCandidateIndex ?? 0;
|
|
960
1022
|
const capturedStructuredOutputForAttempt = (): boolean =>
|
|
961
1023
|
structuredOutputCapture?.called === true && signal?.aborted !== true;
|
|
962
1024
|
const recordSuccessfulAttempt = (candidate: WorkflowResolvedModelCandidate): void => {
|
|
963
1025
|
modelAttempts.push({ model: candidate.id, success: true, ...modelAttemptReasoning(candidate) });
|
|
964
1026
|
pendingFallbackWarnings.length = 0;
|
|
1027
|
+
// The session settled on a working model; a later follow-up/turn should
|
|
1028
|
+
// resume on it rather than replaying the chain from the primary.
|
|
1029
|
+
resumeCurrentSession = true;
|
|
965
1030
|
};
|
|
966
1031
|
|
|
1032
|
+
// Resume preamble: when the stage already settled on a working model (a
|
|
1033
|
+
// post-completion follow-up, a subsequent turn, or a reattached session),
|
|
1034
|
+
// retry that model first instead of replaying the chain from an unavailable
|
|
1035
|
+
// primary. If that model now fails retryably, restart the full chain from
|
|
1036
|
+
// the primary.
|
|
1037
|
+
if (resumeCurrentSession && session !== undefined) {
|
|
1038
|
+
resumeCurrentSession = false;
|
|
1039
|
+
const resumedSession = session;
|
|
1040
|
+
const resumedLabel = selectedModel ?? workflowModelId(resumedSession.model) ?? candidates[0]!.id;
|
|
1041
|
+
notifyModelFallbackMetaChange();
|
|
1042
|
+
try {
|
|
1043
|
+
const { terminalScanStartIndex } = await promptWithPauseResume(resumedSession, text, sdkOptions);
|
|
1044
|
+
const terminalFailure = latestTerminalAssistantFailureSince(resumedSession.messages, terminalScanStartIndex);
|
|
1045
|
+
if (terminalFailure === undefined || capturedStructuredOutputForAttempt()) {
|
|
1046
|
+
modelAttempts.push({ model: resumedLabel, success: true });
|
|
1047
|
+
pendingFallbackWarnings.length = 0;
|
|
1048
|
+
resumeCurrentSession = true;
|
|
1049
|
+
return;
|
|
1050
|
+
}
|
|
1051
|
+
throw new WorkflowPromptModelFailure(terminalFailure);
|
|
1052
|
+
} catch (err) {
|
|
1053
|
+
if (capturedStructuredOutputForAttempt() && isRetryableModelFailure(err)) {
|
|
1054
|
+
modelAttempts.push({ model: resumedLabel, success: true });
|
|
1055
|
+
pendingFallbackWarnings.length = 0;
|
|
1056
|
+
resumeCurrentSession = true;
|
|
1057
|
+
return;
|
|
1058
|
+
}
|
|
1059
|
+
const message = errorMessage(err);
|
|
1060
|
+
modelAttempts.push({ model: resumedLabel, success: false, error: message });
|
|
1061
|
+
if (signal?.aborted || !isRetryableModelFailure(err)) {
|
|
1062
|
+
modelWarnings.push(...pendingFallbackWarnings);
|
|
1063
|
+
pendingFallbackWarnings.length = 0;
|
|
1064
|
+
notifyModelFallbackMetaChange();
|
|
1065
|
+
throw err;
|
|
1066
|
+
}
|
|
1067
|
+
// The resumed model failed retryably: restart the whole fallback chain
|
|
1068
|
+
// from the primary. disposeCurrentSession clears resumeCurrentSession.
|
|
1069
|
+
pendingFallbackWarnings.push(`[fallback] resume on ${resumedLabel} failed: ${message}. Restarting fallback from ${candidateLabel(candidates[0]!)}.`);
|
|
1070
|
+
await disposeCurrentSession();
|
|
1071
|
+
activeCandidateIndex = undefined;
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1075
|
+
let index = activeCandidateIndex ?? 0;
|
|
1076
|
+
|
|
967
1077
|
while (index < candidates.length) {
|
|
968
1078
|
const candidate = candidates[index]!;
|
|
969
1079
|
const activeSession = session && activeCandidateIndex === index
|
|
@@ -563,6 +563,31 @@ export declare class AgentSession {
|
|
|
563
563
|
* Context overflow errors are NOT retryable (handled by compaction instead).
|
|
564
564
|
*/
|
|
565
565
|
private _isRetryableError;
|
|
566
|
+
/**
|
|
567
|
+
* For GitHub Copilot Gemini, reconstruct flattened tool-call arguments
|
|
568
|
+
* (for example `edits[0].newText`) into the nested arrays/objects Gemini
|
|
569
|
+
* produced before the assistant message is persisted, so saved transcripts
|
|
570
|
+
* never carry the flattened CAPI wire shape and replays loaded from disk match
|
|
571
|
+
* the structure Gemini signed. In-place, gated to Copilot Gemini, and a no-op
|
|
572
|
+
* for well-formed arguments or any other provider/model. The outbound replay
|
|
573
|
+
* normalizer still heals already-persisted (legacy) sessions on the wire.
|
|
574
|
+
*/
|
|
575
|
+
private _normalizePersistedGeminiToolArgs;
|
|
576
|
+
/**
|
|
577
|
+
* Detect a degenerate empty completion: the provider ended the stream with no
|
|
578
|
+
* usable content and zero output tokens. Seen with github-copilot Gemini models
|
|
579
|
+
* that emit finish_reason "stop" (or a tool-use stop) with an empty content array
|
|
580
|
+
* and 0 output tokens, leaving the turn dead instead of producing the next step.
|
|
581
|
+
*
|
|
582
|
+
* These are treated as retryable so the harness re-issues the request rather than
|
|
583
|
+
* silently stopping mid-task. Guarded tightly (no text, no tool call, no thinking,
|
|
584
|
+
* and output === 0) so legitimate non-empty turns are never matched.
|
|
585
|
+
*
|
|
586
|
+
* Intentionally provider-agnostic (not gated to Copilot Gemini): a degenerate
|
|
587
|
+
* empty turn is a transient failure for any provider. It is bounded by
|
|
588
|
+
* `maxRetries` and falls through to normal handling on exhaustion.
|
|
589
|
+
*/
|
|
590
|
+
private _isEmptyCompletion;
|
|
566
591
|
private _handleRetryableError;
|
|
567
592
|
/**
|
|
568
593
|
* Cancel in-progress retry.
|