@bastani/atomic 0.8.31-alpha.3 → 0.8.31-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/dist/builtin/cursor/CHANGELOG.md +1 -1
- package/dist/builtin/cursor/package.json +2 -2
- package/dist/builtin/intercom/package.json +1 -1
- package/dist/builtin/mcp/CHANGELOG.md +5 -0
- package/dist/builtin/mcp/direct-tools.ts +4 -2
- package/dist/builtin/mcp/package.json +1 -1
- package/dist/builtin/mcp/proxy-modes.ts +4 -2
- package/dist/builtin/mcp/utils.ts +25 -0
- package/dist/builtin/subagents/package.json +1 -1
- package/dist/builtin/web-access/package.json +1 -1
- package/dist/builtin/workflows/CHANGELOG.md +9 -0
- package/dist/builtin/workflows/builtin/ralph-review-gate.ts +89 -0
- package/dist/builtin/workflows/builtin/ralph.ts +16 -51
- package/dist/builtin/workflows/package.json +1 -1
- package/dist/builtin/workflows/src/extension/dispatcher.ts +3 -0
- package/dist/builtin/workflows/src/extension/index.ts +15 -0
- package/dist/builtin/workflows/src/extension/runtime.ts +7 -0
- package/dist/builtin/workflows/src/runs/foreground/executor.ts +103 -7
- package/dist/builtin/workflows/src/runs/foreground/stage-runner.ts +133 -10
- package/dist/builtin/workflows/src/shared/persistence-restore.ts +2 -0
- package/dist/core/agent-session.d.ts +25 -0
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +124 -8
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/auth-guidance.d.ts +12 -0
- package/dist/core/auth-guidance.d.ts.map +1 -1
- package/dist/core/auth-guidance.js +24 -0
- package/dist/core/auth-guidance.js.map +1 -1
- package/dist/core/auth-storage.d.ts +42 -0
- package/dist/core/auth-storage.d.ts.map +1 -1
- package/dist/core/auth-storage.js +71 -10
- package/dist/core/auth-storage.js.map +1 -1
- package/dist/core/copilot-gemini-payload-sanitizer.d.ts +72 -0
- package/dist/core/copilot-gemini-payload-sanitizer.d.ts.map +1 -0
- package/dist/core/copilot-gemini-payload-sanitizer.js +296 -0
- package/dist/core/copilot-gemini-payload-sanitizer.js.map +1 -0
- package/dist/core/copilot-gemini-reasoning.d.ts +118 -0
- package/dist/core/copilot-gemini-reasoning.d.ts.map +1 -0
- package/dist/core/copilot-gemini-reasoning.js +260 -0
- package/dist/core/copilot-gemini-reasoning.js.map +1 -0
- package/dist/core/copilot-gemini-tool-arguments.d.ts +42 -0
- package/dist/core/copilot-gemini-tool-arguments.d.ts.map +1 -0
- package/dist/core/copilot-gemini-tool-arguments.js +179 -0
- package/dist/core/copilot-gemini-tool-arguments.js.map +1 -0
- package/dist/core/flattened-tool-arguments.d.ts +41 -0
- package/dist/core/flattened-tool-arguments.d.ts.map +1 -0
- package/dist/core/flattened-tool-arguments.js +136 -0
- package/dist/core/flattened-tool-arguments.js.map +1 -0
- package/dist/core/http-dispatcher.d.ts.map +1 -1
- package/dist/core/http-dispatcher.js +5 -0
- package/dist/core/http-dispatcher.js.map +1 -1
- package/dist/core/sdk.d.ts.map +1 -1
- package/dist/core/sdk.js +38 -8
- package/dist/core/sdk.js.map +1 -1
- package/dist/core/session-manager.d.ts +1 -1
- package/dist/core/session-manager.d.ts.map +1 -1
- package/dist/core/session-manager.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/docs/providers.md +1 -0
- package/docs/sessions.md +4 -0
- package/docs/workflows.md +7 -1
- package/examples/extensions/gondolin/package-lock.json +183 -183
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -23,6 +23,16 @@
|
|
|
23
23
|
|
|
24
24
|
### Fixed
|
|
25
25
|
|
|
26
|
+
- Exposed `SessionManager.usesDefaultSessionDir()` through the read-only extension session-manager surface so bundled extensions can distinguish default global session storage from non-default `--session-dir`, `ATOMIC_CODING_AGENT_SESSION_DIR`, or settings-backed session directories without path guessing ([#1444](https://github.com/bastani-inc/atomic/issues/1444)).
|
|
27
|
+
- Fixed `github-copilot/*` Gemini models (for example `github-copilot/gemini-3.1-pro-preview` and `github-copilot/gemini-3.5-flash`) failing **every** chat turn with `Error: 400 invalid request body`. These models are served through GitHub's Copilot API (CAPI), which translates the OpenAI chat-completions request into a Google GenAI `GenerateContent` request and forwards tool/function JSON Schema `anyOf`/`oneOf` verbatim into Gemini's `FunctionDeclaration` schema. Gemini rejects a union whose branch is a complex **object** schema, so Google returned HTTP 400 and CAPI relabelled it `{"error":{"code":"invalid_request_body"}}`. Because Atomic's bundled `workflow` tool — and any tool using the TypeBox `Type.Union([Type.Object(...), Type.String()])` pattern for fields such as `task`, `chain`, and `parallel` — is present in normal chat turns, the request failed before the model ever ran (it was previously masked only when a fallback model existed). Atomic now sanitizes outbound tool JSON Schemas for GitHub Copilot Gemini models into the subset CAPI/Gemini honors: it resolves object/array-bearing `anyOf`/`oneOf` to their most expressive branch, converts `const`/literal unions to `enum`, collapses nullable unions to `nullable`, prunes `required` to existing properties, and drops non-portable keywords (`additionalProperties`, `patternProperties`, `$schema`, `format`, `pattern`, numeric/length bounds, `default`, `title`, etc.). The transform is gated to `github-copilot` Gemini `openai-completions` models and runs last in the provider-payload pipeline (so it also covers extension/SDK-injected tools), leaving every other provider/model payload unchanged.
|
|
28
|
+
- Fixed `github-copilot/*` Gemini models getting stuck in an infinite tool-call retry loop (most visibly on the workflow `structured_output` tool). Capturing the raw CAPI stream confirmed that Gemini serializes array/object function-call arguments as **flattened indexed keys** on the wire — for example `{ keywords: ["a", "b"] }` arrives as `{ "keywords[0]": "a", "keywords[1]": "b" }` — so schema validation failed (`keywords: must have required properties keywords` and `root: must not have additional properties`) and the model re-emitted the same shape forever. Atomic now reconstructs flattened tool-call arguments (`name[i]`, `name[i].sub`, `parent.child`) back into proper arrays/objects in each tool's `prepareArguments` step, before validation runs. Gated to GitHub Copilot Gemini models at call time and a no-op for well-formed arguments, so it covers built-in, extension, SDK, and MCP tools without affecting any other provider/model.
|
|
29
|
+
- Fixed `github-copilot/*` Gemini models (for example `github-copilot/gemini-3.1-pro-preview`) silently dying mid-task instead of continuing the turn. Inspecting the affected sessions and confirming against GitHub's Copilot API (CAPI) source showed two distinct degenerate stream endings that Atomic was not recovering from: (1) CAPI's `getAzureFinishReason` maps several Gemini finish reasons — `MALFORMED_FUNCTION_CALL`, `OTHER`, `LANGUAGE`, and `UNEXPECTED_TOOL_CALL` — to a bare OpenAI `finish_reason: "error"`, which `pi-ai` surfaces as `"Provider finish_reason: error"`; the auto-retry classifier's regex did not match it, so the turn ended with an empty assistant message and no retry; and (2) Gemini intermittently ends the stream with `finish_reason: "stop"`, an **empty content array**, and **0 output tokens**, which Atomic treated as a successful (if empty) turn and stopped. Atomic now treats bare `finish_reason: error`/`content_filter` as retryable and detects degenerate empty completions (no text/tool-call/thinking content **and** zero output tokens on a `stop`/`toolUse` turn) as retryable, re-issuing the request with the existing exponential-backoff path. Empty `stop` completions also no longer reset the auto-retry counter, so repeated empties stay bounded by `maxRetries` instead of retrying forever.
|
|
30
|
+
- Fixed the **root cause** behind `github-copilot/*` Gemini (for example `github-copilot/gemini-3.1-pro-preview`) returning repeated empty completions and "stopping to respond" after its first tool call. Gemini is a thinking model: each function/tool call it emits comes with an opaque **thought signature** that must be replayed, verbatim, on the next request or Gemini refuses to continue the reasoning chain. Confirmed against GitHub's Copilot API (CAPI) source, CAPI carries that signature in a non-standard `reasoning_opaque` field on the assistant message / streamed delta and reads the same `reasoning_opaque` back off the assistant message on replay to re-attach the signature to each Gemini function-call part (keyed by `tool_call.id`). The bundled `pi-ai` OpenAI-completions client never captured or replayed `reasoning_opaque` (it only round-trips the OpenRouter-style `reasoning_details: [{ type: "reasoning.encrypted", id, data }]` shape, which CAPI does not emit), so the real Gemini thought signature was dropped inbound and never sent back. With it missing, CAPI substitutes the sentinel `skip_thought_signature_validator` on the first replayed function call and Gemini responds with an empty candidate / `finish_reason: "stop"` and zero output tokens — which the empty-completion retry above then re-issued against the same signature-less history until `maxRetries` was exhausted. Atomic now bridges `reasoning_opaque` to the mechanism the client already round-trips: a `globalThis.fetch` interceptor scoped to `*.githubcopilot.com` event streams rewrites each CAPI Gemini SSE delta that carries both `reasoning_opaque` and a `tool_calls[].id` to add a matching `reasoning_details` entry (captured by the client as the tool call's `thoughtSignature`), and a provider-payload (`onPayload`) transform converts the `reasoning_details` the client re-emits on replayed assistant messages back into the single `reasoning_opaque` field CAPI reads. 
Both transforms are gated to GitHub Copilot Gemini `openai-completions` models and are no-ops for every other provider/model and for Gemini turns that carry no thought signature; the thinking text round-trips inside the same opaque blob, so combined think-then-tool-call turns keep their signatures across session save/load.
|
|
31
|
+
- Fixed a second `github-copilot/*` Gemini multi-turn failure that surfaced once thought signatures were preserved: a turn after any **array/object tool call** (most visibly `edit`) ended with a bare `finish_reason: "error"` and then retried to exhaustion. CAPI delivers Gemini's array/object function-call arguments as **flattened indexed keys** (for example an `edit` call arrives as `{ "edits[0].newText": "...", "edits[0].oldText": "...", "path": "..." }`), and Atomic only reconstructed them at tool **execution** time — the persisted assistant message kept the raw flattened keys. On the next turn that message was replayed verbatim, CAPI parsed those literal keys straight into the Gemini `FunctionCall.Args`, and the resulting call no longer matched the tool's declared schema (nor the structure Gemini originally signed), so Gemini ended the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER` — all of which CAPI maps to a bare OpenAI `finish_reason: "error"`. Atomic now also reconstructs flattened tool-call arguments on the **outbound replay payload** for GitHub Copilot Gemini: each replayed assistant `tool_calls[].function.arguments` is unflattened (reusing the same `unflattenGeminiToolArguments` logic with the tool's own parameter schema, looked up from the request `tools`) back into the nested arrays/objects Gemini produced, before the request reaches CAPI. This runs in the provider-payload pipeline after schema sanitization and alongside the `reasoning_opaque` restore, is gated to GitHub Copilot Gemini `openai-completions` models, fails open on non-JSON arguments, and is a no-op for already well-formed arguments — healing both new sessions and already-persisted transcripts that contain flattened Gemini tool calls.
|
|
32
|
+
- Reduced `github-copilot/*` Gemini `MALFORMED_FUNCTION_CALL` failures (surfaced as `finish_reason: "error"`) by emitting tool/function JSON Schemas in the shape Gemini resolves most reliably. The Gemini schema sanitizer now infers an explicit `type` on container nodes that omit one (`properties`/`required` ⇒ `object`, `items` ⇒ `array`) and collapses a tuple-form `items` array — which Gemini's single-`items` function-declaration schema rejects — into a single (most expressive object/array) schema. Gated to `github-copilot` Gemini `openai-completions` models and applied last in the provider-payload pipeline, so every other provider/model payload is unchanged.
|
|
33
|
+
- Fixed `github-copilot/*` Gemini tool calls with **nested object arguments but no arrays** still failing validation and looping. CAPI flattens such arguments to purely dotted keys (for example `{ "metadata.confidence": 0.5 }` with no bracket index anywhere), which the previous reconstruction — gated on the presence of a `name[<digit>]` bracket key — skipped, so the nested-object call never validated. Atomic now also reconstructs purely dotted keys, disambiguated by the tool's own parameter schema: a dotted key is split into a nested path only when its head segment names an object/array container property (including container branches of an `anyOf`/`oneOf` union), so legitimate argument keys that happen to contain a dot are left intact. Bracket-indexed reconstruction is unchanged, and the transform remains gated to GitHub Copilot Gemini models and a no-op for well-formed arguments.
|
|
34
|
+
- Hardened the GitHub Copilot Gemini tool-argument reconstruction against prototype pollution. `unflattenGeminiToolArguments` previously walked model-emitted key paths into a fresh object without guarding `__proto__`/`constructor`/`prototype`, so a steered Gemini tool call mixing a bracket key with e.g. `__proto__.polluted` could reach and mutate `Object.prototype` process-wide. Reconstruction now drops any key whose parsed path contains one of those segments (at any position, including the final segment and a literal plain key). The parse/assign/compact reconstruction (and this single guard) lives in one canonical module shared with the `@bastani/mcp` `callTool` normalizer, so the two implementations can no longer diverge on the fix.
|
|
35
|
+
- Scoped the GitHub Copilot Gemini `content_filter` retry. The earlier finish-reason retry change treated `finish_reason: "content_filter"` as retryable for **every** provider/model; a genuine `content_filter` safety block on a non-Gemini provider would therefore be re-issued up to `maxRetries` times before its inevitable failure. `content_filter` is now retried only for GitHub Copilot Gemini models (where CAPI maps spurious Gemini RECITATION/safety blocks to it); a bare `finish_reason: "error"` remains retryable for all providers as a generic transient failure.
|
|
26
36
|
- Fixed RPC unknown-command errors to include the request id so RPC clients do not hang waiting for a response.
|
|
27
37
|
- Fixed `/model` autocomplete and model-selection searches to match provider/model queries regardless of whether the provider or model token is typed first.
|
|
28
38
|
- Fixed the tree navigator to horizontally pan deep entries so the selected item remains readable.
|
|
@@ -31,6 +41,9 @@
|
|
|
31
41
|
- Fixed context-window startup, session-switch, settings, and RPC edge cases: unknown provider fallback models no longer inherit selectable context-window options from provider defaults, fatal startup diagnostics no longer persist context-window settings, `AgentSession.setModel()` preserves an incoming target model's explicit selected context window, model-switch paths that change effective context windows now notify listeners via `context_window_changed`, the interactive context-window picker keys selection on raw token counts so colliding formatted labels never change which window is selected, RPC `set_model` returns the effective post-switch session model, and explicit startup `contextWindow` selections are journaled even when they equal the model scalar default ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
32
42
|
- Fixed `AgentSession.setContextWindow()` so bare SDK/runtime calls update the active session, append `context_window_change`, and emit `context_window_changed` without persisting settings; callers must pass `{ persistDefault: true }` to update the active model's `defaultContextWindows["provider/modelId"]` setting ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
33
43
|
- Fixed `packages/coding-agent` source-CLI subprocess tests (`session-id-readonly`, `startup-session-name`, `stdout-cleanliness`) crashing with `ERR_MODULE_NOT_FOUND` (for example `src/core/tools/oversized-tool-result.js`) when the Vitest worker pool runs under Node. They now launch the TypeScript source CLI with Bun explicitly via a `bunExecutable()` helper (matching `context-window-cli`/`rpc-context-window`) instead of assuming `process.execPath` is Bun, so the package test suite is portable across environments. The repo-wide `.js`->`.ts` source-import convention and shipped `dist/` are unchanged ([#1419](https://github.com/bastani-inc/atomic/issues/1419)).
|
|
44
|
+
- Fixed a credential-store **load failure being misreported as `No API key found`**. When a fresh `AuthStorage` could not read `auth.json` (for example it was briefly locked by a concurrent process, surfacing an `ELOCKED` error), `reload()` recorded the error but left an empty in-memory credential set, and the prompt preflight then threw `No API key found for <provider>` — even though the credentials existed on disk. `AuthStorage` now exposes `getLoadError()`, and the prompt preflight surfaces the real load failure (`Could not load stored credentials for <provider>: …`, with the original error attached as `cause`) instead of claiming the key is absent, so a transient store-read failure is no longer indistinguishable from genuinely missing credentials. The message intentionally still reads as a recoverable auth failure so model fallback keeps retrying ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
45
|
+
- Fixed `createAgentSession()` constructing a throwaway `AuthStorage` even when a `modelRegistry` was supplied. Because `AuthStorage` eagerly calls `reload()` in its constructor — taking the `auth.json` file lock — building one only to discard it added redundant lock contention on every session creation. `createAgentSession()` now only creates an `AuthStorage` when neither a `modelRegistry` nor an `authStorage` is provided, so callers that reuse one registry across sessions (such as workflow stage model fallback) no longer trigger an extra contended credential reload per session ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
46
|
+
- Fixed the remaining `auth.json` **lock-contention hard failure** under many concurrent sessions (for example a workflow that fans out parallel stages through model fallback). `AuthStorage.reload()` previously acquired the exclusive `proper-lockfile` write lock just to *read* `auth.json`, and its sync acquisition (`acquireLockSyncWithRetry`) used a 200 ms **event-loop-blocking busy-wait**; when one stage held the lock across an async OAuth token refresh, sibling stages busy-waited (starving the very event loop the holder needed to release), gave up with `ELOCKED`, and recorded a credential load failure. With the #1431 message fix in place this was no longer misreported as `No API key found`, but it could still burn a stage's configured fallback candidates (each skipped as a recoverable auth error) until the chain exhausted and the stage hard-failed. Pure reads are now **lock-free**: `AuthStorageBackend` gains an optional `read()` method (built-in backends implement it; custom backends that omit it fall back to the previous locked read, so the released interface stays compatible) and `reload()` uses it without taking any lock, while writers persist `auth.json` **atomically** (sibling temp file + `rename`) so a lock-free reader always observes a complete previous-or-next snapshot, never a torn one. The exclusive lock is retained only for read-modify-write paths (credential `set`/`remove` and locked OAuth refresh), and file permissions stay `0600`. Concurrent session creation no longer contends on or is starved by the credential store ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
34
47
|
|
|
35
48
|
## [0.8.30] - 2026-06-17
|
|
36
49
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
### Changed
|
|
6
6
|
|
|
7
|
-
- Published a synchronized Atomic 0.8.31-alpha.3 prerelease; no functional Cursor provider changes were made after 0.8.30.
|
|
7
|
+
- Published a synchronized Atomic 0.8.31-alpha.5 prerelease; no functional Cursor provider changes were made after 0.8.30.
|
|
8
8
|
|
|
9
9
|
## [0.8.30] - 2026-06-17
|
|
10
10
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/cursor",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.5",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Experimental first-party Atomic extension for Cursor OAuth, model discovery, and streaming provider registration.",
|
|
6
6
|
"contributors": [
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
}
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
|
-
"@bastani/atomic-natives": "0.8.31-alpha.
|
|
43
|
+
"@bastani/atomic-natives": "0.8.31-alpha.5",
|
|
44
44
|
"@bufbuild/protobuf": "^2.0.0"
|
|
45
45
|
}
|
|
46
46
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/intercom",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.5",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension providing a private coordination channel between parent and child agent sessions. Fork of: https://github.com/nicobailon/pi-intercom",
|
|
6
6
|
"contributors": [
|
|
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- Hardened `unflattenToolArguments` against prototype pollution: a flattened key whose path walks through `__proto__`, `constructor`, or `prototype` (at any position, including the final segment and a literal plain key) is now dropped instead of being written, so a model-emitted key such as `__proto__.polluted` can no longer reach and mutate `Object.prototype`. The reconstruction logic (parse/assign/compact plus this guard) is now imported from a single canonical implementation in `@bastani/atomic` (`reconstructFlattenedKeys`) instead of being duplicated in `packages/mcp/utils.ts`, so the host-runtime and MCP `callTool` paths can no longer drift (the previous near-duplicate copies had already diverged on the security guard). Behavior for well-formed and ordinary flattened arguments is unchanged.
|
|
13
|
+
- Fixed MCP tool calls failing under GitHub Copilot Gemini models (e.g. `github-copilot/gemini-3.1-pro-preview`). Gemini, served through Copilot's CAPI/GenAI gateway, serializes array/object function-call arguments as flattened indexed keys on the wire — for example `{ keywords: ["a", "b"] }` arrives as `{ "keywords[0]": "a", "keywords[1]": "b" }` — which MCP servers reject as invalid arguments. The extension now normalizes arguments at the `callTool` boundary (both direct-tool and proxy/gateway paths) via `unflattenToolArguments`, reconstructing `name[i]`, `name[i].sub`, and `parent.child` keys back into proper arrays/objects before they reach the server. The normalizer is provider-agnostic and self-gating (a no-op unless at least one bracket-indexed `name[<digit>]` key is present), so well-formed arguments — including those already normalized by the host runtime — pass through untouched.
|
|
14
|
+
|
|
10
15
|
### Changed
|
|
11
16
|
|
|
12
17
|
- Aligned the MCP extension peer dependencies with upstream pi AI/TUI `^0.79.7` so MCP-backed sessions can use the host's latest provider catalog, model-search, theme/color-scheme, Warp image capability, and shared TUI compatibility fixes; no MCP extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
|
|
@@ -10,7 +10,7 @@ import { maybeStartUiSession, type UiSessionRuntime } from "./ui-session.ts";
|
|
|
10
10
|
import { formatToolName, isToolExcluded } from "./types.ts";
|
|
11
11
|
import { resourceNameToToolName } from "./resource-tools.ts";
|
|
12
12
|
import { authenticate, supportsOAuth } from "./mcp-auth-flow.ts";
|
|
13
|
-
import { formatAuthRequiredMessage } from "./utils.ts";
|
|
13
|
+
import { formatAuthRequiredMessage, unflattenToolArguments } from "./utils.ts";
|
|
14
14
|
|
|
15
15
|
const BUILTIN_NAMES = new Set(["read", "bash", "edit", "write", "grep", "find", "ls", "mcp"]);
|
|
16
16
|
|
|
@@ -369,7 +369,9 @@ export function createDirectToolExecutor(
|
|
|
369
369
|
|
|
370
370
|
const resultPromise = connection.client.callTool({
|
|
371
371
|
name: spec.originalName,
|
|
372
|
-
|
|
372
|
+
// Normalize provider-flattened argument keys (e.g. Gemini's `keywords[0]`)
|
|
373
|
+
// back into arrays/objects before the MCP server validates them.
|
|
374
|
+
arguments: unflattenToolArguments(params),
|
|
373
375
|
_meta: uiSession?.requestMeta,
|
|
374
376
|
});
|
|
375
377
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/mcp",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.5",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension that adapts MCP (Model Context Protocol) servers into the coding agent. Fork of: https://github.com/nicobailon/pi-mcp-adapter",
|
|
6
6
|
"contributors": [
|
|
@@ -6,7 +6,7 @@ import { lazyConnect, updateServerMetadata, updateMetadataCache, getFailureAgeSe
|
|
|
6
6
|
import { buildToolMetadata, getToolNames, findToolByName, formatSchema } from "./tool-metadata.ts";
|
|
7
7
|
import { transformMcpContent } from "./tool-registrar.ts";
|
|
8
8
|
import { maybeStartUiSession, type UiSessionRuntime } from "./ui-session.ts";
|
|
9
|
-
import { formatAuthRequiredMessage, truncateAtWord } from "./utils.ts";
|
|
9
|
+
import { formatAuthRequiredMessage, truncateAtWord, unflattenToolArguments } from "./utils.ts";
|
|
10
10
|
import { authenticate, supportsOAuth } from "./mcp-auth-flow.ts";
|
|
11
11
|
|
|
12
12
|
type ProxyToolResult = AgentToolResult<Record<string, unknown>>;
|
|
@@ -718,7 +718,9 @@ export async function executeCall(
|
|
|
718
718
|
|
|
719
719
|
const resultPromise = connection.client.callTool({
|
|
720
720
|
name: toolMeta.originalName,
|
|
721
|
-
|
|
721
|
+
// Normalize provider-flattened argument keys (e.g. Gemini's `keywords[0]`)
|
|
722
|
+
// back into arrays/objects before the MCP server validates them.
|
|
723
|
+
arguments: unflattenToolArguments(args),
|
|
722
724
|
_meta: uiSession?.requestMeta,
|
|
723
725
|
});
|
|
724
726
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ExtensionAPI } from "@bastani/atomic";
|
|
2
|
+
import { reconstructFlattenedKeys } from "@bastani/atomic";
|
|
2
3
|
import { homedir, platform } from "node:os";
|
|
3
4
|
import { join } from "node:path";
|
|
4
5
|
import type { McpConfig, ServerEntry } from "./types.ts";
|
|
@@ -127,3 +128,27 @@ export function extractToolUiStreamMode(toolMeta: Record<string, unknown> | unde
|
|
|
127
128
|
}
|
|
128
129
|
return undefined;
|
|
129
130
|
}
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Reconstruct flattened tool-call arguments into proper nested arrays/objects.
|
|
134
|
+
*
|
|
135
|
+
* Some upstream providers — notably GitHub Copilot Gemini models proxied through
|
|
136
|
+
* Google's GenAI API — serialize array/object function-call arguments as
|
|
137
|
+
* flattened, indexed keys on the wire. For example a tool called with
|
|
138
|
+
* `{ keywords: ["a", "b"] }` arrives as `{ "keywords[0]": "a", "keywords[1]": "b" }`,
|
|
139
|
+
* which an MCP server then rejects as invalid arguments.
|
|
140
|
+
*
|
|
141
|
+
* This normalizer runs at the MCP `callTool` boundary so arguments are correct
|
|
142
|
+
* regardless of how the model/provider serialized them. It is provider-agnostic
|
|
143
|
+
* and **self-gating**: it is a no-op unless at least one bracket-indexed key
|
|
144
|
+
* (`name[<digit>]`) is present, so well-formed arguments pass through untouched
|
|
145
|
+
* (including arguments already normalized upstream by the host runtime).
|
|
146
|
+
*/
|
|
147
|
+
export function unflattenToolArguments(
|
|
148
|
+
args: Record<string, unknown> | null | undefined,
|
|
149
|
+
): Record<string, unknown> {
|
|
150
|
+
if (args === null || args === undefined) return {};
|
|
151
|
+
const keys = Object.keys(args);
|
|
152
|
+
if (!keys.some((key) => /\[\d+\]/.test(key))) return args;
|
|
153
|
+
return reconstructFlattenedKeys(args, () => true);
|
|
154
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/subagents",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.5",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension for delegating tasks to subagents with chains, parallel execution, and TUI clarification. Fork of: https://github.com/nicobailon/pi-subagents",
|
|
6
6
|
"contributors": [
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/web-access",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.5",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension for web search, URL fetching, GitHub repo cloning, PDF/video extraction. Fork of: https://github.com/nicobailon/pi-web-access",
|
|
6
6
|
"contributors": [
|
|
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
|
12
12
|
|
|
13
13
|
### Added
|
|
14
14
|
|
|
15
|
+
- Added a deterministic workflow-stage resume stop hook: after an interactive interrupt/pause is resumed with a message, the executor suppresses the #1099/#1264 readiness prompt for that resume-answer turn (including `ask_user_question` turns) and, when the stage remains promptable, sends `Continue where you left off.` in the same stage session once per resume; schema-backed stages that already finalized with `structured_output` consume the token without a second prompt ([#1407](https://github.com/bastani-inc/atomic/issues/1407)).
|
|
15
16
|
- Added a QA end-to-end proof video to the builtin `ralph` workflow. For UI-applicable or full-stack changes, the orchestrator now runs a `playwright-cli` end-to-end QA pass that drives the running app like a user, records a reviewable video (`playwright-cli video-start`/`video-stop`) to a stable run path, references it in the implementation notes (`## QA E2E Video`), and exposes it as the new optional `qa_video_path` output so the proof is available when the orchestrator finishes. When `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review (embedding/linking where the provider supports media uploads, otherwise surfacing the absolute path). When no user-visible UI scenario applies, no video is produced and the notes record why.
|
|
16
17
|
- Added a per-model context-window authoring token to workflow model strings: a parenthesized size token placed in the model-name portion, *before* the optional `:reasoning` suffix, e.g. `github-copilot/claude-opus-4.8 (1m):xhigh`. Adopting GitHub Copilot's `Claude Opus 4.8 (1M context)` naming convention keeps the window separate from the reasoning level so the two never collide. The token is resolved against the candidate model's advertised windows — an exact match wins, otherwise the largest supported window not exceeding the request (so `(1m)` selects a model's ~936K long-context tier), and it falls back to the model's default (short) window when no larger tier is available. It applies only to the candidate that carries the token, leaving primary and other fallback models untouched. Also surfaced `contextWindow`/`contextWindowStrict` on `StageOptions` and the workflow tool's direct-task schema for stage-level selection.
|
|
17
18
|
|
|
@@ -22,6 +23,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
|
22
23
|
- Changed the builtin `deep-research-codebase`, `goal`, `ralph`, and `open-claude-design` workflows to run their GitHub Copilot `claude-opus-4.8` fallbacks at the model's largest advertised long-context (~1M/936K) window via the new `(1m)` token, automatically degrading to the 200K short window when Copilot's long-context tier is unavailable. Other models in each fallback chain are unaffected.
|
|
23
24
|
- Aligned the workflows extension peer dependency with upstream pi TUI `^0.79.7` so workflow graph, custom UI, and prompt-broker integrations consume the latest shared TUI color-scheme, Warp image capability, and compatibility fixes; no workflows extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
|
|
24
25
|
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- Fixed workflow stage transcripts ignoring the host's resolved non-default session directory in headless runs. Stages without an explicit `sessionDir` now inherit the active main-session directory when it comes from `--session-dir`, `ATOMIC_CODING_AGENT_SESSION_DIR`, or settings; explicit per-stage `sessionDir` still wins, default host sessions keep writing stages to the global store, and forked stages inherit the non-default directory too ([#1444](https://github.com/bastani-inc/atomic/issues/1444)).
|
|
29
|
+
- Fixed a manual workflow pause/resume not updating the main-chat run status the way the `workflow` tool and `/workflow pause`/`/workflow resume` do. Pausing a stage from the attached stage chat (Escape) or any direct live-handle path recorded only the **stage** as paused (`recordStagePaused`) and never the **run** (`recordRunPaused`), so the below-editor status widget and `/workflow status` kept showing the run as `running` (`●`) even though work was paused; resume had the symmetric gap. The executor stage-control handle now records run-level pause/resume itself — marking the run paused once no stage is still actively running (mirroring `pauseRun`'s all-active-stages-paused rule) and restoring it on resume — so manual and tool-driven pause/resume update the main chat identically. Both run-level transitions are idempotent, so the tool/slash path and cascade re-entry stay safe.
|
|
30
|
+
- Fixed the builtin `ralph` workflow review loop iterating until `max_loops` even when reviewers judged the patch correct. The unanimous-approval gate required a literally empty `findings` array, so a single low-priority **P3** nit — or a placeholder/dummy finding a reviewer appended because it wrongly believed an empty array would fail schema validation — kept the loop spinning despite every reviewer reporting `overall_correctness: "patch is correct"`. Approval is now **severity-aware and deterministic**: a reviewer approves when it judged the patch correct, reported no `reviewer_error`, and filed no *blocking* finding, where blocking = **P0/P1/P2** (priority 0/1/2) and **P3** (priority 3) is a non-blocking nice-to-have; a finding without a determinable priority (`null`/`undefined`) is treated as blocking so ambiguity never silently approves. The decision is computed from finding priorities rather than the reviewer's self-reported `stop_review_loop` flag. Extracted the gate into `builtin/ralph-review-gate.ts` (`reviewDecisionApproved`, `isBlockingFinding`) with unit coverage, and updated the reviewer prompt so an empty `findings` array is explicitly valid and placeholder findings are never fabricated ([#1407](https://github.com/bastani-inc/atomic/issues/1407)).
|
|
31
|
+
- Fixed workflow stage **model fallback misreporting configured providers as `No API key found`**. Each fallback candidate session was created with a fresh `AuthStorage`/`ModelRegistry`, so after a primary model failed (for example the Ralph `reviewer-a` chain hitting an unavailable `anthropic/claude-fable-5` and getting a real provider 404), every fallback candidate re-read `auth.json` from scratch. Under concurrent reviewer stages and OAuth token refreshes holding the `auth.json` lock, that fresh synchronous reload could fail and silently fall back to an empty credential set, reporting `No API key found` for `anthropic`/`openai-codex`/`github-copilot` even while sibling reviewer stages used those exact providers successfully. A stage now captures the `ModelRegistry` (and its already-loaded `AuthStorage`) from its first session and threads it into every subsequent fallback candidate, so a successfully-loaded credential store is reused across the whole fallback chain instead of being discarded and re-loaded per candidate. Combined with the coding-agent change that surfaces a real credential-store load failure instead of `No API key found`, a transient store-read failure remains a recoverable/retryable auth failure ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
32
|
+
- Fixed post-completion workflow follow-ups replaying the entire model-fallback chain from an unavailable primary instead of resuming on the model the stage settled on. After model fallback succeeded, the stage kept its working `session` but left `sessionPromise` undefined, and `ensureSession()` only checked `sessionPromise` — so a follow-up (`ctx.followUp`/`ctx.steer`/`ensureAttached`, and post-completion `workflow send`/TUI prompts) created a brand-new session from `candidates[0]` (the primary), discarding the working fallback session. For a chain whose primary 404s (e.g. `anthropic/claude-fable-5`), every follow-up re-ran `primary -> 404 -> ... -> working model` and could leave the stage stuck on the unavailable primary. `ensureSession()` now reuses an already-attached session, and `promptWithFallback()` retries the last-settled model first (for both live retained sessions and disk-reattached sessions), restarting the full chain from the primary only if that model fails again retryably ([#1431](https://github.com/bastani-inc/atomic/issues/1431)).
|
|
33
|
+
|
|
25
34
|
## [0.8.30] - 2026-06-17
|
|
26
35
|
|
|
27
36
|
### Changed
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
 * Review-gate severity logic for the builtin `ralph` workflow.
 *
 * The bounded review loop must stop as soon as the patch is judged correct, even
 * when a reviewer leaves a low-priority nit (or, occasionally, appends a
 * low-priority placeholder finding because it wrongly believed an empty
 * `findings` array would fail schema validation). Requiring a literally empty
 * `findings` array made the loop run until `max_loops` in those cases despite
 * unanimous "patch is correct" verdicts.
 *
 * Approval is therefore severity-aware and deterministic. A single reviewer
 * approves when it judged the patch correct, reported no `reviewer_error`, and
 * filed no *blocking* finding:
 *
 * - Blocking = P0/P1/P2 (numeric priority 0, 1, or 2).
 * - Non-blocking = P3 (numeric priority 3) — a nice-to-have that should not keep
 *   the loop spinning.
 * - A finding whose priority cannot be determined (`null`, `undefined`, `NaN`,
 *   or any other value that is not a finite number) is treated as blocking, so
 *   genuine ambiguity never silently approves.
 *
 * The decision is computed from the structured findings rather than the
 * reviewer's self-reported `stop_review_loop` boolean, so the gate does not
 * depend on the model correctly deriving that flag.
 */

/** One issue reported by a reviewer, anchored to a file and line range. */
export type ReviewFinding = {
  readonly title: string;
  readonly body: string;
  readonly confidence_score: number;
  /** Numeric severity: 0 = P0, 1 = P1, 2 = P2, 3 = P3; `null`/omitted = undetermined. */
  readonly priority?: number | null;
  readonly code_location: {
    readonly absolute_file_path: string;
    readonly line_range: {
      readonly start: number;
      readonly end: number;
    };
  };
};

/** Execution problem a reviewer reports instead of a verdict. */
export type ReviewerError = {
  readonly kind:
    | "validation_unavailable"
    | "dependency_unavailable"
    | "tool_failure"
    | "reviewer_failure";
  readonly message: string;
  readonly attempted_recovery: string;
};

/** Structured output of a single reviewer for one loop iteration. */
export type ReviewDecision = {
  readonly findings: readonly ReviewFinding[];
  readonly overall_correctness: "patch is correct" | "patch is incorrect";
  readonly overall_explanation: string;
  readonly overall_confidence_score: number;
  /** Self-reported by the reviewer; deliberately NOT consulted by the gate. */
  readonly stop_review_loop: boolean;
  readonly reviewer_error?: ReviewerError | null;
};

/**
 * Highest finding priority that still blocks approval. P0=0, P1=1, P2=2 block;
 * P3=3 does not.
 */
export const MAX_BLOCKING_PRIORITY = 2;

/**
 * True when a finding must keep the review loop iterating.
 *
 * P0/P1/P2 block; P3 is a non-blocking nice-to-have. A finding without a
 * determinable priority is treated as blocking so ambiguity never silently
 * approves.
 *
 * @param finding - The reviewer finding to classify.
 * @returns `true` when the finding blocks approval, `false` otherwise.
 */
export function isBlockingFinding(finding: ReviewFinding): boolean {
  // Widen to `unknown`: the decision object may come from an unchecked cast of
  // model output, so the static `number | null | undefined` type is not a
  // runtime guarantee.
  const priority: unknown = finding.priority;
  // `NaN <= 2` and `"P1" <= 2` are both `false`, which would wrongly classify
  // an undeterminable priority as non-blocking. Fail closed instead.
  if (typeof priority !== "number" || !Number.isFinite(priority)) return true;
  return priority <= MAX_BLOCKING_PRIORITY;
}

/**
 * A single reviewer approves (would stop the loop) when it judged the patch
 * correct, surfaced no reviewer execution error, and filed no blocking
 * (P0/P1/P2 or undetermined-priority) finding. P3 nice-to-haves — including
 * placeholder findings filed at P3 — do not block approval.
 *
 * @param decision - The reviewer's structured decision.
 * @returns `true` only when every approval condition holds.
 */
export function reviewDecisionApproved(decision: ReviewDecision): boolean {
  const findings: unknown = decision.findings;
  // A decision without a usable findings list is malformed; withhold approval
  // rather than throwing from `.some` or approving by accident.
  if (!Array.isArray(findings)) return false;
  return (
    decision.overall_correctness === "patch is correct" &&
    decision.reviewer_error == null &&
    !findings.some(isBlockingFinding)
  );
}
|
|
@@ -18,6 +18,7 @@ import type {
|
|
|
18
18
|
WorkflowTaskResult,
|
|
19
19
|
} from "../src/shared/types.js";
|
|
20
20
|
import { E2E_VERIFICATION_GUIDANCE, WORKER_PREFLIGHT_CONTRACT } from "./shared-prompts.js";
|
|
21
|
+
import { reviewDecisionApproved, type ReviewDecision } from "./ralph-review-gate.js";
|
|
21
22
|
|
|
22
23
|
const DEFAULT_MAX_LOOPS = 10;
|
|
23
24
|
const DEFAULT_RESEARCH_DIR = "research";
|
|
@@ -25,44 +26,15 @@ const IMPLEMENTATION_NOTES_FILENAME = "implementation-notes.md";
|
|
|
25
26
|
const QA_E2E_VIDEO_FILENAME = "qa-e2e-evidence.webm";
|
|
26
27
|
const MAX_RESEARCH_SLUG_LENGTH = 80;
|
|
27
28
|
// Reviewer fan-out launches three independent reviewers; the loop stops only when
|
|
28
|
-
// all three reviewers independently approve
|
|
29
|
-
//
|
|
30
|
-
//
|
|
29
|
+
// all three reviewers independently approve. Approval is severity-aware: a
|
|
30
|
+
// reviewer approves when it judged the patch correct, reported no reviewer_error,
|
|
31
|
+
// and filed no *blocking* (P0/P1/P2) finding. P3 nice-to-haves no longer keep the
|
|
32
|
+
// loop iterating, so a single low-priority nit (or a placeholder finding) can no
|
|
33
|
+
// longer strand an otherwise-approved patch. Requiring unanimous approval still
|
|
34
|
+
// means a blocking finding from any one reviewer keeps the loop going. See
|
|
35
|
+
// ./ralph-review-gate.ts for the gate types and decision logic.
|
|
31
36
|
const REVIEWER_COUNT = 3;
|
|
32
37
|
|
|
33
|
-
type ReviewFinding = {
|
|
34
|
-
readonly title: string;
|
|
35
|
-
readonly body: string;
|
|
36
|
-
readonly confidence_score: number;
|
|
37
|
-
readonly priority?: number | null;
|
|
38
|
-
readonly code_location: {
|
|
39
|
-
readonly absolute_file_path: string;
|
|
40
|
-
readonly line_range: {
|
|
41
|
-
readonly start: number;
|
|
42
|
-
readonly end: number;
|
|
43
|
-
};
|
|
44
|
-
};
|
|
45
|
-
};
|
|
46
|
-
|
|
47
|
-
type ReviewerError = {
|
|
48
|
-
readonly kind:
|
|
49
|
-
| "validation_unavailable"
|
|
50
|
-
| "dependency_unavailable"
|
|
51
|
-
| "tool_failure"
|
|
52
|
-
| "reviewer_failure";
|
|
53
|
-
readonly message: string;
|
|
54
|
-
readonly attempted_recovery: string;
|
|
55
|
-
};
|
|
56
|
-
|
|
57
|
-
type ReviewDecision = {
|
|
58
|
-
readonly findings: readonly ReviewFinding[];
|
|
59
|
-
readonly overall_correctness: "patch is correct" | "patch is incorrect";
|
|
60
|
-
readonly overall_explanation: string;
|
|
61
|
-
readonly overall_confidence_score: number;
|
|
62
|
-
readonly stop_review_loop: boolean;
|
|
63
|
-
readonly reviewer_error?: ReviewerError | null;
|
|
64
|
-
};
|
|
65
|
-
|
|
66
38
|
const reviewFindingSchema = Type.Object(
|
|
67
39
|
{
|
|
68
40
|
title: Type.String(),
|
|
@@ -220,15 +192,6 @@ function reviewDecisionFromResult(result: WorkflowTaskResult): ReviewDecision |
|
|
|
220
192
|
return result.structured as ReviewDecision | undefined;
|
|
221
193
|
}
|
|
222
194
|
|
|
223
|
-
function reviewDecisionApproved(decision: ReviewDecision): boolean {
|
|
224
|
-
return (
|
|
225
|
-
decision.stop_review_loop === true &&
|
|
226
|
-
decision.overall_correctness === "patch is correct" &&
|
|
227
|
-
decision.findings.length === 0 &&
|
|
228
|
-
decision.reviewer_error == null
|
|
229
|
-
);
|
|
230
|
-
}
|
|
231
|
-
|
|
232
195
|
function reviewerErrorDecision(error: string): ReviewDecision {
|
|
233
196
|
return {
|
|
234
197
|
findings: [],
|
|
@@ -554,6 +517,7 @@ async function runRalphWorkflow(
|
|
|
554
517
|
model: "github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
555
518
|
fallbackModels: [
|
|
556
519
|
"google/gemini-3.1-pro-preview:high",
|
|
520
|
+
"google-vertex/gemini-3.1-pro-preview:high",
|
|
557
521
|
"openai-codex/gpt-5.5:xhigh",
|
|
558
522
|
"github-copilot/gpt-5.5:xhigh",
|
|
559
523
|
"openai/gpt-5.5:xhigh",
|
|
@@ -784,14 +748,14 @@ async function runRalphWorkflow(
|
|
|
784
748
|
"Speculation is insufficient: identify the code path, scenario, environment, or input that is provably affected.",
|
|
785
749
|
"Do not flag intentional behavior changes as bugs unless they clearly violate the task or documented contract.",
|
|
786
750
|
"Ignore trivial style unless it obscures meaning or violates documented standards in a way that affects correctness/security/maintainability.",
|
|
787
|
-
"If no finding clears this bar, return an empty findings array, mark the patch correct, and set stop_review_loop true.",
|
|
751
|
+
"If no finding clears this bar, return an empty findings array, mark the patch correct, and set stop_review_loop true. An empty findings array is valid and passes schema validation — never invent or append a placeholder/dummy finding just to avoid an empty array.",
|
|
788
752
|
].join("\n"),
|
|
789
753
|
],
|
|
790
754
|
[
|
|
791
755
|
"comment_guidelines",
|
|
792
756
|
[
|
|
793
757
|
"Each finding title must start with a priority tag: [P0] drop-everything blocker, [P1] urgent next-cycle fix, [P2] normal fix, [P3] low-priority nice-to-have.",
|
|
794
|
-
"Also include numeric priority: 0 for P0, 1 for P1, 2 for P2, 3 for P3; use null only if priority genuinely cannot be determined.",
|
|
758
|
+
"Also include numeric priority: 0 for P0, 1 for P1, 2 for P2, 3 for P3; use null only if priority genuinely cannot be determined. Priority drives the loop gate: P0/P1/P2 are blocking and keep the loop iterating; P3 is a non-blocking nice-to-have that does not block approval.",
|
|
795
759
|
"The body must be one concise paragraph explaining why this is a bug and the exact scenario, environment, or inputs required for it to arise.",
|
|
796
760
|
"Use a matter-of-fact, non-accusatory tone. Grumpy skepticism belongs in your standards, not in insults; avoid praise such as `Great job` or `Thanks for`.",
|
|
797
761
|
"Keep code_location ranges as short as possible, ideally one line and never longer than 5-10 lines unless unavoidable.",
|
|
@@ -804,7 +768,7 @@ async function runRalphWorkflow(
|
|
|
804
768
|
"how_many_findings",
|
|
805
769
|
[
|
|
806
770
|
"Return all findings the original author would definitely want to fix.",
|
|
807
|
-
"If no such findings exist, return an empty findings array and mark the patch correct.",
|
|
771
|
+
"If no such findings exist, return an empty findings array and mark the patch correct. Do not pad the array with placeholder or speculative findings.",
|
|
808
772
|
"Do not stop after the first qualifying finding; continue until every qualifying finding is listed.",
|
|
809
773
|
].join("\n"),
|
|
810
774
|
],
|
|
@@ -835,7 +799,7 @@ async function runRalphWorkflow(
|
|
|
835
799
|
[
|
|
836
800
|
"decision_rules",
|
|
837
801
|
[
|
|
838
|
-
"Set stop_review_loop=true
|
|
802
|
+
"Set stop_review_loop=true when the patch is correct, reviewer_error is null/omitted, and there are no blocking (P0/P1/P2) findings; remaining P3 nice-to-haves do not block approval. The loop gate is computed from finding priorities, so an unresolved P0/P1/P2 keeps the loop going regardless of this flag.",
|
|
839
803
|
"If you hit a reviewer/tool/validation error, set stop_review_loop=false and populate reviewer_error instead of pretending the patch is approved.",
|
|
840
804
|
].join("\n"),
|
|
841
805
|
],
|
|
@@ -907,8 +871,9 @@ async function runRalphWorkflow(
|
|
|
907
871
|
).length;
|
|
908
872
|
// Require unanimous approval: every reviewer must have run and independently
|
|
909
873
|
// approved. A fan-out error that collapses to a single error entry (fewer than
|
|
910
|
-
// REVIEWER_COUNT reviews) or any reviewer surfacing a
|
|
911
|
-
// iterating rather than letting a majority paper over
|
|
874
|
+
// REVIEWER_COUNT reviews) or any reviewer surfacing a blocking (P0/P1/P2)
|
|
875
|
+
// finding keeps the loop iterating rather than letting a majority paper over
|
|
876
|
+
// outstanding issues. P3 nice-to-haves do not block approval.
|
|
912
877
|
approved =
|
|
913
878
|
reviewEntries.length === REVIEWER_COUNT &&
|
|
914
879
|
approvalCount === REVIEWER_COUNT;
|
|
@@ -69,6 +69,8 @@ export interface DispatcherOpts {
|
|
|
69
69
|
policy?: WorkflowExecutionPolicy;
|
|
70
70
|
/** Invocation cwd used for workflow execution. */
|
|
71
71
|
cwd?: string;
|
|
72
|
+
/** Host-resolved non-default session directory inherited by stages without explicit sessionDir. */
|
|
73
|
+
defaultSessionDir?: string;
|
|
72
74
|
}
|
|
73
75
|
|
|
74
76
|
// ---------------------------------------------------------------------------
|
|
@@ -173,6 +175,7 @@ export async function dispatch(
|
|
|
173
175
|
models: opts.models,
|
|
174
176
|
executionMode: policy.mode,
|
|
175
177
|
cwd: opts.cwd,
|
|
178
|
+
defaultSessionDir: opts.defaultSessionDir,
|
|
176
179
|
});
|
|
177
180
|
if (policy.awaitTerminalRun === true) {
|
|
178
181
|
const tracker = opts.jobs ?? defaultJobTracker;
|
|
@@ -2562,6 +2562,17 @@ function factory(pi: ExtensionAPI): void {
|
|
|
2562
2562
|
: undefined,
|
|
2563
2563
|
parentSession: () => intercomParentSession ?? undefined,
|
|
2564
2564
|
};
|
|
2565
|
+
const hostStageSessionDir: { current: string | undefined } = { current: undefined };
|
|
2566
|
+
const resolveDefaultStageSessionDir = (): string | undefined => hostStageSessionDir.current;
|
|
2567
|
+
const updateHostStageSessionDir = (sessionManager: SessionManager | undefined): void => {
|
|
2568
|
+
try {
|
|
2569
|
+
hostStageSessionDir.current = sessionManager?.usesDefaultSessionDir?.() === false
|
|
2570
|
+
? sessionManager.getSessionDir?.()
|
|
2571
|
+
: undefined;
|
|
2572
|
+
} catch {
|
|
2573
|
+
hostStageSessionDir.current = undefined;
|
|
2574
|
+
}
|
|
2575
|
+
};
|
|
2565
2576
|
|
|
2566
2577
|
const startupDiscovery = discoverStartupWorkflowsSync();
|
|
2567
2578
|
const runtimeRef: { current: ExtensionRuntime } = {
|
|
@@ -2574,6 +2585,7 @@ function factory(pi: ExtensionAPI): void {
|
|
|
2574
2585
|
mcp: mcpPort,
|
|
2575
2586
|
intercom: intercomPort,
|
|
2576
2587
|
config: runtimeConfigRef.current,
|
|
2588
|
+
resolveDefaultStageSessionDir,
|
|
2577
2589
|
}),
|
|
2578
2590
|
};
|
|
2579
2591
|
const discoveryRef: { current: DiscoveryResult | null } = { current: null };
|
|
@@ -2641,6 +2653,7 @@ function factory(pi: ExtensionAPI): void {
|
|
|
2641
2653
|
intercom: intercomPort,
|
|
2642
2654
|
config: runtimeConfigRef.current,
|
|
2643
2655
|
models,
|
|
2656
|
+
resolveDefaultStageSessionDir,
|
|
2644
2657
|
});
|
|
2645
2658
|
}
|
|
2646
2659
|
|
|
@@ -2735,6 +2748,7 @@ function factory(pi: ExtensionAPI): void {
|
|
|
2735
2748
|
mcp: mcpPort,
|
|
2736
2749
|
intercom: intercomPort,
|
|
2737
2750
|
config: runtimeConfigRef.current,
|
|
2751
|
+
resolveDefaultStageSessionDir,
|
|
2738
2752
|
});
|
|
2739
2753
|
}
|
|
2740
2754
|
|
|
@@ -4025,6 +4039,7 @@ function factory(pi: ExtensionAPI): void {
|
|
|
4025
4039
|
}
|
|
4026
4040
|
|
|
4027
4041
|
const sessionManager = ctx?.sessionManager ?? pi.sessionManager;
|
|
4042
|
+
updateHostStageSessionDir(sessionManager);
|
|
4028
4043
|
if (sessionManager) {
|
|
4029
4044
|
const cfg = configLoadRef.current?.config;
|
|
4030
4045
|
withWorkflowLifecycleNotificationsSuppressed(
|
|
@@ -86,6 +86,8 @@ export interface ExtensionRuntimeOpts {
|
|
|
86
86
|
jobs?: JobTracker;
|
|
87
87
|
/** Invocation cwd used for workflow execution. Defaults to process.cwd(). */
|
|
88
88
|
cwd?: string;
|
|
89
|
+
/** Resolve the host's non-default session directory for workflow stage transcripts. */
|
|
90
|
+
resolveDefaultStageSessionDir?: () => string | undefined;
|
|
89
91
|
}
|
|
90
92
|
|
|
91
93
|
// ---------------------------------------------------------------------------
|
|
@@ -149,6 +151,7 @@ export function createExtensionRuntime(opts: ExtensionRuntimeOpts = {}): Extensi
|
|
|
149
151
|
const models = opts.models;
|
|
150
152
|
const jobs = opts.jobs;
|
|
151
153
|
const runtimeCwd = opts.cwd ?? process.cwd();
|
|
154
|
+
const resolveDefaultStageSessionDir = opts.resolveDefaultStageSessionDir;
|
|
152
155
|
|
|
153
156
|
function runOptions(args: WorkflowToolArgs, policy?: WorkflowExecutionPolicy): RunOpts {
|
|
154
157
|
const argConcurrency =
|
|
@@ -166,6 +169,7 @@ export function createExtensionRuntime(opts: ExtensionRuntimeOpts = {}): Extensi
|
|
|
166
169
|
...(config?.statusFilePath !== undefined ? { statusFilePath: config.statusFilePath } : {}),
|
|
167
170
|
resumeInFlight: config?.resumeInFlight ?? "ask",
|
|
168
171
|
};
|
|
172
|
+
const defaultSessionDir = resolveDefaultStageSessionDir?.();
|
|
169
173
|
return {
|
|
170
174
|
adapters,
|
|
171
175
|
store: activeStore,
|
|
@@ -174,6 +178,7 @@ export function createExtensionRuntime(opts: ExtensionRuntimeOpts = {}): Extensi
|
|
|
174
178
|
mcp,
|
|
175
179
|
config: effectiveConfig,
|
|
176
180
|
models,
|
|
181
|
+
...(defaultSessionDir !== undefined ? { defaultSessionDir } : {}),
|
|
177
182
|
...(policy !== undefined ? { executionMode: policy.mode } : {}),
|
|
178
183
|
registry,
|
|
179
184
|
cwd: runtimeCwd,
|
|
@@ -510,6 +515,7 @@ export function createExtensionRuntime(opts: ExtensionRuntimeOpts = {}): Extensi
|
|
|
510
515
|
},
|
|
511
516
|
|
|
512
517
|
dispatch(args: WorkflowToolArgs, options?: RuntimeDispatchOptions): Promise<WorkflowToolResult> {
|
|
518
|
+
const defaultSessionDir = resolveDefaultStageSessionDir?.();
|
|
513
519
|
return dispatch(args, {
|
|
514
520
|
registry,
|
|
515
521
|
adapters,
|
|
@@ -522,6 +528,7 @@ export function createExtensionRuntime(opts: ExtensionRuntimeOpts = {}): Extensi
|
|
|
522
528
|
models,
|
|
523
529
|
policy: options?.policy,
|
|
524
530
|
cwd: runtimeCwd,
|
|
531
|
+
...(defaultSessionDir !== undefined ? { defaultSessionDir } : {}),
|
|
525
532
|
});
|
|
526
533
|
},
|
|
527
534
|
|