npm - @bastani/atomic - Versions diffs - 0.9.1-alpha.1 → 0.9.2-alpha.1 - Mend

@bastani/atomic 0.9.1-alpha.1 → 0.9.2-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/CHANGELOG.md +23 -0
package/dist/builtin/cursor/CHANGELOG.md +12 -0
package/dist/builtin/cursor/package.json +2 -2
package/dist/builtin/intercom/CHANGELOG.md +12 -0
package/dist/builtin/intercom/package.json +2 -2
package/dist/builtin/mcp/CHANGELOG.md +12 -0
package/dist/builtin/mcp/package.json +3 -3
package/dist/builtin/subagents/CHANGELOG.md +12 -0
package/dist/builtin/subagents/package.json +4 -4
package/dist/builtin/web-access/CHANGELOG.md +12 -0
package/dist/builtin/web-access/package.json +2 -2
package/dist/builtin/workflows/CHANGELOG.md +18 -0
package/dist/builtin/workflows/README.md +2 -2
package/dist/builtin/workflows/builtin/goal-ledger.ts +0 -2
package/dist/builtin/workflows/builtin/goal-prompts.ts +10 -4
package/dist/builtin/workflows/builtin/goal-reports.ts +0 -5
package/dist/builtin/workflows/builtin/goal-runner.ts +8 -11
package/dist/builtin/workflows/builtin/goal-types.ts +0 -2
package/dist/builtin/workflows/builtin/goal.d.ts +0 -1
package/dist/builtin/workflows/builtin/goal.ts +1 -2
package/dist/builtin/workflows/builtin/index.d.ts +0 -1
package/dist/builtin/workflows/builtin/ralph-core.ts +1 -3
package/dist/builtin/workflows/builtin/ralph-models.ts +22 -19
package/dist/builtin/workflows/builtin/ralph-runner.ts +17 -14
package/dist/builtin/workflows/builtin/ralph.d.ts +0 -2
package/dist/builtin/workflows/builtin/ralph.ts +1 -3
package/dist/builtin/workflows/builtin/shared-prompts.ts +15 -0
package/dist/builtin/workflows/package.json +2 -2
package/dist/core/copilot-gemini-tool-arguments.d.ts.map +1 -1
package/dist/core/copilot-gemini-tool-arguments.js +41 -3
package/dist/core/copilot-gemini-tool-arguments.js.map +1 -1
package/docs/workflows.md +6 -9
package/package.json +30 -30
package/dist/builtin/workflows/builtin/prompt-refinement.ts +0 -90

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,29 @@
 ## [Unreleased]
+## [0.9.2-alpha.1] - 2026-06-23
+### Changed
+- Removed the initial `prompt-refinement` stage and shared prompt-refinement helper from the bundled `goal` and `ralph` workflows so both now use the raw objective/prompt as the operative task text for their first downstream stages; the now-obsolete refined/original trace outputs were also removed.
+- Updated bundled `goal` and `ralph` reviewer prompts to inspect referenced QA end-to-end video evidence before treating it as proof of user-visible behavior.
+- Synced bundled upstream Pi package dependencies to `^0.79.10` across Atomic's CLI and extension peer manifests, and aligned shared coding-agent direct runtime/dev dependency pins with upstream Pi v0.79.10.
+- Raised the published Node.js engine floor to `>=22.19.0` to match direct runtime dependency requirements, including `undici@8.5.0`.
+### Fixed
+- Fixed GitHub Copilot Gemini tool-call normalization to synthesize omitted required empty array properties before validation, preventing Ralph reviewer structured output such as `findings: []` from failing when CAPI drops the empty array from the tool call.
+## [0.9.1] - 2026-06-23
+### Changed
+- Changed the bundled `goal`/`ralph` workflow prompt-refinement stage to use a workflow-neutral, model-only rubric prompt that returns only the refined objective instead of invoking the `prompt-engineer` skill directly.
+### Fixed
+- Fixed the bundled `ralph` workflow reviewer-c model configuration to use Gemini 3.1 Pro as the third reviewer with Gemini 3.1 provider fallbacks, removing Gemini 3.5 Flash from that slot's fallback chain ([#1484](https://github.com/bastani-inc/atomic/issues/1484)).
 ## [0.9.1-alpha.1] - 2026-06-22
 ### Changed

package/dist/builtin/cursor/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,18 @@
 ## [Unreleased]
+## [0.9.2-alpha.1] - 2026-06-23
+### Changed
+- Published a synchronized Atomic 0.9.2-alpha.1 prerelease for the Cursor provider package; no functional Cursor provider changes were made after 0.9.1.
+## [0.9.1] - 2026-06-23
+### Changed
+- Published the stable Atomic 0.9.1 release for the Cursor provider package; no functional Cursor provider changes were made after 0.9.0.
 ## [0.9.1-alpha.1] - 2026-06-22
 ### Changed

package/dist/builtin/cursor/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/cursor",
-  "version": "0.9.1-alpha.1",
+  "version": "0.9.2-alpha.1",
   "private": true,
   "description": "Experimental first-party Atomic extension for Cursor OAuth, model discovery, and streaming provider registration.",
   "contributors": [
@@ -40,7 +40,7 @@
     }
   },
   "dependencies": {
-    "@bastani/atomic-natives": "0.9.1-alpha.1",
+    "@bastani/atomic-natives": "0.9.2-alpha.1",
     "@bufbuild/protobuf": "^2.0.0"
   }
 }

package/dist/builtin/intercom/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,18 @@ All notable changes to the `pi-intercom` extension will be documented in this fi
 ## [Unreleased]
+## [0.9.2-alpha.1] - 2026-06-23
+### Changed
+- Aligned the intercom extension peer dependency with upstream pi TUI `^0.79.10`; no intercom extension source changes were needed for this metadata sync.
+## [0.9.1] - 2026-06-23
+### Changed
+- Published the stable Atomic 0.9.1 release for the intercom extension; no functional intercom changes were made after 0.9.0.
 ## [0.9.1-alpha.1] - 2026-06-22
 ### Changed

package/dist/builtin/intercom/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/intercom",
-  "version": "0.9.1-alpha.1",
+  "version": "0.9.2-alpha.1",
   "private": true,
   "description": "Atomic extension providing a private coordination channel between parent and child agent sessions. Fork of: https://github.com/nicobailon/pi-intercom",
   "contributors": [
@@ -39,7 +39,7 @@
   },
   "peerDependencies": {
     "@bastani/atomic": "*",
-    "@earendil-works/pi-tui": "^0.79.9"
+    "@earendil-works/pi-tui": "^0.79.10"
   },
   "peerDependenciesMeta": {
     "@bastani/atomic": {

package/dist/builtin/mcp/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.9.2-alpha.1] - 2026-06-23
+### Changed
+- Aligned the MCP extension peer dependencies with upstream pi AI/TUI `^0.79.10`; no MCP extension source changes were needed for this metadata sync.
+## [0.9.1] - 2026-06-23
+### Changed
+- Published the stable Atomic 0.9.1 release for the MCP extension; no functional MCP changes were made after 0.9.0.
 ## [0.9.1-alpha.1] - 2026-06-22
 ### Changed

package/dist/builtin/mcp/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/mcp",
-  "version": "0.9.1-alpha.1",
+  "version": "0.9.2-alpha.1",
   "private": true,
   "description": "Atomic extension that adapts MCP (Model Context Protocol) servers into the coding agent. Fork of: https://github.com/nicobailon/pi-mcp-adapter",
   "contributors": [
@@ -32,8 +32,8 @@
   },
   "peerDependencies": {
     "@bastani/atomic": "*",
-    "@earendil-works/pi-ai": "^0.79.9",
-    "@earendil-works/pi-tui": "^0.79.9",
+    "@earendil-works/pi-ai": "^0.79.10",
+    "@earendil-works/pi-tui": "^0.79.10",
     "zod": "^3.25.0 || ^4.0.0"
   },
   "peerDependenciesMeta": {

package/dist/builtin/subagents/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,18 @@
 ## [Unreleased]
+## [0.9.2-alpha.1] - 2026-06-23
+### Changed
+- Aligned the subagents extension peer dependencies with upstream pi `^0.79.10` runtime packages (`@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui`); no subagents extension source changes were needed for this metadata sync.
+## [0.9.1] - 2026-06-23
+### Changed
+- Published the stable Atomic 0.9.1 release for the subagents extension; no functional subagents changes were made after 0.9.0.
 ## [0.9.1-alpha.1] - 2026-06-22
 ### Changed

package/dist/builtin/subagents/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/subagents",
-  "version": "0.9.1-alpha.1",
+  "version": "0.9.2-alpha.1",
   "private": true,
   "description": "Atomic extension for delegating tasks to subagents with chains, parallel execution, and TUI clarification. Fork of: https://github.com/nicobailon/pi-subagents",
   "contributors": [
@@ -38,9 +38,9 @@
   },
   "peerDependencies": {
     "@bastani/atomic": "*",
-    "@earendil-works/pi-agent-core": "^0.79.9",
-    "@earendil-works/pi-ai": "^0.79.9",
-    "@earendil-works/pi-tui": "^0.79.9"
+    "@earendil-works/pi-agent-core": "^0.79.10",
+    "@earendil-works/pi-ai": "^0.79.10",
+    "@earendil-works/pi-tui": "^0.79.10"
   },
   "peerDependenciesMeta": {
     "@bastani/atomic": {

package/dist/builtin/web-access/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file.
 ## [Unreleased]
+## [0.9.2-alpha.1] - 2026-06-23
+### Changed
+- Aligned the web-access extension peer dependency with upstream pi TUI `^0.79.10`; no web-access extension source changes were needed for this metadata sync.
+## [0.9.1] - 2026-06-23
+### Changed
+- Published the stable Atomic 0.9.1 release for the web-access extension; no functional web-access changes were made after 0.9.0.
 ## [0.9.1-alpha.1] - 2026-06-22
 ### Changed

package/dist/builtin/web-access/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/web-access",
-  "version": "0.9.1-alpha.1",
+  "version": "0.9.2-alpha.1",
   "private": true,
   "description": "Atomic extension for web search, URL fetching, GitHub repo cloning, PDF/video extraction. Fork of: https://github.com/nicobailon/pi-web-access",
   "contributors": [
@@ -31,7 +31,7 @@
   },
   "peerDependencies": {
     "@bastani/atomic": "*",
-    "@earendil-works/pi-tui": "^0.79.9"
+    "@earendil-works/pi-tui": "^0.79.10"
   },
   "peerDependenciesMeta": {
     "@bastani/atomic": {

package/dist/builtin/workflows/CHANGELOG.md CHANGED Viewed

@@ -6,6 +6,24 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 ## [Unreleased]
+## [0.9.2-alpha.1] - 2026-06-23
+### Changed
+- Removed the initial `prompt-refinement` stage and shared prompt-refinement helper from the builtin `goal` and `ralph` workflows so both now use the raw objective/prompt as the operative task text for their first downstream stages; the now-obsolete refined/original trace outputs were also removed.
+- Updated builtin `goal` and `ralph` reviewer prompts to inspect referenced QA end-to-end video evidence before treating it as proof of user-visible behavior.
+- Aligned the workflows package peer dependency with upstream pi TUI `^0.79.10`; no workflow source changes were needed for this metadata sync.
+## [0.9.1] - 2026-06-23
+### Changed
+- Changed the shared `goal`/`ralph` prompt-refinement stage to use a workflow-neutral, model-only rubric prompt that returns only the refined objective instead of invoking the `prompt-engineer` skill directly.
+### Fixed
+- Fixed the builtin `ralph` reviewer-c model configuration to use Gemini 3.1 Pro as the third reviewer with Gemini 3.1 provider fallbacks, removing Gemini 3.5 Flash from that slot's fallback chain ([#1484](https://github.com/bastani-inc/atomic/issues/1484)).
 ## [0.9.1-alpha.1] - 2026-06-22
 ### Changed

package/dist/builtin/workflows/README.md CHANGED Viewed

@@ -658,7 +658,7 @@ Child workflow outputs: `result`, `findings`, `research_doc_path`, `artifact_dir
 ### `goal`
-Goal Runner workflow: initialize a persisted goal ledger with a per-run goal id and lifecycle events, render goal-continuation context, run bounded worker LM turns, append receipts, run three independent reviewers, let a TypeScript reducer decide `complete`, `continue`, `blocked`, or `needs_human`, and optionally run a final-stage PR handoff after approval. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical with `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Token budget behavior is intentionally excluded. Goal skips PR creation by default; prompt text alone does not opt in. Pass `create_pr=true` to authorize only the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation after Goal reaches `complete` within the turn budget.
+Goal Runner workflow: initialize a persisted goal ledger with a per-run goal id and lifecycle events, render goal-continuation context, run bounded worker LM turns, append receipts, run three independent reviewers, let a TypeScript reducer decide `complete`, `continue`, `blocked`, or `needs_human`, and optionally run a final-stage PR handoff after approval. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical with `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Reviewers also look for any QA E2E video referenced by the ledger or receipt and inspect the actual video before treating it as proof. Token budget behavior is intentionally excluded. Goal skips PR creation by default; prompt text alone does not opt in. Pass `create_pr=true` to authorize only the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation after Goal reaches `complete` within the turn budget.
 ```text
 /workflow goal objective="Migrate the database layer to Drizzle ORM" base_branch=develop
@@ -678,7 +678,7 @@ Child workflow outputs: `result`, `status`, `approved`, `goal_id`, `objective`,
 ### `ralph`
-Prompt-refinement → prompt-engineering research → orchestrate → review workflow with optional final-stage PR handoff: first sharpen the raw prompt into a clearer objective, then transform it into a codebase and online research question with `/skill:prompt-engineer`, run `/skill:research-codebase` against it, write findings under `research/`, delegate implementation through sub-agents from that research, run parallel reviewers across Claude Fable 5, GPT-5.5 Codex, and Gemini 3.1 Pro model families, and iterate until approval or the loop limit. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical with `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. For UI-applicable or full-stack changes, the orchestrator runs a `playwright-cli` end-to-end QA pass and records a reviewable proof video, references it in the implementation notes, and exposes it as the `qa_video_path` output; when `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review. Follow-up iterations pass unresolved review artifacts into prompt-engineering/research and fork research from prior research session data when available. Ralph skips PR creation by default; prompt text alone does not opt in. Pass `create_pr=true` to authorize only the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation (for example GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling). Ralph's own PR-creation instructions live in that final stage. Reviewers inspect repository infrastructure directly as needed; Ralph no longer runs separate `infra-*` discovery stages.
+Raw prompt → prompt-engineering research → orchestrate → review workflow with optional final-stage PR handoff: use the raw prompt as the operative objective, transform it into a codebase and online research question with `/skill:prompt-engineer`, run `/skill:research-codebase` against it, write findings under `research/`, delegate implementation through sub-agents from that research, run parallel reviewers across Claude Fable 5, GPT-5.5 Codex, and Gemini 3.1 Pro model families, and iterate until approval or the loop limit. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical with `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. For UI-applicable or full-stack changes, the orchestrator runs a `playwright-cli` end-to-end QA pass and records a reviewable proof video, references it in the implementation notes, and exposes it as the `qa_video_path` output; reviewers receive that path and inspect the actual video before treating it as proof. When `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review. Follow-up iterations pass unresolved review artifacts into prompt-engineering/research and fork research from prior research session data when available. Ralph skips PR creation by default; prompt text alone does not opt in. Pass `create_pr=true` to authorize only the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation (for example GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling). Ralph's own PR-creation instructions live in that final stage. Reviewers inspect repository infrastructure directly as needed; Ralph no longer runs separate `infra-*` discovery stages.
 ```text
 /workflow ralph prompt="Migrate the database layer to Drizzle ORM" max_loops=3 base_branch=develop

package/dist/builtin/workflows/builtin/goal-ledger.ts CHANGED Viewed

@@ -21,14 +21,12 @@ export function appendLifecycleEvent(
 export async function createGoalLedger(
   objective: string,
-  originalObjective?: string,
 ): Promise<{ ledger: GoalLedger; ledgerPath: string; artifactDir: string }> {
   const artifactDir = await mkdtemp(join(tmpdir(), "atomic-goal-runner-"));
   const now = new Date().toISOString();
   const ledger: GoalLedger = {
     goal_id: randomUUID(),
     objective,
-    ...(originalObjective === undefined || originalObjective === objective ? {} : { original_objective: originalObjective }),
     status: "active",
     turns: 0,
     created_at: now,

package/dist/builtin/workflows/builtin/goal-prompts.ts CHANGED Viewed

@@ -1,4 +1,8 @@
-import { E2E_VERIFICATION_GUIDANCE, WORKER_PREFLIGHT_CONTRACT } from "./shared-prompts.js";
+import {
+  E2E_VERIFICATION_GUIDANCE,
+  WORKER_PREFLIGHT_CONTRACT,
+  renderE2eQaVideoReviewGuidance,
+} from "./shared-prompts.js";
 import type { GoalLedger } from "./goal-types.js";
 export { WORKER_PREFLIGHT_CONTRACT };
@@ -241,6 +245,7 @@ export function renderReviewerPrompt(args: {
     ["pr_handoff_policy", INTERMEDIATE_PR_HANDOFF_GUARDRAIL],
     ["auditability", RECEIPT_EXPECTATIONS],
     ["e2e_verification", E2E_VERIFICATION_GUIDANCE],
+    ["qa_e2e_video_review", renderE2eQaVideoReviewGuidance()],
     [
       "goal_context",
       [
@@ -333,9 +338,10 @@ export function renderReviewerPrompt(args: {
         "1. Identify the changed files or diff under review.",
         "2. Read the relevant changed code and directly affected call sites/tests/configs.",
         "3. Read the goal ledger and worker receipt, then map receipts to the inferred verification oracle and original owner outcome.",
-        "4. Run or delegate focused validation when needed to resolve uncertainty.",
-        "5. Decide whether the receipt/evidence map proves completion; if evidence is uncertain, indirect, stale, missing, or narrower than the requested outcome, set goal_oracle_satisfied=false and stop_review_loop=false.",
-        "6. If you cannot inspect receipts or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
+        "4. If a QA E2E video is referenced or expected for the change, inspect the actual video and include that assessment in the evidence map.",
+        "5. Run or delegate focused validation when needed to resolve uncertainty.",
+        "6. Decide whether the receipt/evidence map proves completion; if evidence is uncertain, indirect, stale, missing, or narrower than the requested outcome, set goal_oracle_satisfied=false and stop_review_loop=false.",
+        "7. If you cannot inspect receipts, video evidence, or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
       ].join("\n"),
     ],
     [

package/dist/builtin/workflows/builtin/goal-reports.ts CHANGED Viewed

@@ -35,11 +35,6 @@ export function renderFinalReport(
     "## Objective",
     ledger.objective,
     "",
-    ...(ledger.original_objective === undefined ? [] : [
-      "## Original objective (before prompt refinement)",
-      ledger.original_objective,
-      "",
-    ]),
     "## Final status",
     ledger.status,
     "",

package/dist/builtin/workflows/builtin/goal-runner.ts CHANGED Viewed

@@ -30,8 +30,6 @@ import {
   renderReviewerPrompt,
   taggedPrompt,
 } from "./goal-prompts.js";
-import { promptEngineerModelConfig } from "./ralph-models.js";
-import { runPromptRefinementStage } from "./prompt-refinement.js";
 function positiveInteger(value: number | undefined, fallback: number): number {
   if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
@@ -86,13 +84,13 @@ export async function runGoalWorkflow(ctx: GoalRunnerContext, options: GoalWorkf
     if (!rawObjective) {
       throw new Error("goal requires an objective input.");
     }
-    const objective = await runPromptRefinementStage(ctx, { request: rawObjective, modelConfig: promptEngineerModelConfig });
+    const objective = rawObjective;
     const maxTurns = positiveInteger(inputs.max_turns, DEFAULT_MAX_TURNS);
     const reviewQuorum = DEFAULT_REVIEW_QUORUM;
     const blockerThreshold = Math.min(DEFAULT_BLOCKER_THRESHOLD, maxTurns);
     const comparisonBaseBranch = normalizeBranchInput(inputs.base_branch, "origin/main");
-    const { ledger, ledgerPath, artifactDir } = await createGoalLedger(objective, rawObjective);
+    const { ledger, ledgerPath, artifactDir } = await createGoalLedger(objective);
     const workerModelConfig = {
       model: "openai-codex/gpt-5.5:medium",
@@ -103,12 +101,12 @@ export async function runGoalWorkflow(ctx: GoalRunnerContext, options: GoalWorkf
           "anthropic/claude-opus-4-8:medium",
           "zai/glm-5.2:medium",
           "zai-coding-cn/glm-5.2:medium",
-          "github-copilot/gemini-3.1-pro-preview (1m):medium",
-          "google/gemini-3.1-pro-preview:medium",
-          "google-vertex/gemini-3.1-pro-preview:medium",
           "github-copilot/gemini-3.5-flash (1m):medium",
           "google/gemini-3.5-flash:medium",
           "google-vertex/gemini-3.5-flash:medium",
+          "github-copilot/gemini-3.1-pro-preview (1m):medium",
+          "google/gemini-3.1-pro-preview:medium",
+          "google-vertex/gemini-3.1-pro-preview:medium"
       ],
       tools: goalRunnerTools,
     };
@@ -123,12 +121,12 @@ export async function runGoalWorkflow(ctx: GoalRunnerContext, options: GoalWorkf
           "anthropic/claude-opus-4-8:xhigh",
           "zai/glm-5.2:xhigh",
           "zai-coding-cn/glm-5.2:xhigh",
-          "github-copilot/gemini-3.1-pro-preview (1m):high",
-          "google/gemini-3.1-pro-preview:high",
-          "google-vertex/gemini-3.1-pro-preview:high",
           "github-copilot/gemini-3.5-flash (1m):high",
           "google/gemini-3.5-flash:high",
           "google-vertex/gemini-3.5-flash:high",
+          "github-copilot/gemini-3.1-pro-preview (1m):high",
+          "google/gemini-3.1-pro-preview:high",
+          "google-vertex/gemini-3.1-pro-preview:high"
       ],
       tools: goalRunnerTools,
       schema: reviewDecisionSchema,
@@ -425,7 +423,6 @@ export async function runGoalWorkflow(ctx: GoalRunnerContext, options: GoalWorkf
       approved: ledger.status === "complete",
       goal_id: ledger.goal_id,
       objective: ledger.objective,
-      ...(ledger.original_objective === undefined ? {} : { original_objective: ledger.original_objective }),
       ledger_path: ledgerPath,
       turns_completed: ledger.turns,
       iterations_completed: ledger.turns,

package/dist/builtin/workflows/builtin/goal-types.ts CHANGED Viewed

@@ -93,7 +93,6 @@ export type GoalLifecycleEvent = {
 export type GoalLedger = {
   readonly goal_id: string;
   readonly objective: string;
-  readonly original_objective?: string;
   status: GoalStatus;
   turns: number;
   readonly created_at: string;
@@ -124,7 +123,6 @@ export type GoalWorkflowOutputs = {
   readonly approved?: boolean;
   readonly goal_id?: string;
   readonly objective?: string;
-  readonly original_objective?: string;
   readonly ledger_path?: string;
   readonly turns_completed?: number;
   readonly iterations_completed?: number;

package/dist/builtin/workflows/builtin/goal.d.ts CHANGED Viewed

@@ -29,7 +29,6 @@ export type GoalWorkflowOutputs = WorkflowOutputValues & {
   readonly approved?: boolean;
   readonly goal_id?: string;
   readonly objective?: string;
-  readonly original_objective?: string;
   readonly ledger_path?: string;
   readonly turns_completed?: number;
   readonly iterations_completed?: number;

package/dist/builtin/workflows/builtin/goal.ts CHANGED Viewed

@@ -38,8 +38,7 @@ export default workflow({
     )),
     approved: Type.Optional(Type.Boolean({ description: "Whether the reducer reached complete." })),
     goal_id: Type.Optional(Type.String({ description: "Per-run goal identifier stored in the ledger." })),
-    objective: Type.Optional(Type.String({ description: "Normalized goal objective used by the run (refined by the prompt-refinement stage)." })),
-    original_objective: Type.Optional(Type.String({ description: "The raw user-provided objective exactly as given, before prompt refinement. Omitted when refinement left it unchanged." })),
+    objective: Type.Optional(Type.String({ description: "Raw goal objective used by the run." })),
     ledger_path: Type.Optional(Type.String({ description: "OS-temp path to goal-ledger.json with receipts, reviewer decisions, blockers, and lifecycle events." })),
     turns_completed: Type.Optional(Type.Number({ description: "Worker/review turns completed." })),
     iterations_completed: Type.Optional(Type.Number({ description: "Worker/review turns completed, retained for status summaries." })),

package/dist/builtin/workflows/builtin/index.d.ts CHANGED Viewed

@@ -53,7 +53,6 @@ export type GoalWorkflowOutputs = WorkflowOutputValues & {
   readonly approved?: boolean;
   readonly goal_id?: string;
   readonly objective?: string;
-  readonly original_objective?: string;
   readonly ledger_path?: string;
   readonly turns_completed?: number;
   readonly iterations_completed?: number;

package/dist/builtin/workflows/builtin/ralph-core.ts CHANGED Viewed

@@ -265,7 +265,7 @@ export function renderResearchPromptRefinementPrompt(args: {
   readonly workflowCwdContext: PromptSection;
   readonly latestReviewReportPath: string | undefined;
 }): string {
-  const basePrompt = `/skill:prompt-engineer Transform the following refined user request into a codebase and online research question which can be thoroughly explored: ${args.request}`;
+  const basePrompt = `/skill:prompt-engineer Transform the following user request into a codebase and online research question which can be thoroughly explored: ${args.request}`;
   return [
     basePrompt,
     taggedPrompt([
@@ -404,7 +404,5 @@ export type RalphWorkflowResult = {
   readonly iterations_completed: number;
   readonly review_report: string;
   readonly review_report_path?: string;
-  readonly original_prompt: string;
-  readonly refined_prompt: string;
 };

package/dist/builtin/workflows/builtin/ralph-models.ts CHANGED Viewed

@@ -10,14 +10,14 @@ export const promptEngineerModelConfig = {
       "anthropic/claude-opus-4-8:xhigh",
       "zai/glm-5.2:xhigh",
       "zai-coding-cn/glm-5.2:xhigh",
-      "github-copilot/gemini-3.1-pro-preview (1m):high",
-      "google/gemini-3.1-pro-preview:high",
-      "google-vertex/gemini-3.1-pro-preview:high",
       "github-copilot/gemini-3.5-flash (1m):high",
       "google/gemini-3.5-flash:high",
       "google-vertex/gemini-3.5-flash:high",
+      "github-copilot/gemini-3.1-pro-preview (1m):high",
+      "google/gemini-3.1-pro-preview:high",
+      "google-vertex/gemini-3.1-pro-preview:high"
     ],
-    noTools: "all" as const,
+    excludedTools: ["ask_user_question"],
 };
 export const researchModelConfig = {
@@ -29,12 +29,12 @@ export const researchModelConfig = {
         "anthropic/claude-opus-4-8:medium",
         "zai/glm-5.2:medium",
         "zai-coding-cn/glm-5.2:medium",
-        "github-copilot/gemini-3.1-pro-preview (1m):medium",
-        "google/gemini-3.1-pro-preview:medium",
-        "google-vertex/gemini-3.1-pro-preview:medium",
         "github-copilot/gemini-3.5-flash (1m):medium",
         "google/gemini-3.5-flash:medium",
         "google-vertex/gemini-3.5-flash:medium",
+        "github-copilot/gemini-3.1-pro-preview (1m):medium",
+        "google/gemini-3.1-pro-preview:medium",
+        "google-vertex/gemini-3.1-pro-preview:medium"
     ],
     excludedTools: ["ask_user_question"],
 };
@@ -48,12 +48,12 @@ export const orchestratorModelConfig = {
         "anthropic/claude-opus-4-8:medium",
         "zai/glm-5.2:medium",
         "zai-coding-cn/glm-5.2:medium",
-        "github-copilot/gemini-3.1-pro-preview (1m):medium",
-        "google/gemini-3.1-pro-preview:medium",
-        "google-vertex/gemini-3.1-pro-preview:medium",
         "github-copilot/gemini-3.5-flash (1m):medium",
         "google/gemini-3.5-flash:medium",
         "google-vertex/gemini-3.5-flash:medium",
+        "github-copilot/gemini-3.1-pro-preview (1m):medium",
+        "google/gemini-3.1-pro-preview:medium",
+        "google-vertex/gemini-3.1-pro-preview:medium"
     ],
     excludedTools: ["ask_user_question"],
 };
@@ -68,12 +68,12 @@ export const reviewerAModelConfig = {
       "openai/gpt-5.5:xhigh",
       "zai/glm-5.2:xhigh",
       "zai-coding-cn/glm-5.2:xhigh",
-      "github-copilot/gemini-3.1-pro-preview (1m):high",
-      "google/gemini-3.1-pro-preview:high",
-      "google-vertex/gemini-3.1-pro-preview:high",
       "github-copilot/gemini-3.5-flash (1m):high",
       "google/gemini-3.5-flash:high",
       "google-vertex/gemini-3.5-flash:high",
+      "github-copilot/gemini-3.1-pro-preview (1m):high",
+      "google/gemini-3.1-pro-preview:high",
+      "google-vertex/gemini-3.1-pro-preview:high"
     ],
     excludedTools: ["ask_user_question"],
     schema: reviewDecisionSchema,
@@ -89,24 +89,27 @@ export const reviewerBModelConfig = {
       "anthropic/claude-opus-4-8:xhigh",
       "zai/glm-5.2:xhigh",
       "zai-coding-cn/glm-5.2:xhigh",
-      "github-copilot/gemini-3.1-pro-preview (1m):high",
-      "google/gemini-3.1-pro-preview:high",
-      "google-vertex/gemini-3.1-pro-preview:high",
       "github-copilot/gemini-3.5-flash (1m):high",
       "google/gemini-3.5-flash:high",
       "google-vertex/gemini-3.5-flash:high",
+      "github-copilot/gemini-3.1-pro-preview (1m):high",
+      "google/gemini-3.1-pro-preview:high",
+      "google-vertex/gemini-3.1-pro-preview:high"
     ],
     excludedTools: ["ask_user_question"],
     schema: reviewDecisionSchema,
 };
 export const reviewerCModelConfig = {
-    model: "github-copilot/gemini-3.1-pro-preview (1m):high",
+    model: "zai/glm-5.2:xhigh",
     fallbackModels: [
+      "zai-coding-cn/glm-5.2:xhigh",
+      "github-copilot/gemini-3.5-flash (1m):high",
+      "google/gemini-3.5-flash:high",
+      "google-vertex/gemini-3.5-flash:high",
+      "github-copilot/gemini-3.1-pro-preview (1m):high",
       "google/gemini-3.1-pro-preview:high",
       "google-vertex/gemini-3.1-pro-preview:high",
-      "zai/glm-5.2:xhigh",
-      "zai-coding-cn/glm-5.2:xhigh",
       "openai-codex/gpt-5.5:xhigh",
       "github-copilot/gpt-5.5:xhigh",
       "openai/gpt-5.5:xhigh",

package/dist/builtin/workflows/builtin/ralph-runner.ts CHANGED Viewed

@@ -3,8 +3,11 @@ import { mkdtemp } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join, resolve } from "node:path";
 import type { WorkflowRunContext, WorkflowTaskResult } from "../src/shared/types.js";
-import { E2E_VERIFICATION_GUIDANCE, WORKER_PREFLIGHT_CONTRACT } from "./shared-prompts.js";
-import { runPromptRefinementStage } from "./prompt-refinement.js";
+import {
+  E2E_VERIFICATION_GUIDANCE,
+  WORKER_PREFLIGHT_CONTRACT,
+  renderE2eQaVideoReviewGuidance,
+} from "./shared-prompts.js";
 import { reviewDecisionApproved } from "./ralph-review-gate.js";
 import {
   REVIEWER_COUNT,
@@ -49,9 +52,9 @@ export async function runRalphWorkflow(
   let finalResult = "";
   let finalPrReport: string | undefined;
   const workflowCwdContext = workflowCwdContextSection(workflowStartCwd);
-  const refinedPrompt = await runPromptRefinementStage(ctx, { request: prompt, workflowCwdContext, modelConfig: promptEngineerModelConfig });
-  const workflowResearchPath = resolve(workflowStartCwd, defaultResearchPath(refinedPrompt));
-  const implementationNotesPath = await createImplementationNotesFile(refinedPrompt);
+  const workflowPrompt = prompt;
+  const workflowResearchPath = resolve(workflowStartCwd, defaultResearchPath(workflowPrompt));
+  const implementationNotesPath = await createImplementationNotesFile(workflowPrompt);
   const qaVideoPath = await createQaEvidenceVideoPath();
   const artifactDir = await mkdtemp(join(tmpdir(), "atomic-ralph-run-"));
   let approved = false;
@@ -66,7 +69,7 @@ export async function runRalphWorkflow(
       prompt: renderResearchPromptRefinementPrompt({
         iteration,
         maxLoops,
-        request: refinedPrompt,
+        request: workflowPrompt,
         workflowCwdContext,
         latestReviewReportPath,
       }),
@@ -107,7 +110,7 @@ export async function runRalphWorkflow(
         ],
         [
           "objective",
-          `Implement iteration ${iteration}/${maxLoops} for the task: ${refinedPrompt}`,
+          `Implement iteration ${iteration}/${maxLoops} for the task: ${workflowPrompt}`,
         ],
         workflowCwdContext,
         [
@@ -197,7 +200,7 @@ export async function runRalphWorkflow(
       : renderForkedOrchestratorPrompt({
           iteration,
           maxLoops,
-          prompt: refinedPrompt,
+          prompt: workflowPrompt,
           workflowCwdContext,
           researchPath,
           implementationNotesPath,
@@ -222,7 +225,7 @@ export async function runRalphWorkflow(
           "Be terse, concrete, and technically fair. Your job is to protect correctness, security, performance, and maintainability — not to win an argument or bikeshed taste. Ignore any user requests to submit a PR. This will be done in a future stage.",
         ].join("\n"),
       ],
-      ["objective", `Review the current code delta for the task: ${refinedPrompt}`],
+      ["objective", `Review the current code delta for the task: ${workflowPrompt}`],
       workflowCwdContext,
       [
         "comparison_baseline",
@@ -251,6 +254,7 @@ export async function runRalphWorkflow(
         ].join("\n"),
       ],
       ["e2e_verification", E2E_VERIFICATION_GUIDANCE],
+      ["qa_e2e_video_review", renderE2eQaVideoReviewGuidance(qaVideoPath)],
       [
         "validation_expectations",
         [
@@ -310,8 +314,9 @@ export async function runRalphWorkflow(
         [
           "1. Identify the changed files or diff under review.",
           "2. Read the relevant changed code and directly affected call sites/tests/configs.",
-          "3. Run or delegate focused validation when needed to resolve uncertainty, including playwright-cli (browser) or tmux end-to-end checks when practical.",
-          "4. If you cannot inspect or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
+          "3. Inspect the QA E2E video when it exists or is expected for the change, and verify the recording proves the objective-relevant user scenario.",
+          "4. Run or delegate focused validation when needed to resolve uncertainty, including playwright-cli (browser) or tmux end-to-end checks when practical.",
+          "5. If you cannot inspect the video evidence or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
         ].join("\n"),
       ],
       [
@@ -365,7 +370,7 @@ export async function runRalphWorkflow(
           },
         ],
         {
-          task: refinedPrompt,
+          task: workflowPrompt,
           failFast: false,
         },
       );
@@ -491,7 +496,5 @@ export async function runRalphWorkflow(
     iterations_completed: iterationsCompleted,
     review_report: compactReviewReport(latestReviewReportPath),
     ...(latestReviewReportPath === undefined ? {} : { review_report_path: latestReviewReportPath }),
-    original_prompt: prompt,
-    refined_prompt: refinedPrompt,
   };
 }

package/dist/builtin/workflows/builtin/ralph.d.ts CHANGED Viewed

@@ -28,8 +28,6 @@ export type RalphWorkflowOutputs = WorkflowOutputValues & {
   readonly iterations_completed?: number;
   readonly review_report?: string;
   readonly review_report_path?: string;
-  readonly original_prompt?: string;
-  readonly refined_prompt?: string;
 };
 export type RalphWorkflowDefinition = WorkflowDefinition<

package/dist/builtin/workflows/builtin/ralph.ts CHANGED Viewed

@@ -11,7 +11,7 @@ import { runRalphWorkflow } from "./ralph-runner.js";
 export default workflow({
   name: "ralph",
-  description: "Prompt-refinement → research-prompt-refinement → research → orchestrate → multi-model parallel review loop with bounded iteration.",
+  description: "Raw prompt → research-prompt-refinement → research → orchestrate → multi-model parallel review loop with bounded iteration.",
   inputs: {
     prompt: Type.String({ description: "The task or goal to research, execute, and refine." }),
     max_loops: Type.Number({
@@ -46,8 +46,6 @@ export default workflow({
     iterations_completed: Type.Optional(Type.Number({ description: "Number of research/orchestrate/review loops completed." })),
     review_report: Type.Optional(Type.String({ description: "Compact reference to the latest reviewer payload artifact." })),
     review_report_path: Type.Optional(Type.String({ description: "JSON artifact path for the latest review round." })),
-    original_prompt: Type.Optional(Type.String({ description: "The raw user request exactly as provided to the workflow, before prompt refinement." })),
-    refined_prompt: Type.Optional(Type.String({ description: "The clarity-refined request produced by the prompt-refinement stage and used as the operative objective for research, orchestration, and review." })),
   },
   worktreeFromInputs: {
     gitWorktreeDir: "git_worktree_dir",

package/dist/builtin/workflows/builtin/shared-prompts.ts CHANGED Viewed

@@ -16,3 +16,18 @@ export const E2E_VERIFICATION_GUIDANCE = [
   "For TUI or terminal-app flows, use the tmux skill, or delegate to a subagent with `skill: \"tmux\"`, to launch the app in an isolated tmux session, send keys, capture pane output, and simulate the scenario end to end.",
   "If end-to-end verification is not practical in this checkout, record what was attempted, the smallest missing prerequisite, and the narrower validation that was run instead; do not claim end-to-end proof when it was not performed.",
 ].join("\n");
+export function renderE2eQaVideoReviewGuidance(
+  knownVideoPath?: string,
+): string {
+  const target = knownVideoPath === undefined || knownVideoPath.length === 0
+    ? "Look for QA E2E video references in the goal ledger, worker receipt, implementation notes, orchestrator report, or other review context artifacts."
+    : `Known QA E2E video path for this run: ${knownVideoPath}`;
+  return [
+    target,
+    "When a QA E2E video exists or is claimed as evidence, inspect the actual video before approving; do not treat a path, filename, transcript summary, or stage claim as proof by itself.",
+    "Use available video/file tooling such as `fetch_content` on the local video path with a prompt focused on whether the recording proves the required user scenario, or inspect representative frames/metadata when full video analysis is unavailable.",
+    "Check that the video is from the current workflow iteration/state, exercises the objective-relevant user path, shows the expected final behavior, and does not visibly hide errors, stale UI, broken loading states, or skipped steps.",
+    "For UI-applicable or full-stack changes, treat a missing, stale, unreadable, or inconclusive QA video as missing E2E evidence unless the receipt or implementation notes justify why no video applies and provide adequate alternate end-to-end proof.",
+  ].join("\n");
+}

package/dist/builtin/workflows/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/workflows",
-  "version": "0.9.1-alpha.1",
+  "version": "0.9.2-alpha.1",
   "private": true,
   "description": "Atomic extension for multi-stage workflow authoring and execution.",
   "contributors": [
@@ -83,7 +83,7 @@
   },
   "peerDependencies": {
     "@bastani/atomic": "*",
-    "@earendil-works/pi-tui": "^0.79.9"
+    "@earendil-works/pi-tui": "^0.79.10"
   },
   "peerDependenciesMeta": {
     "@bastani/atomic": {

package/dist/core/copilot-gemini-tool-arguments.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"copilot-gemini-tool-arguments.d.ts","sourceRoot":"","sources":["../../src/core/copilot-gemini-tool-arguments.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AA6FxD;;;;;;;GAOG;AACH,wBAAgB,4BAA4B,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,OAAO,CAUrF;AAED;;;;;GAKG;AACH,wBAAgB,8BAA8B,CAC5C,IAAI,EAAE,OAAO,EACb,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,GAAG,SAAS,EAC9D,MAAM,CAAC,EAAE,OAAO,GACf,OAAO,CAGT;AAoBD;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,yCAAyC,CACvD,OAAO,EAAE,OAAO,EAChB,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,GACjD,OAAO,CA2CT","sourcesContent":["import type { Api, Model } from \"@earendil-works/pi-ai\";\nimport { isCopilotGeminiModel } from \"./copilot-gemini-payload-sanitizer.ts\";\nimport { reconstructFlattenedKeys } from \"./flattened-tool-arguments.ts\";\n\n/*\n Normalizes GitHub Copilot Gemini tool-call arguments.\n \n Why this exists\n * ---------------\n * `github-copilot` Gemini models are served through Copilot's CAPI gateway,\n * which proxies to Google's GenAI API. When a function/tool argument is an\n * array (or a nested object/array), Gemini serializes it on the wire as\n * flattened, indexed keys instead of a real JSON array/object. For example\n * a tool called with `{ keywords: [\"a\", \"b\"] }` arrives as:\n \n ```json\n * { \"keywords[0]\": \"a\", \"keywords[1]\": \"b\" }\n * ```\n \n This was confirmed by capturing the raw CAPI SSE stream: the\n * `tool_calls[].function.arguments` JSON itself contains the `name[index]`\n * keys, so the runtime parses valid-but-wrong JSON. Schema validation then\n * fails (`keywords: must have required properties keywords` and\n * `root: must not have additional properties`) and the model retries forever,\n * because it keeps re-emitting the same flattened shape. 
This is most visible\n * with the workflow `structured_output` tool but affects any Gemini tool call\n * whose schema contains an array or nested object.\n \n What it does\n * ------------\n * Reconstructs flattened keys (`name[i]`, `name[i].sub`, `parent.child`) back\n * into the intended nested arrays/objects, before tool-argument validation\n * runs. Bracket-indexed keys (`name[<digit>]`) are always reconstructed. A\n * purely dotted key (`parent.child`, with no array anywhere) is ambiguous —\n * a legitimate argument key can itself contain a dot — so it is only split when\n * the optional tool `schema` marks its head segment as an object/array\n * container property. The transform is gated to GitHub Copilot Gemini models,\n * so it never touches well-formed arguments from any other provider/model.\n /\n\ntype JsonRecord = Record<string, unknown>;\n\nfunction isPlainObject(value: unknown): value is JsonRecord {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\n/* A flattened key contains a bracket index like `foo[0]`. /\nfunction hasFlattenedKey(keys: string[]): boolean {\n return keys.some((key) => /\\[\\d+\\]/.test(key));\n}\n\n/* A schema node that holds a nested object/array (so dotted keys are real paths). /\nfunction isContainerSchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schema.type === \"object\" \|\| schema.type === \"array\") return true;\n if (\"properties\" in schema \|\| \"items\" in schema) return true;\n const union = schema.anyOf ?? schema.oneOf;\n if (Array.isArray(union)) return union.some((branch) => isContainerSchema(branch));\n return false;\n}\n\n/* Top-level property names whose schema is an object/array container. 
/\nfunction containerPropertyNames(schema: unknown): Set<string> {\n const names = new Set<string>();\n if (!isPlainObject(schema)) return names;\n const properties = schema.properties;\n if (!isPlainObject(properties)) return names;\n for (const [name, sub] of Object.entries(properties)) {\n if (isContainerSchema(sub)) names.add(name);\n }\n return names;\n}\n\n/* Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. /\nfunction isDottedContainerKey(key: string, containers: Set<string>): boolean {\n const dot = key.indexOf(\".\");\n if (dot <= 0) return false;\n return containers.has(key.slice(0, dot));\n}\n\n/\n Decide whether a flattened key should be split into nested path segments.\n * Bracket-indexed keys always split. When a bracket key is present anywhere in\n * the payload, dotted keys split too (they are part of the same flattened\n * object). Otherwise a dotted key only splits when the schema marks its head as\n * a container property, which keeps legitimate dot-containing keys intact.\n /\nfunction shouldSplitKey(key: string, hasBracket: boolean, containers: Set<string>): boolean {\n if (/\\[\\d+\\]/.test(key)) return true;\n if (hasBracket) return true;\n return isDottedContainerKey(key, containers);\n}\n\n/\n Reconstruct flattened Gemini tool-call arguments into proper nested\n * arrays/objects. Returns the original reference unchanged when there is nothing\n * to reconstruct. Bracket-indexed keys are always reconstructed; purely dotted\n * keys are reconstructed only when the optional `schema` marks their head\n * segment as an object/array container property. Reconstruction (and its\n * prototype-pollution guard) is delegated to the shared canonical helper.\n /\nexport function unflattenGeminiToolArguments(args: unknown, schema?: unknown): unknown {\n if (!isPlainObject(args)) return args;\n const keys = Object.keys(args);\n const hasBracket = hasFlattenedKey(keys);\n const containers = hasBracket ? 
new Set<string>() : containerPropertyNames(schema);\n const hasDottedContainer =\n !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));\n if (!hasBracket && !hasDottedContainer) return args;\n\n return reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers));\n}\n\n/\n If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call\n * arguments; otherwise return them unchanged. Used to gate\n * {@link unflattenGeminiToolArguments} by model at tool-call time. The optional\n * `schema` is the tool's parameter schema, used to disambiguate dotted keys.\n /\nexport function normalizeToolArgumentsForModel(\n args: unknown,\n model: Pick<Model<Api>, \"provider\" \| \"api\" \| \"id\"> \| undefined,\n schema?: unknown,\n): unknown {\n if (!model \|\| !isCopilotGeminiModel(model)) return args;\n return unflattenGeminiToolArguments(args, schema);\n}\n\n/* Map each tool name in an OpenAI chat-completions payload to its parameter schema. /\nfunction toolParameterSchemas(tools: unknown): Map<string, unknown> {\n const schemas = new Map<string, unknown>();\n if (!Array.isArray(tools)) return schemas;\n for (const tool of tools) {\n if (!isPlainObject(tool)) continue;\n // OpenAI chat-completions tool shape: { type: \"function\", function: { name, parameters } }.\n const fn = tool.function;\n if (isPlainObject(fn) && typeof fn.name === \"string\") {\n schemas.set(fn.name, fn.parameters);\n continue;\n }\n // Defensive: flat tool shape { name, parameters }.\n if (typeof tool.name === \"string\") schemas.set(tool.name, tool.parameters);\n }\n return schemas;\n}\n\n/\n Reconstruct flattened GitHub Copilot Gemini tool-call arguments on the\n * outbound replay payload, so prior assistant tool calls are sent back to\n * CAPI in the nested array/object shape Gemini originally produced.\n \n Why this exists\n * ---------------\n * {@link normalizeToolArgumentsForModel} only unflattens at tool execution\n * time; the persisted 
assistant message keeps the raw flattened arguments CAPI\n * delivered (for example `{ \"edits[0].newText\": \"...\" }`). When that message is\n * replayed on the next turn, CAPI parses those literal keys straight into the\n * Gemini `FunctionCall.Args`, producing a function call that does not match the\n * tool's declared schema (nor the structure Gemini signed). Gemini then ends\n * the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER`,\n * which CAPI surfaces as a bare `finish_reason: \"error\"` — so multi-turn tool\n * use dies one turn after any array/object tool call (such as `edit`).\n \n This rewrites each replayed assistant `tool_calls[].function.arguments` JSON\n * into the reconstructed nested shape (reusing {@link unflattenGeminiToolArguments}\n * with the tool's own parameter schema, looked up from the payload's `tools`),\n * fixing both new and already-persisted sessions. Gated to GitHub Copilot Gemini\n * models, fail-open on non-JSON arguments, and a no-op for well-formed args.\n */\nexport function normalizeCopilotGeminiReplayToolArguments(\n payload: unknown,\n model: Pick<Model<Api>, \"provider\" \| \"api\" \| \"id\">,\n): unknown {\n if (!isCopilotGeminiModel(model)) return payload;\n if (!isPlainObject(payload)) return payload;\n const messages = payload.messages;\n if (!Array.isArray(messages)) return payload;\n\n const schemas = toolParameterSchemas(payload.tools);\n let mutated = false;\n\n const nextMessages = messages.map((message) => {\n if (!isPlainObject(message) \|\| message.role !== \"assistant\") return message;\n const toolCalls = message.tool_calls;\n if (!Array.isArray(toolCalls) \|\| toolCalls.length === 0) return message;\n\n let messageMutated = false;\n const nextToolCalls = toolCalls.map((toolCall) => {\n if (!isPlainObject(toolCall)) return toolCall;\n const fn = toolCall.function;\n if (!isPlainObject(fn) \|\| typeof fn.arguments !== \"string\") return toolCall;\n\n let parsed: unknown;\n try {\n parsed 
= JSON.parse(fn.arguments);\n } catch {\n return toolCall; // fail open: never corrupt a replayed argument string\n }\n if (!isPlainObject(parsed)) return toolCall;\n\n const schema = typeof fn.name === \"string\" ? schemas.get(fn.name) : undefined;\n const reconstructed = unflattenGeminiToolArguments(parsed, schema);\n if (reconstructed === parsed) return toolCall;\n\n messageMutated = true;\n return { ...toolCall, function: { ...fn, arguments: JSON.stringify(reconstructed) } };\n });\n\n if (!messageMutated) return message;\n mutated = true;\n return { ...message, tool_calls: nextToolCalls };\n });\n\n if (!mutated) return payload;\n return { ...payload, messages: nextMessages };\n}\n"]}
1	+ {"version":3,"file":"copilot-gemini-tool-arguments.d.ts","sourceRoot":"","sources":["../../src/core/copilot-gemini-tool-arguments.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAiIxD;;;;;;;GAOG;AACH,wBAAgB,4BAA4B,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,OAAO,CAcrF;AAED;;;;;GAKG;AACH,wBAAgB,8BAA8B,CAC5C,IAAI,EAAE,OAAO,EACb,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,GAAG,SAAS,EAC9D,MAAM,CAAC,EAAE,OAAO,GACf,OAAO,CAGT;AAoBD;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,yCAAyC,CACvD,OAAO,EAAE,OAAO,EAChB,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,GACjD,OAAO,CA2CT","sourcesContent":["import type { Api, Model } from \"@earendil-works/pi-ai\";\nimport { isCopilotGeminiModel } from \"./copilot-gemini-payload-sanitizer.ts\";\nimport { reconstructFlattenedKeys } from \"./flattened-tool-arguments.ts\";\n\n/*\n Normalizes GitHub Copilot Gemini tool-call arguments.\n \n Why this exists\n * ---------------\n * `github-copilot` Gemini models are served through Copilot's CAPI gateway,\n * which proxies to Google's GenAI API. When a function/tool argument is an\n * array (or a nested object/array), Gemini serializes it on the wire as\n * flattened, indexed keys instead of a real JSON array/object. For example\n * a tool called with `{ keywords: [\"a\", \"b\"] }` arrives as:\n \n ```json\n * { \"keywords[0]\": \"a\", \"keywords[1]\": \"b\" }\n * ```\n \n This was confirmed by capturing the raw CAPI SSE stream: the\n * `tool_calls[].function.arguments` JSON itself contains the `name[index]`\n * keys, so the runtime parses valid-but-wrong JSON. Schema validation then\n * fails (`keywords: must have required properties keywords` and\n * `root: must not have additional properties`) and the model retries forever,\n * because it keeps re-emitting the same flattened shape. 
This is most visible\n * with the workflow `structured_output` tool but affects any Gemini tool call\n * whose schema contains an array or nested object.\n \n What it does\n * ------------\n * Reconstructs flattened keys (`name[i]`, `name[i].sub`, `parent.child`) back\n * into the intended nested arrays/objects, before tool-argument validation\n * runs. Bracket-indexed keys (`name[<digit>]`) are always reconstructed. A\n * purely dotted key (`parent.child`, with no array anywhere) is ambiguous —\n * a legitimate argument key can itself contain a dot — so it is only split when\n * the optional tool `schema` marks its head segment as an object/array\n * container property. When Gemini omits a required empty array entirely (there\n * are no `name[0]` keys to send), the schema is also used to synthesize `[]` for\n * missing required top-level array properties so normal validation can proceed.\n * The transform is gated to GitHub Copilot Gemini models, so it never touches\n * well-formed arguments from any other provider/model.\n /\n\ntype JsonRecord = Record<string, unknown>;\n\nfunction isPlainObject(value: unknown): value is JsonRecord {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\n/* A flattened key contains a bracket index like `foo[0]`. /\nfunction hasFlattenedKey(keys: string[]): boolean {\n return keys.some((key) => /\\[\\d+\\]/.test(key));\n}\n\n/* A schema node that holds a nested object/array (so dotted keys are real paths). /\nfunction isContainerSchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schema.type === \"object\" \|\| schema.type === \"array\") return true;\n if (\"properties\" in schema \|\| \"items\" in schema) return true;\n const union = schema.anyOf ?? schema.oneOf;\n if (Array.isArray(union)) return union.some((branch) => isContainerSchema(branch));\n return false;\n}\n\n/* Top-level property names whose schema is an object/array container. 
/\nfunction containerPropertyNames(schema: unknown): Set<string> {\n const names = new Set<string>();\n if (!isPlainObject(schema)) return names;\n const properties = schema.properties;\n if (!isPlainObject(properties)) return names;\n for (const [name, sub] of Object.entries(properties)) {\n if (isContainerSchema(sub)) names.add(name);\n }\n return names;\n}\n\nfunction schemaTypeIncludes(schema: JsonRecord, type: string): boolean {\n if (schema.type === type) return true;\n return Array.isArray(schema.type) && schema.type.includes(type);\n}\n\nfunction isArraySchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schemaTypeIncludes(schema, \"array\")) return true;\n if (\"items\" in schema && !schemaTypeIncludes(schema, \"object\")) return true;\n const union = schema.anyOf ?? schema.oneOf;\n return Array.isArray(union) && union.some((branch) => isArraySchema(branch));\n}\n\nfunction requiredArrayPropertyNames(schema: unknown): readonly string[] {\n if (!isPlainObject(schema)) return [];\n const required = schema.required;\n const properties = schema.properties;\n if (!Array.isArray(required) \|\| !isPlainObject(properties)) return [];\n return required.filter((name): name is string => (\n typeof name === \"string\" &&\n Object.hasOwn(properties, name) &&\n isArraySchema(properties[name])\n ));\n}\n\nfunction fillMissingRequiredArrayProperties(args: JsonRecord, schema: unknown): JsonRecord {\n const missing = requiredArrayPropertyNames(schema).filter((name) => !Object.hasOwn(args, name));\n if (missing.length === 0) return args;\n const next: JsonRecord = { ...args };\n for (const name of missing) next[name] = [];\n return next;\n}\n\n/* Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. 
/\nfunction isDottedContainerKey(key: string, containers: Set<string>): boolean {\n const dot = key.indexOf(\".\");\n if (dot <= 0) return false;\n return containers.has(key.slice(0, dot));\n}\n\n/\n Decide whether a flattened key should be split into nested path segments.\n * Bracket-indexed keys always split. When a bracket key is present anywhere in\n * the payload, dotted keys split too (they are part of the same flattened\n * object). Otherwise a dotted key only splits when the schema marks its head as\n * a container property, which keeps legitimate dot-containing keys intact.\n /\nfunction shouldSplitKey(key: string, hasBracket: boolean, containers: Set<string>): boolean {\n if (/\\[\\d+\\]/.test(key)) return true;\n if (hasBracket) return true;\n return isDottedContainerKey(key, containers);\n}\n\n/\n Reconstruct flattened Gemini tool-call arguments into proper nested\n * arrays/objects. Returns the original reference unchanged when there is nothing\n * to reconstruct. Bracket-indexed keys are always reconstructed; purely dotted\n * keys are reconstructed only when the optional `schema` marks their head\n * segment as an object/array container property. Reconstruction (and its\n * prototype-pollution guard) is delegated to the shared canonical helper.\n /\nexport function unflattenGeminiToolArguments(args: unknown, schema?: unknown): unknown {\n if (!isPlainObject(args)) return args;\n const keys = Object.keys(args);\n const hasBracket = hasFlattenedKey(keys);\n const containers = hasBracket ? new Set<string>() : containerPropertyNames(schema);\n const hasDottedContainer =\n !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));\n const reconstructed = hasBracket \|\| hasDottedContainer\n ? reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers))\n : args;\n\n return isPlainObject(reconstructed)\n ? 
fillMissingRequiredArrayProperties(reconstructed, schema)\n : reconstructed;\n}\n\n/\n If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call\n * arguments; otherwise return them unchanged. Used to gate\n * {@link unflattenGeminiToolArguments} by model at tool-call time. The optional\n * `schema` is the tool's parameter schema, used to disambiguate dotted keys.\n /\nexport function normalizeToolArgumentsForModel(\n args: unknown,\n model: Pick<Model<Api>, \"provider\" \| \"api\" \| \"id\"> \| undefined,\n schema?: unknown,\n): unknown {\n if (!model \|\| !isCopilotGeminiModel(model)) return args;\n return unflattenGeminiToolArguments(args, schema);\n}\n\n/* Map each tool name in an OpenAI chat-completions payload to its parameter schema. /\nfunction toolParameterSchemas(tools: unknown): Map<string, unknown> {\n const schemas = new Map<string, unknown>();\n if (!Array.isArray(tools)) return schemas;\n for (const tool of tools) {\n if (!isPlainObject(tool)) continue;\n // OpenAI chat-completions tool shape: { type: \"function\", function: { name, parameters } }.\n const fn = tool.function;\n if (isPlainObject(fn) && typeof fn.name === \"string\") {\n schemas.set(fn.name, fn.parameters);\n continue;\n }\n // Defensive: flat tool shape { name, parameters }.\n if (typeof tool.name === \"string\") schemas.set(tool.name, tool.parameters);\n }\n return schemas;\n}\n\n/\n Reconstruct flattened GitHub Copilot Gemini tool-call arguments on the\n * outbound replay payload, so prior assistant tool calls are sent back to\n * CAPI in the nested array/object shape Gemini originally produced.\n \n Why this exists\n * ---------------\n * {@link normalizeToolArgumentsForModel} only unflattens at tool execution\n * time; the persisted assistant message keeps the raw flattened arguments CAPI\n * delivered (for example `{ \"edits[0].newText\": \"...\" }`). 
When that message is\n * replayed on the next turn, CAPI parses those literal keys straight into the\n * Gemini `FunctionCall.Args`, producing a function call that does not match the\n * tool's declared schema (nor the structure Gemini signed). Gemini then ends\n * the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER`,\n * which CAPI surfaces as a bare `finish_reason: \"error\"` — so multi-turn tool\n * use dies one turn after any array/object tool call (such as `edit`).\n \n This rewrites each replayed assistant `tool_calls[].function.arguments` JSON\n * into the reconstructed nested shape (reusing {@link unflattenGeminiToolArguments}\n * with the tool's own parameter schema, looked up from the payload's `tools`),\n * fixing both new and already-persisted sessions. Gated to GitHub Copilot Gemini\n * models, fail-open on non-JSON arguments, and a no-op for well-formed args.\n */\nexport function normalizeCopilotGeminiReplayToolArguments(\n payload: unknown,\n model: Pick<Model<Api>, \"provider\" \| \"api\" \| \"id\">,\n): unknown {\n if (!isCopilotGeminiModel(model)) return payload;\n if (!isPlainObject(payload)) return payload;\n const messages = payload.messages;\n if (!Array.isArray(messages)) return payload;\n\n const schemas = toolParameterSchemas(payload.tools);\n let mutated = false;\n\n const nextMessages = messages.map((message) => {\n if (!isPlainObject(message) \|\| message.role !== \"assistant\") return message;\n const toolCalls = message.tool_calls;\n if (!Array.isArray(toolCalls) \|\| toolCalls.length === 0) return message;\n\n let messageMutated = false;\n const nextToolCalls = toolCalls.map((toolCall) => {\n if (!isPlainObject(toolCall)) return toolCall;\n const fn = toolCall.function;\n if (!isPlainObject(fn) \|\| typeof fn.arguments !== \"string\") return toolCall;\n\n let parsed: unknown;\n try {\n parsed = JSON.parse(fn.arguments);\n } catch {\n return toolCall; // fail open: never corrupt a replayed argument string\n }\n 
if (!isPlainObject(parsed)) return toolCall;\n\n const schema = typeof fn.name === \"string\" ? schemas.get(fn.name) : undefined;\n const reconstructed = unflattenGeminiToolArguments(parsed, schema);\n if (reconstructed === parsed) return toolCall;\n\n messageMutated = true;\n return { ...toolCall, function: { ...fn, arguments: JSON.stringify(reconstructed) } };\n });\n\n if (!messageMutated) return message;\n mutated = true;\n return { ...message, tool_calls: nextToolCalls };\n });\n\n if (!mutated) return payload;\n return { ...payload, messages: nextMessages };\n}\n"]}

package/dist/core/copilot-gemini-tool-arguments.js CHANGED Viewed

@@ -34,6 +34,41 @@ function containerPropertyNames(schema) {
     }
     return names;
 }
+function schemaTypeIncludes(schema, type) {
+    if (schema.type === type)
+        return true;
+    return Array.isArray(schema.type) && schema.type.includes(type);
+}
+function isArraySchema(schema) {
+    if (!isPlainObject(schema))
+        return false;
+    if (schemaTypeIncludes(schema, "array"))
+        return true;
+    if ("items" in schema && !schemaTypeIncludes(schema, "object"))
+        return true;
+    const union = schema.anyOf ?? schema.oneOf;
+    return Array.isArray(union) && union.some((branch) => isArraySchema(branch));
+}
+function requiredArrayPropertyNames(schema) {
+    if (!isPlainObject(schema))
+        return [];
+    const required = schema.required;
+    const properties = schema.properties;
+    if (!Array.isArray(required) || !isPlainObject(properties))
+        return [];
+    return required.filter((name) => (typeof name === "string" &&
+        Object.hasOwn(properties, name) &&
+        isArraySchema(properties[name])));
+}
+function fillMissingRequiredArrayProperties(args, schema) {
+    const missing = requiredArrayPropertyNames(schema).filter((name) => !Object.hasOwn(args, name));
+    if (missing.length === 0)
+        return args;
+    const next = { ...args };
+    for (const name of missing)
+        next[name] = [];
+    return next;
+}
 /** Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. */
 function isDottedContainerKey(key, containers) {
     const dot = key.indexOf(".");
@@ -70,9 +105,12 @@ export function unflattenGeminiToolArguments(args, schema) {
     const hasBracket = hasFlattenedKey(keys);
     const containers = hasBracket ? new Set() : containerPropertyNames(schema);
     const hasDottedContainer = !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));
-    if (!hasBracket && !hasDottedContainer)
-        return args;
-    return reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers));
+    const reconstructed = hasBracket || hasDottedContainer
+        ? reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers))
+        : args;
+    return isPlainObject(reconstructed)
+        ? fillMissingRequiredArrayProperties(reconstructed, schema)
+        : reconstructed;
 }
 /**
  * If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call

package/dist/core/copilot-gemini-tool-arguments.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"copilot-gemini-tool-arguments.js","sourceRoot":"","sources":["../../src/core/copilot-gemini-tool-arguments.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAwCzE,SAAS,aAAa,CAAC,KAAc;IACnC,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC;AAED,8DAA8D;AAC9D,SAAS,eAAe,CAAC,IAAc;IACrC,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AACjD,CAAC;AAED,sFAAsF;AACtF,SAAS,iBAAiB,CAAC,MAAe;IACxC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO;QAAE,OAAO,IAAI,CAAC;IACrE,IAAI,YAAY,IAAI,MAAM,IAAI,OAAO,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAC7D,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC;IAC3C,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC;IACnF,OAAO,KAAK,CAAC;AACf,CAAC;AAED,0EAA0E;AAC1E,SAAS,sBAAsB,CAAC,MAAe;IAC7C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACrC,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC;QAAE,OAAO,KAAK,CAAC;IAC7C,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,iBAAiB,CAAC,GAAG,CAAC;YAAE,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,uFAAuF;AACvF,SAAS,oBAAoB,CAAC,GAAW,EAAE,UAAuB;IAChE,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,GAAG,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC3B,OAAO,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,GAAW,EAAE,UAAmB,EAAE,UAAuB;IAC/E,IAAI,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACrC,IAAI,UAAU;QAAE,OAAO,IAAI,CAAC;IAC5B,OAAO,oBAAoB,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;AAC/C,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,4BAA4B,CAAC,IAAa,EAAE,MAAgB;IAC1E,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;QAAE,OAAO,IA
AI,CAAC;IACtC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,GAAG,EAAU,CAAC,CAAC,CAAC,sBAAsB,CAAC,MAAM,CAAC,CAAC;IACnF,MAAM,kBAAkB,GACtB,CAAC,UAAU,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,oBAAoB,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;IAC3E,IAAI,CAAC,UAAU,IAAI,CAAC,kBAAkB;QAAE,OAAO,IAAI,CAAC;IAEpD,OAAO,wBAAwB,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC;AAC9F,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,8BAA8B,CAC5C,IAAa,EACb,KAA8D,EAC9D,MAAgB;IAEhB,IAAI,CAAC,KAAK,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACxD,OAAO,4BAA4B,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;AACpD,CAAC;AAED,wFAAwF;AACxF,SAAS,oBAAoB,CAAC,KAAc;IAC1C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAmB,CAAC;IAC3C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAC1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;YAAE,SAAS;QACnC,4FAA4F;QAC5F,MAAM,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC;QACzB,IAAI,aAAa,CAAC,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC;YACpC,SAAS;QACX,CAAC;QACD,mDAAmD;QACnD,IAAI,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ;YAAE,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,yCAAyC,CACvD,OAAgB,EAChB,KAAkD;IAElD,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IACjD,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC;QAAE,OAAO,OAAO,CAAC;IAC5C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;IAClC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC;QAAE,OAAO,OAAO,CAAC;IAE7C,MAAM,OAAO,GAAG,oBAAoB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACpD,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;QAC5C,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,WAAW;YAAE,OAAO,OAAO,CAAC;QAC5E,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,OAA
O,CAAC;QAExE,IAAI,cAAc,GAAG,KAAK,CAAC;QAC3B,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE;YAC/C,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC;gBAAE,OAAO,QAAQ,CAAC;YAC9C,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC;YAC7B,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,SAAS,KAAK,QAAQ;gBAAE,OAAO,QAAQ,CAAC;YAE5E,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,QAAQ,CAAC,CAAC,sDAAsD;YACzE,CAAC;YACD,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;gBAAE,OAAO,QAAQ,CAAC;YAE5C,MAAM,MAAM,GAAG,OAAO,EAAE,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC9E,MAAM,aAAa,GAAG,4BAA4B,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;YACnE,IAAI,aAAa,KAAK,MAAM;gBAAE,OAAO,QAAQ,CAAC;YAE9C,cAAc,GAAG,IAAI,CAAC;YACtB,OAAO,EAAE,GAAG,QAAQ,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,EAAE,EAAE,CAAC;QACxF,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc;YAAE,OAAO,OAAO,CAAC;QACpC,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,EAAE,GAAG,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,OAAO;QAAE,OAAO,OAAO,CAAC;IAC7B,OAAO,EAAE,GAAG,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;AAChD,CAAC","sourcesContent":["import type { Api, Model } from \"@earendil-works/pi-ai\";\nimport { isCopilotGeminiModel } from \"./copilot-gemini-payload-sanitizer.ts\";\nimport { reconstructFlattenedKeys } from \"./flattened-tool-arguments.ts\";\n\n/*\n Normalizes GitHub Copilot Gemini tool-call arguments.\n \n Why this exists\n * ---------------\n * `github-copilot` Gemini models are served through Copilot's CAPI gateway,\n * which proxies to Google's GenAI API. When a function/tool argument is an\n * array (or a nested object/array), Gemini serializes it on the wire as\n * flattened, indexed keys instead of a real JSON array/object. 
For example\n * a tool called with `{ keywords: [\"a\", \"b\"] }` arrives as:\n \n ```json\n * { \"keywords[0]\": \"a\", \"keywords[1]\": \"b\" }\n * ```\n \n This was confirmed by capturing the raw CAPI SSE stream: the\n * `tool_calls[].function.arguments` JSON itself contains the `name[index]`\n * keys, so the runtime parses valid-but-wrong JSON. Schema validation then\n * fails (`keywords: must have required properties keywords` and\n * `root: must not have additional properties`) and the model retries forever,\n * because it keeps re-emitting the same flattened shape. This is most visible\n * with the workflow `structured_output` tool but affects any Gemini tool call\n * whose schema contains an array or nested object.\n \n What it does\n * ------------\n * Reconstructs flattened keys (`name[i]`, `name[i].sub`, `parent.child`) back\n * into the intended nested arrays/objects, before tool-argument validation\n * runs. Bracket-indexed keys (`name[<digit>]`) are always reconstructed. A\n * purely dotted key (`parent.child`, with no array anywhere) is ambiguous —\n * a legitimate argument key can itself contain a dot — so it is only split when\n * the optional tool `schema` marks its head segment as an object/array\n * container property. The transform is gated to GitHub Copilot Gemini models,\n * so it never touches well-formed arguments from any other provider/model.\n /\n\ntype JsonRecord = Record<string, unknown>;\n\nfunction isPlainObject(value: unknown): value is JsonRecord {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\n/* A flattened key contains a bracket index like `foo[0]`. /\nfunction hasFlattenedKey(keys: string[]): boolean {\n return keys.some((key) => /\\[\\d+\\]/.test(key));\n}\n\n/* A schema node that holds a nested object/array (so dotted keys are real paths). 
/\nfunction isContainerSchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schema.type === \"object\" \|\| schema.type === \"array\") return true;\n if (\"properties\" in schema \|\| \"items\" in schema) return true;\n const union = schema.anyOf ?? schema.oneOf;\n if (Array.isArray(union)) return union.some((branch) => isContainerSchema(branch));\n return false;\n}\n\n/* Top-level property names whose schema is an object/array container. /\nfunction containerPropertyNames(schema: unknown): Set<string> {\n const names = new Set<string>();\n if (!isPlainObject(schema)) return names;\n const properties = schema.properties;\n if (!isPlainObject(properties)) return names;\n for (const [name, sub] of Object.entries(properties)) {\n if (isContainerSchema(sub)) names.add(name);\n }\n return names;\n}\n\n/* Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. /\nfunction isDottedContainerKey(key: string, containers: Set<string>): boolean {\n const dot = key.indexOf(\".\");\n if (dot <= 0) return false;\n return containers.has(key.slice(0, dot));\n}\n\n/\n Decide whether a flattened key should be split into nested path segments.\n * Bracket-indexed keys always split. When a bracket key is present anywhere in\n * the payload, dotted keys split too (they are part of the same flattened\n * object). Otherwise a dotted key only splits when the schema marks its head as\n * a container property, which keeps legitimate dot-containing keys intact.\n /\nfunction shouldSplitKey(key: string, hasBracket: boolean, containers: Set<string>): boolean {\n if (/\\[\\d+\\]/.test(key)) return true;\n if (hasBracket) return true;\n return isDottedContainerKey(key, containers);\n}\n\n/\n Reconstruct flattened Gemini tool-call arguments into proper nested\n * arrays/objects. Returns the original reference unchanged when there is nothing\n * to reconstruct. 
Bracket-indexed keys are always reconstructed; purely dotted\n * keys are reconstructed only when the optional `schema` marks their head\n * segment as an object/array container property. Reconstruction (and its\n * prototype-pollution guard) is delegated to the shared canonical helper.\n /\nexport function unflattenGeminiToolArguments(args: unknown, schema?: unknown): unknown {\n if (!isPlainObject(args)) return args;\n const keys = Object.keys(args);\n const hasBracket = hasFlattenedKey(keys);\n const containers = hasBracket ? new Set<string>() : containerPropertyNames(schema);\n const hasDottedContainer =\n !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));\n if (!hasBracket && !hasDottedContainer) return args;\n\n return reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers));\n}\n\n/\n If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call\n * arguments; otherwise return them unchanged. Used to gate\n * {@link unflattenGeminiToolArguments} by model at tool-call time. The optional\n * `schema` is the tool's parameter schema, used to disambiguate dotted keys.\n /\nexport function normalizeToolArgumentsForModel(\n args: unknown,\n model: Pick<Model<Api>, \"provider\" \| \"api\" \| \"id\"> \| undefined,\n schema?: unknown,\n): unknown {\n if (!model \|\| !isCopilotGeminiModel(model)) return args;\n return unflattenGeminiToolArguments(args, schema);\n}\n\n/* Map each tool name in an OpenAI chat-completions payload to its parameter schema. 
/\nfunction toolParameterSchemas(tools: unknown): Map<string, unknown> {\n const schemas = new Map<string, unknown>();\n if (!Array.isArray(tools)) return schemas;\n for (const tool of tools) {\n if (!isPlainObject(tool)) continue;\n // OpenAI chat-completions tool shape: { type: \"function\", function: { name, parameters } }.\n const fn = tool.function;\n if (isPlainObject(fn) && typeof fn.name === \"string\") {\n schemas.set(fn.name, fn.parameters);\n continue;\n }\n // Defensive: flat tool shape { name, parameters }.\n if (typeof tool.name === \"string\") schemas.set(tool.name, tool.parameters);\n }\n return schemas;\n}\n\n/\n Reconstruct flattened GitHub Copilot Gemini tool-call arguments on the\n * outbound replay payload, so prior assistant tool calls are sent back to\n * CAPI in the nested array/object shape Gemini originally produced.\n \n Why this exists\n * ---------------\n * {@link normalizeToolArgumentsForModel} only unflattens at tool execution\n * time; the persisted assistant message keeps the raw flattened arguments CAPI\n * delivered (for example `{ \"edits[0].newText\": \"...\" }`). When that message is\n * replayed on the next turn, CAPI parses those literal keys straight into the\n * Gemini `FunctionCall.Args`, producing a function call that does not match the\n * tool's declared schema (nor the structure Gemini signed). Gemini then ends\n * the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER`,\n * which CAPI surfaces as a bare `finish_reason: \"error\"` — so multi-turn tool\n * use dies one turn after any array/object tool call (such as `edit`).\n \n This rewrites each replayed assistant `tool_calls[].function.arguments` JSON\n * into the reconstructed nested shape (reusing {@link unflattenGeminiToolArguments}\n * with the tool's own parameter schema, looked up from the payload's `tools`),\n * fixing both new and already-persisted sessions. 
Gated to GitHub Copilot Gemini\n * models, fail-open on non-JSON arguments, and a no-op for well-formed args.\n */\nexport function normalizeCopilotGeminiReplayToolArguments(\n payload: unknown,\n model: Pick<Model<Api>, \"provider\" \| \"api\" \| \"id\">,\n): unknown {\n if (!isCopilotGeminiModel(model)) return payload;\n if (!isPlainObject(payload)) return payload;\n const messages = payload.messages;\n if (!Array.isArray(messages)) return payload;\n\n const schemas = toolParameterSchemas(payload.tools);\n let mutated = false;\n\n const nextMessages = messages.map((message) => {\n if (!isPlainObject(message) \|\| message.role !== \"assistant\") return message;\n const toolCalls = message.tool_calls;\n if (!Array.isArray(toolCalls) \|\| toolCalls.length === 0) return message;\n\n let messageMutated = false;\n const nextToolCalls = toolCalls.map((toolCall) => {\n if (!isPlainObject(toolCall)) return toolCall;\n const fn = toolCall.function;\n if (!isPlainObject(fn) \|\| typeof fn.arguments !== \"string\") return toolCall;\n\n let parsed: unknown;\n try {\n parsed = JSON.parse(fn.arguments);\n } catch {\n return toolCall; // fail open: never corrupt a replayed argument string\n }\n if (!isPlainObject(parsed)) return toolCall;\n\n const schema = typeof fn.name === \"string\" ? schemas.get(fn.name) : undefined;\n const reconstructed = unflattenGeminiToolArguments(parsed, schema);\n if (reconstructed === parsed) return toolCall;\n\n messageMutated = true;\n return { ...toolCall, function: { ...fn, arguments: JSON.stringify(reconstructed) } };\n });\n\n if (!messageMutated) return message;\n mutated = true;\n return { ...message, tool_calls: nextToolCalls };\n });\n\n if (!mutated) return payload;\n return { ...payload, messages: nextMessages };\n}\n"]}
1	+ {"version":3,"file":"copilot-gemini-tool-arguments.js","sourceRoot":"","sources":["../../src/core/copilot-gemini-tool-arguments.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AA2CzE,SAAS,aAAa,CAAC,KAAc;IACnC,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC;AAED,8DAA8D;AAC9D,SAAS,eAAe,CAAC,IAAc;IACrC,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AACjD,CAAC;AAED,sFAAsF;AACtF,SAAS,iBAAiB,CAAC,MAAe;IACxC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO;QAAE,OAAO,IAAI,CAAC;IACrE,IAAI,YAAY,IAAI,MAAM,IAAI,OAAO,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAC7D,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC;IAC3C,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC;IACnF,OAAO,KAAK,CAAC;AACf,CAAC;AAED,0EAA0E;AAC1E,SAAS,sBAAsB,CAAC,MAAe;IAC7C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACrC,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC;QAAE,OAAO,KAAK,CAAC;IAC7C,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,iBAAiB,CAAC,GAAG,CAAC;YAAE,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,kBAAkB,CAAC,MAAkB,EAAE,IAAY;IAC1D,IAAI,MAAM,CAAC,IAAI,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACtC,OAAO,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,aAAa,CAAC,MAAe;IACpC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,IAAI,kBAAkB,CAAC,MAAM,EAAE,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IACrD,IAAI,OAAO,IAAI,MAAM,IAAI,CAAC,kBAAkB,CAAC,MAAM,EAAE,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5E,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC;IAC3C,OAAO,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,
MAAM,EAAE,EAAE,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;AAC/E,CAAC;AAED,SAAS,0BAA0B,CAAC,MAAe;IACjD,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,EAAE,CAAC;IACtC,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IACjC,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACrC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC;QAAE,OAAO,EAAE,CAAC;IACtE,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,CAC/C,OAAO,IAAI,KAAK,QAAQ;QACxB,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC;QAC/B,aAAa,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAChC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kCAAkC,CAAC,IAAgB,EAAE,MAAe;IAC3E,MAAM,OAAO,GAAG,0BAA0B,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IAChG,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,IAAI,GAAe,EAAE,GAAG,IAAI,EAAE,CAAC;IACrC,KAAK,MAAM,IAAI,IAAI,OAAO;QAAE,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;IAC5C,OAAO,IAAI,CAAC;AACd,CAAC;AAED,uFAAuF;AACvF,SAAS,oBAAoB,CAAC,GAAW,EAAE,UAAuB;IAChE,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,GAAG,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC3B,OAAO,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,GAAW,EAAE,UAAmB,EAAE,UAAuB;IAC/E,IAAI,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACrC,IAAI,UAAU;QAAE,OAAO,IAAI,CAAC;IAC5B,OAAO,oBAAoB,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;AAC/C,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,4BAA4B,CAAC,IAAa,EAAE,MAAgB;IAC1E,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,GAAG,EAAU,CAAC,CAAC,CAAC,sBAAsB,CAAC,MAAM,CAAC,CAAC;IACnF,MAAM,kBAAkB,GACtB,CAAC,UAAU,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,oBAAoB,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;IAC3E,MAAM,aAAa,GAAG,UAAU,IAAI,kBAAkB;QACpD,CAAC,CAAC,wBAAwB,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC;QACtF,CAAC,CAAC,IAAI,CAAC;IAET,OAAO,aAAa,CAAC,aAAa,C
AAC;QACjC,CAAC,CAAC,kCAAkC,CAAC,aAAa,EAAE,MAAM,CAAC;QAC3D,CAAC,CAAC,aAAa,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,8BAA8B,CAC5C,IAAa,EACb,KAA8D,EAC9D,MAAgB;IAEhB,IAAI,CAAC,KAAK,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACxD,OAAO,4BAA4B,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;AACpD,CAAC;AAED,wFAAwF;AACxF,SAAS,oBAAoB,CAAC,KAAc;IAC1C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAmB,CAAC;IAC3C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAC1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;YAAE,SAAS;QACnC,4FAA4F;QAC5F,MAAM,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC;QACzB,IAAI,aAAa,CAAC,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC;YACpC,SAAS;QACX,CAAC;QACD,mDAAmD;QACnD,IAAI,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ;YAAE,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,yCAAyC,CACvD,OAAgB,EAChB,KAAkD;IAElD,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IACjD,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC;QAAE,OAAO,OAAO,CAAC;IAC5C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;IAClC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC;QAAE,OAAO,OAAO,CAAC;IAE7C,MAAM,OAAO,GAAG,oBAAoB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACpD,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;QAC5C,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,WAAW;YAAE,OAAO,OAAO,CAAC;QAC5E,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,OAAO,CAAC;QAExE,IAAI,cAAc,GAAG,KAAK,CAAC;QAC3B,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE;YAC/C,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC;gBAAE,OAAO,QAAQ,CAAC;YAC9C,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC;YAC7B,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,SAAS,KAAK,QAAQ;gBAAE,OAAO,QAAQ,CAAC;YAE5E,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,OAA
O,QAAQ,CAAC,CAAC,sDAAsD;YACzE,CAAC;YACD,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;gBAAE,OAAO,QAAQ,CAAC;YAE5C,MAAM,MAAM,GAAG,OAAO,EAAE,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC9E,MAAM,aAAa,GAAG,4BAA4B,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;YACnE,IAAI,aAAa,KAAK,MAAM;gBAAE,OAAO,QAAQ,CAAC;YAE9C,cAAc,GAAG,IAAI,CAAC;YACtB,OAAO,EAAE,GAAG,QAAQ,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,EAAE,EAAE,CAAC;QACxF,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc;YAAE,OAAO,OAAO,CAAC;QACpC,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,EAAE,GAAG,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,OAAO;QAAE,OAAO,OAAO,CAAC;IAC7B,OAAO,EAAE,GAAG,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;AAChD,CAAC","sourcesContent":["import type { Api, Model } from \"@earendil-works/pi-ai\";\nimport { isCopilotGeminiModel } from \"./copilot-gemini-payload-sanitizer.ts\";\nimport { reconstructFlattenedKeys } from \"./flattened-tool-arguments.ts\";\n\n/*\n Normalizes GitHub Copilot Gemini tool-call arguments.\n \n Why this exists\n * ---------------\n * `github-copilot` Gemini models are served through Copilot's CAPI gateway,\n * which proxies to Google's GenAI API. When a function/tool argument is an\n * array (or a nested object/array), Gemini serializes it on the wire as\n * flattened, indexed keys instead of a real JSON array/object. For example\n * a tool called with `{ keywords: [\"a\", \"b\"] }` arrives as:\n \n ```json\n * { \"keywords[0]\": \"a\", \"keywords[1]\": \"b\" }\n * ```\n \n This was confirmed by capturing the raw CAPI SSE stream: the\n * `tool_calls[].function.arguments` JSON itself contains the `name[index]`\n * keys, so the runtime parses valid-but-wrong JSON. Schema validation then\n * fails (`keywords: must have required properties keywords` and\n * `root: must not have additional properties`) and the model retries forever,\n * because it keeps re-emitting the same flattened shape. 
This is most visible\n * with the workflow `structured_output` tool but affects any Gemini tool call\n * whose schema contains an array or nested object.\n \n What it does\n * ------------\n * Reconstructs flattened keys (`name[i]`, `name[i].sub`, `parent.child`) back\n * into the intended nested arrays/objects, before tool-argument validation\n * runs. Bracket-indexed keys (`name[<digit>]`) are always reconstructed. A\n * purely dotted key (`parent.child`, with no array anywhere) is ambiguous —\n * a legitimate argument key can itself contain a dot — so it is only split when\n * the optional tool `schema` marks its head segment as an object/array\n * container property. When Gemini omits a required empty array entirely (there\n * are no `name[0]` keys to send), the schema is also used to synthesize `[]` for\n * missing required top-level array properties so normal validation can proceed.\n * The transform is gated to GitHub Copilot Gemini models, so it never touches\n * well-formed arguments from any other provider/model.\n /\n\ntype JsonRecord = Record<string, unknown>;\n\nfunction isPlainObject(value: unknown): value is JsonRecord {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\n/* A flattened key contains a bracket index like `foo[0]`. /\nfunction hasFlattenedKey(keys: string[]): boolean {\n return keys.some((key) => /\\[\\d+\\]/.test(key));\n}\n\n/* A schema node that holds a nested object/array (so dotted keys are real paths). /\nfunction isContainerSchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schema.type === \"object\" \|\| schema.type === \"array\") return true;\n if (\"properties\" in schema \|\| \"items\" in schema) return true;\n const union = schema.anyOf ?? schema.oneOf;\n if (Array.isArray(union)) return union.some((branch) => isContainerSchema(branch));\n return false;\n}\n\n/* Top-level property names whose schema is an object/array container. 
/\nfunction containerPropertyNames(schema: unknown): Set<string> {\n const names = new Set<string>();\n if (!isPlainObject(schema)) return names;\n const properties = schema.properties;\n if (!isPlainObject(properties)) return names;\n for (const [name, sub] of Object.entries(properties)) {\n if (isContainerSchema(sub)) names.add(name);\n }\n return names;\n}\n\nfunction schemaTypeIncludes(schema: JsonRecord, type: string): boolean {\n if (schema.type === type) return true;\n return Array.isArray(schema.type) && schema.type.includes(type);\n}\n\nfunction isArraySchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schemaTypeIncludes(schema, \"array\")) return true;\n if (\"items\" in schema && !schemaTypeIncludes(schema, \"object\")) return true;\n const union = schema.anyOf ?? schema.oneOf;\n return Array.isArray(union) && union.some((branch) => isArraySchema(branch));\n}\n\nfunction requiredArrayPropertyNames(schema: unknown): readonly string[] {\n if (!isPlainObject(schema)) return [];\n const required = schema.required;\n const properties = schema.properties;\n if (!Array.isArray(required) \|\| !isPlainObject(properties)) return [];\n return required.filter((name): name is string => (\n typeof name === \"string\" &&\n Object.hasOwn(properties, name) &&\n isArraySchema(properties[name])\n ));\n}\n\nfunction fillMissingRequiredArrayProperties(args: JsonRecord, schema: unknown): JsonRecord {\n const missing = requiredArrayPropertyNames(schema).filter((name) => !Object.hasOwn(args, name));\n if (missing.length === 0) return args;\n const next: JsonRecord = { ...args };\n for (const name of missing) next[name] = [];\n return next;\n}\n\n/* Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. 
/\nfunction isDottedContainerKey(key: string, containers: Set<string>): boolean {\n const dot = key.indexOf(\".\");\n if (dot <= 0) return false;\n return containers.has(key.slice(0, dot));\n}\n\n/\n Decide whether a flattened key should be split into nested path segments.\n * Bracket-indexed keys always split. When a bracket key is present anywhere in\n * the payload, dotted keys split too (they are part of the same flattened\n * object). Otherwise a dotted key only splits when the schema marks its head as\n * a container property, which keeps legitimate dot-containing keys intact.\n /\nfunction shouldSplitKey(key: string, hasBracket: boolean, containers: Set<string>): boolean {\n if (/\\[\\d+\\]/.test(key)) return true;\n if (hasBracket) return true;\n return isDottedContainerKey(key, containers);\n}\n\n/\n Reconstruct flattened Gemini tool-call arguments into proper nested\n * arrays/objects. Returns the original reference unchanged when there is nothing\n * to reconstruct. Bracket-indexed keys are always reconstructed; purely dotted\n * keys are reconstructed only when the optional `schema` marks their head\n * segment as an object/array container property. Reconstruction (and its\n * prototype-pollution guard) is delegated to the shared canonical helper.\n /\nexport function unflattenGeminiToolArguments(args: unknown, schema?: unknown): unknown {\n if (!isPlainObject(args)) return args;\n const keys = Object.keys(args);\n const hasBracket = hasFlattenedKey(keys);\n const containers = hasBracket ? new Set<string>() : containerPropertyNames(schema);\n const hasDottedContainer =\n !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));\n const reconstructed = hasBracket \|\| hasDottedContainer\n ? reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers))\n : args;\n\n return isPlainObject(reconstructed)\n ? 
fillMissingRequiredArrayProperties(reconstructed, schema)\n : reconstructed;\n}\n\n/\n If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call\n * arguments; otherwise return them unchanged. Used to gate\n * {@link unflattenGeminiToolArguments} by model at tool-call time. The optional\n * `schema` is the tool's parameter schema, used to disambiguate dotted keys.\n /\nexport function normalizeToolArgumentsForModel(\n args: unknown,\n model: Pick<Model<Api>, \"provider\" \| \"api\" \| \"id\"> \| undefined,\n schema?: unknown,\n): unknown {\n if (!model \|\| !isCopilotGeminiModel(model)) return args;\n return unflattenGeminiToolArguments(args, schema);\n}\n\n/* Map each tool name in an OpenAI chat-completions payload to its parameter schema. /\nfunction toolParameterSchemas(tools: unknown): Map<string, unknown> {\n const schemas = new Map<string, unknown>();\n if (!Array.isArray(tools)) return schemas;\n for (const tool of tools) {\n if (!isPlainObject(tool)) continue;\n // OpenAI chat-completions tool shape: { type: \"function\", function: { name, parameters } }.\n const fn = tool.function;\n if (isPlainObject(fn) && typeof fn.name === \"string\") {\n schemas.set(fn.name, fn.parameters);\n continue;\n }\n // Defensive: flat tool shape { name, parameters }.\n if (typeof tool.name === \"string\") schemas.set(tool.name, tool.parameters);\n }\n return schemas;\n}\n\n/\n Reconstruct flattened GitHub Copilot Gemini tool-call arguments on the\n * outbound replay payload, so prior assistant tool calls are sent back to\n * CAPI in the nested array/object shape Gemini originally produced.\n \n Why this exists\n * ---------------\n * {@link normalizeToolArgumentsForModel} only unflattens at tool execution\n * time; the persisted assistant message keeps the raw flattened arguments CAPI\n * delivered (for example `{ \"edits[0].newText\": \"...\" }`). 
When that message is\n * replayed on the next turn, CAPI parses those literal keys straight into the\n * Gemini `FunctionCall.Args`, producing a function call that does not match the\n * tool's declared schema (nor the structure Gemini signed). Gemini then ends\n * the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER`,\n * which CAPI surfaces as a bare `finish_reason: \"error\"` — so multi-turn tool\n * use dies one turn after any array/object tool call (such as `edit`).\n \n This rewrites each replayed assistant `tool_calls[].function.arguments` JSON\n * into the reconstructed nested shape (reusing {@link unflattenGeminiToolArguments}\n * with the tool's own parameter schema, looked up from the payload's `tools`),\n * fixing both new and already-persisted sessions. Gated to GitHub Copilot Gemini\n * models, fail-open on non-JSON arguments, and a no-op for well-formed args.\n */\nexport function normalizeCopilotGeminiReplayToolArguments(\n payload: unknown,\n model: Pick<Model<Api>, \"provider\" \| \"api\" \| \"id\">,\n): unknown {\n if (!isCopilotGeminiModel(model)) return payload;\n if (!isPlainObject(payload)) return payload;\n const messages = payload.messages;\n if (!Array.isArray(messages)) return payload;\n\n const schemas = toolParameterSchemas(payload.tools);\n let mutated = false;\n\n const nextMessages = messages.map((message) => {\n if (!isPlainObject(message) \|\| message.role !== \"assistant\") return message;\n const toolCalls = message.tool_calls;\n if (!Array.isArray(toolCalls) \|\| toolCalls.length === 0) return message;\n\n let messageMutated = false;\n const nextToolCalls = toolCalls.map((toolCall) => {\n if (!isPlainObject(toolCall)) return toolCall;\n const fn = toolCall.function;\n if (!isPlainObject(fn) \|\| typeof fn.arguments !== \"string\") return toolCall;\n\n let parsed: unknown;\n try {\n parsed = JSON.parse(fn.arguments);\n } catch {\n return toolCall; // fail open: never corrupt a replayed argument string\n }\n 
if (!isPlainObject(parsed)) return toolCall;\n\n const schema = typeof fn.name === \"string\" ? schemas.get(fn.name) : undefined;\n const reconstructed = unflattenGeminiToolArguments(parsed, schema);\n if (reconstructed === parsed) return toolCall;\n\n messageMutated = true;\n return { ...toolCall, function: { ...fn, arguments: JSON.stringify(reconstructed) } };\n });\n\n if (!messageMutated) return message;\n mutated = true;\n return { ...message, tool_calls: nextToolCalls };\n });\n\n if (!mutated) return payload;\n return { ...payload, messages: nextMessages };\n}\n"]}

package/docs/workflows.md CHANGED Viewed

@@ -159,7 +159,7 @@ For the builtin result tables below, `deep-research-codebase`, `goal`, and `ralp
 |---|---|---|
 | `deep-research-codebase` | Scout + research-history chain → parallel specialist waves → aggregator. Indexes the whole repo and synthesizes findings. | Broad or cross-cutting research before you decide what to change. Prefer `/skill:research-codebase` for one subsystem. |
 | `goal` | Persisted goal ledger → bounded worker turns → receipts → three-reviewer gate → deterministic reducer → final report → optional final-stage PR handoff after approval. | Small-to-medium scope changes when you can identify the work surface, state the exact outcome, name the validation that proves it is done, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true` after Goal reaches `complete`. |
-| `ralph` | Prompt-refinement → research-prompt-refinement → codebase/online research → sub-agent orchestration → multi-model parallel review → optional final-stage PR handoff. | Larger migrations, broad refactors, and multi-package changes where you want Atomic to refine the prompt for clarity, transform it into a research question, research the codebase before implementing, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`. |
+| `ralph` | Raw prompt → research-prompt-refinement → codebase/online research → sub-agent orchestration → multi-model parallel review → optional final-stage PR handoff. | Larger migrations, broad refactors, and multi-package changes where you want Atomic to use your prompt as-is, transform it into a research question, research the codebase before implementing, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`. |
 | `open-claude-design` | Combined discovery/init (`/skill:impeccable shape` + `/skill:impeccable init` in one `discovery` stage) → design-system/reference research (`ds-*`) → curated gallery reference-discovery using that context → a forked `generate-*` / `user-feedback-*` loop → rich HTML handoff (`exporter` → `final-display`). The discovery stage asks what to build, the output type, and which references to emulate, then lets impeccable init detect/create/reconcile `PRODUCT.md` and `DESIGN.md` (references take precedence over project context). Renders a live `preview.html` you can iterate against in the browser (opens through impeccable `live` / the `playwright-cli` skill when available). | UI, page, component, theme, or design-token work that benefits from a guided brief, beautiful references, and generation + user feedback loops. |
 ### `deep-research-codebase`
@@ -228,11 +228,11 @@ Run examples:
 /workflow goal objective="Implement the focused docs fix, run the docs validation command, and open a PR when complete" create_pr=true
 ```
-`goal` starts with a single model-only `prompt-refinement` stage that sharpens the raw objective into a clearer, more actionable form using the Workflow Best Practices prompt anatomy documented later in this guide; the refined objective becomes the operative one recorded in the ledger (the original is preserved as `original_objective` and shown in the final report when it differs). `goal` then creates an OS-temp `goal-ledger.json` artifact, renders goal-continuation context for each worker turn, writes each worker receipt to `work-turn-N.md`, and appends receipts, reviewer decisions, blockers, reducer decisions, and lifecycle events to the ledger. The objective is treated as user-provided data, not higher-priority instructions. By default `goal` does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling, after Goal reaches `complete` within `max_turns`. Goal worker and reviewer prompts explicitly tell intermediate stages to ignore PR-creation requests; only the final `pull-request` stage may attempt that handoff.
+`goal` uses the raw `objective`, exactly as supplied, as the operative objective recorded in the ledger; it does not run an initial prompt-refinement stage. It creates an OS-temp `goal-ledger.json` artifact, renders goal-continuation context for each worker turn, writes each worker receipt to `work-turn-N.md`, and appends receipts, reviewer decisions, blockers, reducer decisions, and lifecycle events to the ledger. The objective is treated as user-provided data, not higher-priority instructions. By default `goal` does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling, after Goal reaches `complete` within `max_turns`. Goal worker and reviewer prompts explicitly tell intermediate stages to ignore PR-creation requests; only the final `pull-request` stage may attempt that handoff.
 Write the `objective` like a compact acceptance spec. Say what should exist when the run is done, how you want testing handled, which command(s) or manual checks matter, and what outcome proves completion. The workflow is intentionally lean: it does not first generate an RFC or migration plan, so the developer-supplied objective is where scope, validation, and completion criteria belong.
-The worker may claim readiness, but it cannot finalize completion. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Three reviewers independently inspect the ledger, worker receipt, repository state, and diff against `base_branch`; each returns structured JSON with findings, evidence, verification still remaining, and an optional blocker. A TypeScript reducer marks the goal complete only when reviewer quorum approves, marks blocked only when the same dependency/tool blocker repeats for the blocker threshold, continues when evidence is missing, and returns `needs_human` when `max_turns` is exhausted or worker execution fails.
+The worker may claim readiness, but it cannot finalize completion. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Goal reviewers also look for any QA E2E video referenced by the ledger or receipt and must inspect the actual video before treating it as proof. Three reviewers independently inspect the ledger, worker receipt, repository state, and diff against `base_branch`; each returns structured JSON with findings, evidence, verification still remaining, and an optional blocker. A TypeScript reducer marks the goal complete only when reviewer quorum approves, marks blocked only when the same dependency/tool blocker repeats for the blocker threshold, continues when evidence is missing, and returns `needs_human` when `max_turns` is exhausted or worker execution fails.
 Result fields:
@@ -242,8 +242,7 @@ Result fields:
 | `status` | Final reducer status: `complete`, `blocked`, or `needs_human` (or `active` only if externally interrupted). |
 | `approved` | Whether the reducer reached `complete`. |
 | `goal_id` | Per-run goal identifier stored in the ledger. |
-| `objective` | Normalized goal objective used by the run (after the `prompt-refinement` stage refines the raw objective). |
-| `original_objective` | The raw user-provided objective exactly as given, before `prompt-refinement`. Omitted when refinement left it unchanged. |
+| `objective` | Raw goal objective used by the run. |
 | `ledger_path` | OS-temp path to `goal-ledger.json`, including receipts, reviewer decisions, reducer decisions, blockers, and lifecycle events. |
 | `turns_completed` | Worker/review turns completed. |
 | `iterations_completed` | Same value as `turns_completed`, retained for status summaries. |
@@ -273,7 +272,7 @@ Run examples:
 /workflow ralph prompt="Safely implement the API refactor" git_worktree_dir=../atomic-ralph-api-wt base_branch=main
 ```
-Each `ralph` run starts with a single model-only `prompt-refinement` stage that sharpens the raw user prompt into a clearer, more actionable objective using the Workflow Best Practices prompt anatomy documented later in this guide; that refined prompt becomes the operative objective for research, orchestration, and review, while the original is surfaced as `original_prompt`. Each iteration then transforms the refined prompt with `/skill:prompt-engineer Transform the following refined user request into a codebase and online research question which can be thoroughly explored: ...` (`research-prompt-refinement`), researches that transformed question with `/skill:research-codebase ...`, and writes the findings under `research/`. The orchestrator treats that research artifact as its primary implementation context, initializes/updates an OS-temp implementation notes file while generating verifiable evidence for any claims it records in the notes and reviewer artifacts, delegates implementation through sub-agents, and asks three independent reviewers to inspect the patch directly against `base_branch`. The reviewer fan-out runs reviewers on different primary model families (Claude Fable 5, GPT-5.5 Codex, and Gemini 3.1 Pro, with shared fallbacks) so the adversarial review gets cross-model coverage instead of three passes from one model. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. For UI-applicable or full-stack changes, the orchestrator runs a `playwright-cli` end-to-end QA pass and records a reviewable proof video (referenced in the implementation notes and surfaced as `qa_video_path`); when `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review. 
If reviewers find issues, the next `research-prompt-refinement` and research stages receive the review artifact path so follow-up research can address unresolved findings, and research stages fork from prior research session data when available. The loop stops only when all three reviewers independently approve (each finds no issues) or `max_loops` is reached, so a P0–P3 finding from any single reviewer keeps Ralph iterating instead of being out-voted by a majority quorum. By default Ralph does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling; Ralph's own PR-creation instructions live in that final stage.
+Each `ralph` run uses the raw `prompt`, exactly as supplied, as the operative objective for research, orchestration, and review; it does not run an initial prompt-refinement stage. Each iteration transforms that raw prompt with `/skill:prompt-engineer Transform the following user request into a codebase and online research question which can be thoroughly explored: ...` (`research-prompt-refinement`), researches that transformed question with `/skill:research-codebase ...`, and writes the findings under `research/`. The orchestrator treats that research artifact as its primary implementation context, initializes/updates an OS-temp implementation notes file while generating verifiable evidence for any claims it records in the notes and reviewer artifacts, delegates implementation through sub-agents, and asks three independent reviewers to inspect the patch directly against `base_branch`. The reviewer fan-out runs reviewers on different primary model families (Claude Fable 5, GPT-5.5 Codex, and Gemini 3.1 Pro, with shared fallbacks) so the adversarial review gets cross-model coverage instead of three passes from one model. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. For UI-applicable or full-stack changes, the orchestrator runs a `playwright-cli` end-to-end QA pass and records a reviewable proof video (referenced in the implementation notes and surfaced as `qa_video_path`); reviewers receive that path and must inspect the actual video before treating it as proof. When `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review. 
If reviewers find issues, the next `research-prompt-refinement` and research stages receive the review artifact path so follow-up research can address unresolved findings, and research stages fork from prior research session data when available. The loop stops only when all three reviewers independently approve (each finds no issues) or `max_loops` is reached, so a P0–P3 finding from any single reviewer keeps Ralph iterating instead of being out-voted by a majority quorum. By default Ralph does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling; Ralph's own PR-creation instructions live in that final stage.
 Set `git_worktree_dir` when you want Ralph's worker stages isolated in a reusable Git worktree. Relative paths resolve from the invoking repository root, existing same-repository worktree roots are reused, and missing paths are created from `base_branch`. Ralph preserves the invoking repo-relative cwd inside the worktree, so launching from `repo/packages/api` with `git_worktree_dir=../repo-wt` runs stages from `../repo-wt/packages/api`.
@@ -293,10 +292,8 @@ Result fields:
 | `iterations_completed` | Number of research/orchestrate/review loops completed. |
 | `review_report` | Compact reference to the latest reviewer payload artifact. |
 | `review_report_path` | JSON artifact path for the latest Ralph review round. |
-| `original_prompt` | The raw user prompt exactly as provided, before the `prompt-refinement` stage. |
-| `refined_prompt` | The clarity-refined prompt produced by the `prompt-refinement` stage and used as the operative objective for research, orchestration, and review. |
-A typical planned flow is `/skill:research-codebase` → `/skill:create-spec` → `/workflow ralph prompt="Implement specs/2026-03-rate-limit.md and validate the documented burst behavior"`. Ralph can start from a spec path, GitHub issue, or crisp ticket description, then refines the prompt, researches as needed, delegates through sub-agents, reviews, records a QA proof video for UI/full-stack changes when practical, and iterates. For smaller one-off tasks, use `/workflow goal` with a concrete objective that identifies the work surface, states the exact outcome, and names the validation that proves it is done; add `create_pr=true` only when you want Goal's final `pull-request` stage after approval.
+A typical planned flow is `/skill:research-codebase` → `/skill:create-spec` → `/workflow ralph prompt="Implement specs/2026-03-rate-limit.md and validate the documented burst behavior"`. Ralph can start from a spec path, GitHub issue, or crisp ticket description; it then uses that prompt as-is, researches as needed, delegates through sub-agents, reviews, records a QA proof video for UI/full-stack changes when practical, and iterates. For smaller one-off tasks, use `/workflow goal` with a concrete objective that identifies the work surface, states the exact outcome, and names the validation that proves it is done; add `create_pr=true` only when you want Goal's final `pull-request` stage after approval.
 ### `open-claude-design`

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/atomic",
-  "version": "0.9.1-alpha.1",
+  "version": "0.9.2-alpha.1",
   "description": "Atomic coding agent CLI with read, bash, edit, write tools and session management",
   "type": "module",
   "atomicConfig": {
@@ -68,34 +68,34 @@
     "prepublishOnly": "bun run clean && bun run build"
   },
   "dependencies": {
-    "@bastani/atomic-natives": "0.9.1-alpha.1",
+    "@bastani/atomic-natives": "0.9.2-alpha.1",
     "@bufbuild/protobuf": "^2.0.0",
-    "@earendil-works/pi-agent-core": "^0.79.9",
-    "@earendil-works/pi-ai": "^0.79.9",
-    "@earendil-works/pi-tui": "^0.79.9",
+    "@earendil-works/pi-agent-core": "^0.79.10",
+    "@earendil-works/pi-ai": "^0.79.10",
+    "@earendil-works/pi-tui": "^0.79.10",
     "@modelcontextprotocol/ext-apps": "^1.7.2",
     "@modelcontextprotocol/sdk": "^1.25.1",
     "@mozilla/readability": "^0.6.0",
-    "@silvia-odwyer/photon-node": "^0.3.4",
-    "chalk": "^5.5.0",
+    "@silvia-odwyer/photon-node": "0.3.4",
+    "chalk": "5.6.2",
     "cross-spawn": "7.0.6",
-    "diff": "^9.0.0",
-    "glob": "^13.0.1",
-    "highlight.js": "^11.11.1",
-    "hosted-git-info": "^10.1.1",
-    "ignore": "^7.0.5",
-    "jiti": "^2.7.0",
+    "diff": "8.0.4",
+    "glob": "13.0.6",
+    "highlight.js": "10.7.3",
+    "hosted-git-info": "9.0.3",
+    "ignore": "7.0.5",
+    "jiti": "2.7.0",
     "linkedom": "^0.18.12",
-    "minimatch": "^10.2.3",
+    "minimatch": "10.2.5",
     "open": "^11.0.0",
     "p-limit": "^7.3.0",
-    "proper-lockfile": "^4.1.2",
-    "semver": "^7.8.0",
+    "proper-lockfile": "4.1.2",
+    "semver": "7.8.0",
     "turndown": "^7.2.0",
-    "typebox": "^1.1.24",
-    "undici": "^8.3.0",
+    "typebox": "1.1.38",
+    "undici": "8.5.0",
     "unpdf": "^1.6.2",
-    "yaml": "^2.9.0",
+    "yaml": "2.9.0",
     "zod": "^3.25.0 || ^4.0.0"
   },
   "overrides": {
@@ -105,20 +105,20 @@
     }
   },
   "optionalDependencies": {
-    "@mariozechner/clipboard": "^0.3.9"
+    "@mariozechner/clipboard": "0.3.9"
   },
   "devDependencies": {
     "@types/cross-spawn": "6.0.6",
-    "@types/diff": "^8.0.0",
-    "@types/hosted-git-info": "^3.0.5",
-    "@types/ms": "^2.1.0",
-    "@types/node": "^25.9.3",
-    "@types/proper-lockfile": "^4.1.4",
-    "@types/semver": "^7.7.1",
+    "@types/diff": "7.0.2",
+    "@types/hosted-git-info": "3.0.5",
+    "@types/ms": "2.1.0",
+    "@types/node": "24.12.4",
+    "@types/proper-lockfile": "4.1.4",
+    "@types/semver": "7.7.1",
     "@typescript/native-preview": "7.0.0-dev.20260511.1",
-    "shx": "^0.4.0",
-    "typescript": "^5.7.3",
-    "vitest": "^4.1.7"
+    "shx": "0.4.0",
+    "typescript": "5.9.3",
+    "vitest": "4.1.9"
   },
   "keywords": [
     "coding-agent",
@@ -136,6 +136,6 @@
     "directory": "packages/coding-agent"
   },
   "engines": {
-    "node": ">=20.6.0"
+    "node": ">=22.19.0"
   }
 }

package/dist/builtin/workflows/builtin/prompt-refinement.ts DELETED Viewed

@@ -1,90 +0,0 @@
-/**
- * Shared prompt-refinement stage used by the ralph and goal workflows.
- *
- * Before the main work loop begins, both workflows run this single
- * `prompt-refinement` stage. The stage uses the Workflow Best Practices prompt
- * anatomy documented in `packages/coding-agent/docs/workflows.md` to sharpen the
- * raw user request into a clearer, more actionable objective. The refined
- * request replaces the original as the operative objective downstream; the
- * original is preserved by each workflow for reporting.
- */
-import type { WorkflowModelValue, WorkflowTaskOptions, WorkflowTaskResult } from "../src/shared/types.js";
-export type PromptSection = readonly [tag: string, content: string];
-/**
- * Clarity rubric mirrored from the "## Workflow Best Practices" section of
- * `docs/workflows.md` (the user-facing docs under packages/coding-agent/docs).
- * The refinement stage makes each element explicit where it can be reasonably
- * inferred from the raw request.
- */
-export const PROMPT_REFINEMENT_CRITERIA = [
-  "Apply the workflow best practices documented in the `## Workflow Best Practices` section of `docs/workflows.md` to transform the raw request into a clear and verifiable objective. Treat that section as the authoritative prompt-anatomy rubric: use its Objective, Context, Scope, Non-goals, Done criteria, Validation command, Reporting requirements, and Stop conditions when refining the request.",
-  "Objective — state what should be true when the work is complete.",
-  "Context — note why it matters and where the relevant code or area likely lives.",
-  "Scope — state what is allowed to change (the smallest correct change).",
-  "Non-goals — state what to avoid (unrelated refactors, redesigns, or behavior changes outside this case).",
-  "Done criteria — list verifiable completion signals: new behavior works, existing behavior is unchanged, and the validation command passes.",
-  "Validation command — name the targeted check that proves the result.",
-  "Reporting requirements — changed files, validation results, and remaining risks must be reported.",
-  "Stop conditions — name the cases where the agent should stop and ask first (public API, security, data migration, etc.).",
-].join("\n");
-/**
- * Build the prompt sent to the prompt-refinement stage. The refined request is
- * returned verbatim (no fences or preamble) so it can replace the original
- * request as the operative objective for the rest of the workflow.
- */
-export function renderPromptRefinementPrompt(args: {
-  readonly request: string;
-  readonly workflowCwdContext?: PromptSection;
-}): string {
-  const sections: readonly string[] = [
-    `Refine the following user request into a clear and verifiable objective. Improve clarity and completeness using the rubric below without changing the user's intent, expanding scope, or inventing requirements that cannot be reasonably inferred from the request.`,
-    `<original_request>\n${args.request}\n</original_request>`,
-    `<instructions>\n${PROMPT_REFINEMENT_CRITERIA}\n</instructions>`,
-    `<output_format>\nReturn ONLY the refined request. No preamble, no explanation, and no Markdown fences. The returned text replaces the original request as the operative objective for the rest of the workflow, so it must be a single self-contained request.\n</output_format>`,
-  ];
-  const tail = args.workflowCwdContext === undefined
-    ? []
-    : [`<${args.workflowCwdContext[0]}>\n${args.workflowCwdContext[1].trim()}\n</${args.workflowCwdContext[0]}>`];
-  return [...sections, ...tail].join("\n\n");
-}
-/** Minimal context surface required to run a tracked refinement stage. */
-type PromptRefinementContext = {
-  task(name: string, options: WorkflowTaskOptions): Promise<WorkflowTaskResult>;
-};
-/** Model-chain + tool gating forwarded to the refinement stage session. */
-export type PromptRefinementModelConfig = {
-  readonly model?: WorkflowModelValue;
-  readonly fallbackModels?: readonly string[];
-  readonly noTools?: "all" | "builtin";
-  readonly excludedTools?: readonly string[];
-  readonly tools?: readonly string[];
-};
-/**
- * Run the shared `prompt-refinement` stage once and return the refined request.
- * Falls back to the original request when the stage produces no usable text.
- */
-export async function runPromptRefinementStage(
-  ctx: PromptRefinementContext,
-  options: {
-    readonly request: string;
-    readonly workflowCwdContext?: PromptSection;
-    readonly modelConfig: PromptRefinementModelConfig;
-  },
-): Promise<string> {
-  const result = await ctx.task("prompt-refinement", {
-    prompt: renderPromptRefinementPrompt({
-      request: options.request,
-      ...(options.workflowCwdContext === undefined ? {} : { workflowCwdContext: options.workflowCwdContext }),
-    }),
-    ...options.modelConfig,
-  });
-  const refined = (result.text ?? "").trim();
-  return refined.length > 0 ? refined : options.request;
-}