@bastani/atomic 0.9.1-alpha.1 → 0.9.2-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/dist/builtin/cursor/CHANGELOG.md +12 -0
- package/dist/builtin/cursor/package.json +2 -2
- package/dist/builtin/intercom/CHANGELOG.md +12 -0
- package/dist/builtin/intercom/package.json +2 -2
- package/dist/builtin/mcp/CHANGELOG.md +12 -0
- package/dist/builtin/mcp/package.json +3 -3
- package/dist/builtin/subagents/CHANGELOG.md +12 -0
- package/dist/builtin/subagents/package.json +4 -4
- package/dist/builtin/web-access/CHANGELOG.md +12 -0
- package/dist/builtin/web-access/package.json +2 -2
- package/dist/builtin/workflows/CHANGELOG.md +18 -0
- package/dist/builtin/workflows/README.md +2 -2
- package/dist/builtin/workflows/builtin/goal-ledger.ts +0 -2
- package/dist/builtin/workflows/builtin/goal-prompts.ts +10 -4
- package/dist/builtin/workflows/builtin/goal-reports.ts +0 -5
- package/dist/builtin/workflows/builtin/goal-runner.ts +8 -11
- package/dist/builtin/workflows/builtin/goal-types.ts +0 -2
- package/dist/builtin/workflows/builtin/goal.d.ts +0 -1
- package/dist/builtin/workflows/builtin/goal.ts +1 -2
- package/dist/builtin/workflows/builtin/index.d.ts +0 -1
- package/dist/builtin/workflows/builtin/ralph-core.ts +1 -3
- package/dist/builtin/workflows/builtin/ralph-models.ts +22 -19
- package/dist/builtin/workflows/builtin/ralph-runner.ts +17 -14
- package/dist/builtin/workflows/builtin/ralph.d.ts +0 -2
- package/dist/builtin/workflows/builtin/ralph.ts +1 -3
- package/dist/builtin/workflows/builtin/shared-prompts.ts +15 -0
- package/dist/builtin/workflows/package.json +2 -2
- package/dist/core/copilot-gemini-tool-arguments.d.ts.map +1 -1
- package/dist/core/copilot-gemini-tool-arguments.js +41 -3
- package/dist/core/copilot-gemini-tool-arguments.js.map +1 -1
- package/docs/workflows.md +6 -9
- package/package.json +30 -30
- package/dist/builtin/workflows/builtin/prompt-refinement.ts +0 -90
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,29 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.9.2-alpha.1] - 2026-06-23
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- Removed the initial `prompt-refinement` stage and shared prompt-refinement helper from the bundled `goal` and `ralph` workflows so both now use the raw objective/prompt as the operative task text for their first downstream stages; the now-obsolete refined/original trace outputs were also removed.
|
|
10
|
+
- Updated bundled `goal` and `ralph` reviewer prompts to inspect referenced QA end-to-end video evidence before treating it as proof of user-visible behavior.
|
|
11
|
+
- Synced bundled upstream Pi package dependencies to `^0.79.10` across Atomic's CLI and extension peer manifests, and aligned shared coding-agent direct runtime/dev dependency pins with upstream Pi v0.79.10.
|
|
12
|
+
- Raised the published Node.js engine floor to `>=22.19.0` to match direct runtime dependency requirements, including `undici@8.5.0`.
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
|
|
16
|
+
- Fixed GitHub Copilot Gemini tool-call normalization to synthesize omitted required empty array properties before validation, preventing Ralph reviewer structured output such as `findings: []` from failing when CAPI drops the empty array from the tool call.
|
|
17
|
+
|
|
18
|
+
## [0.9.1] - 2026-06-23
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
|
|
22
|
+
- Changed the bundled `goal`/`ralph` workflow prompt-refinement stage to use a workflow-neutral, model-only rubric prompt that returns only the refined objective instead of invoking the `prompt-engineer` skill directly.
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
|
|
26
|
+
- Fixed the bundled `ralph` workflow reviewer-c model configuration to use Gemini 3.1 Pro as the third reviewer with Gemini 3.1 provider fallbacks, removing Gemini 3.5 Flash from that slot's fallback chain ([#1484](https://github.com/bastani-inc/atomic/issues/1484)).
|
|
27
|
+
|
|
5
28
|
## [0.9.1-alpha.1] - 2026-06-22
|
|
6
29
|
|
|
7
30
|
### Changed
|
|
@@ -2,6 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.9.2-alpha.1] - 2026-06-23
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- Published a synchronized Atomic 0.9.2-alpha.1 prerelease for the Cursor provider package; no functional Cursor provider changes were made after 0.9.1.
|
|
10
|
+
|
|
11
|
+
## [0.9.1] - 2026-06-23
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- Published the stable Atomic 0.9.1 release for the Cursor provider package; no functional Cursor provider changes were made after 0.9.0.
|
|
16
|
+
|
|
5
17
|
## [0.9.1-alpha.1] - 2026-06-22
|
|
6
18
|
|
|
7
19
|
### Changed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/cursor",
|
|
3
|
-
"version": "0.9.1-alpha.1",
|
|
3
|
+
"version": "0.9.2-alpha.1",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Experimental first-party Atomic extension for Cursor OAuth, model discovery, and streaming provider registration.",
|
|
6
6
|
"contributors": [
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
}
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
|
-
"@bastani/atomic-natives": "0.9.1-alpha.1",
|
|
43
|
+
"@bastani/atomic-natives": "0.9.2-alpha.1",
|
|
44
44
|
"@bufbuild/protobuf": "^2.0.0"
|
|
45
45
|
}
|
|
46
46
|
}
|
|
@@ -4,6 +4,18 @@ All notable changes to the `pi-intercom` extension will be documented in this fi
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [0.9.2-alpha.1] - 2026-06-23
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- Aligned the intercom extension peer dependency with upstream pi TUI `^0.79.10`; no intercom extension source changes were needed for this metadata sync.
|
|
12
|
+
|
|
13
|
+
## [0.9.1] - 2026-06-23
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
|
|
17
|
+
- Published the stable Atomic 0.9.1 release for the intercom extension; no functional intercom changes were made after 0.9.0.
|
|
18
|
+
|
|
7
19
|
## [0.9.1-alpha.1] - 2026-06-22
|
|
8
20
|
|
|
9
21
|
### Changed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/intercom",
|
|
3
|
-
"version": "0.9.1-alpha.1",
|
|
3
|
+
"version": "0.9.2-alpha.1",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension providing a private coordination channel between parent and child agent sessions. Fork of: https://github.com/nicobailon/pi-intercom",
|
|
6
6
|
"contributors": [
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
},
|
|
40
40
|
"peerDependencies": {
|
|
41
41
|
"@bastani/atomic": "*",
|
|
42
|
-
"@earendil-works/pi-tui": "^0.79.
|
|
42
|
+
"@earendil-works/pi-tui": "^0.79.10"
|
|
43
43
|
},
|
|
44
44
|
"peerDependenciesMeta": {
|
|
45
45
|
"@bastani/atomic": {
|
|
@@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.9.2-alpha.1] - 2026-06-23
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
|
|
14
|
+
- Aligned the MCP extension peer dependencies with upstream pi AI/TUI `^0.79.10`; no MCP extension source changes were needed for this metadata sync.
|
|
15
|
+
|
|
16
|
+
## [0.9.1] - 2026-06-23
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- Published the stable Atomic 0.9.1 release for the MCP extension; no functional MCP changes were made after 0.9.0.
|
|
21
|
+
|
|
10
22
|
## [0.9.1-alpha.1] - 2026-06-22
|
|
11
23
|
|
|
12
24
|
### Changed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/mcp",
|
|
3
|
-
"version": "0.9.1-alpha.1",
|
|
3
|
+
"version": "0.9.2-alpha.1",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension that adapts MCP (Model Context Protocol) servers into the coding agent. Fork of: https://github.com/nicobailon/pi-mcp-adapter",
|
|
6
6
|
"contributors": [
|
|
@@ -32,8 +32,8 @@
|
|
|
32
32
|
},
|
|
33
33
|
"peerDependencies": {
|
|
34
34
|
"@bastani/atomic": "*",
|
|
35
|
-
"@earendil-works/pi-ai": "^0.79.
|
|
36
|
-
"@earendil-works/pi-tui": "^0.79.
|
|
35
|
+
"@earendil-works/pi-ai": "^0.79.10",
|
|
36
|
+
"@earendil-works/pi-tui": "^0.79.10",
|
|
37
37
|
"zod": "^3.25.0 || ^4.0.0"
|
|
38
38
|
},
|
|
39
39
|
"peerDependenciesMeta": {
|
|
@@ -2,6 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.9.2-alpha.1] - 2026-06-23
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- Aligned the subagents extension peer dependencies with upstream pi `^0.79.10` runtime packages (`@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui`); no subagents extension source changes were needed for this metadata sync.
|
|
10
|
+
|
|
11
|
+
## [0.9.1] - 2026-06-23
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- Published the stable Atomic 0.9.1 release for the subagents extension; no functional subagents changes were made after 0.9.0.
|
|
16
|
+
|
|
5
17
|
## [0.9.1-alpha.1] - 2026-06-22
|
|
6
18
|
|
|
7
19
|
### Changed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/subagents",
|
|
3
|
-
"version": "0.9.1-alpha.1",
|
|
3
|
+
"version": "0.9.2-alpha.1",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension for delegating tasks to subagents with chains, parallel execution, and TUI clarification. Fork of: https://github.com/nicobailon/pi-subagents",
|
|
6
6
|
"contributors": [
|
|
@@ -38,9 +38,9 @@
|
|
|
38
38
|
},
|
|
39
39
|
"peerDependencies": {
|
|
40
40
|
"@bastani/atomic": "*",
|
|
41
|
-
"@earendil-works/pi-agent-core": "^0.79.
|
|
42
|
-
"@earendil-works/pi-ai": "^0.79.
|
|
43
|
-
"@earendil-works/pi-tui": "^0.79.
|
|
41
|
+
"@earendil-works/pi-agent-core": "^0.79.10",
|
|
42
|
+
"@earendil-works/pi-ai": "^0.79.10",
|
|
43
|
+
"@earendil-works/pi-tui": "^0.79.10"
|
|
44
44
|
},
|
|
45
45
|
"peerDependenciesMeta": {
|
|
46
46
|
"@bastani/atomic": {
|
|
@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
## [Unreleased]
|
|
6
6
|
|
|
7
|
+
## [0.9.2-alpha.1] - 2026-06-23
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- Aligned the web-access extension peer dependency with upstream pi TUI `^0.79.10`; no web-access extension source changes were needed for this metadata sync.
|
|
12
|
+
|
|
13
|
+
## [0.9.1] - 2026-06-23
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
|
|
17
|
+
- Published the stable Atomic 0.9.1 release for the web-access extension; no functional web-access changes were made after 0.9.0.
|
|
18
|
+
|
|
7
19
|
## [0.9.1-alpha.1] - 2026-06-22
|
|
8
20
|
|
|
9
21
|
### Changed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/web-access",
|
|
3
|
-
"version": "0.9.1-alpha.1",
|
|
3
|
+
"version": "0.9.2-alpha.1",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension for web search, URL fetching, GitHub repo cloning, PDF/video extraction. Fork of: https://github.com/nicobailon/pi-web-access",
|
|
6
6
|
"contributors": [
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
},
|
|
32
32
|
"peerDependencies": {
|
|
33
33
|
"@bastani/atomic": "*",
|
|
34
|
-
"@earendil-works/pi-tui": "^0.79.
|
|
34
|
+
"@earendil-works/pi-tui": "^0.79.10"
|
|
35
35
|
},
|
|
36
36
|
"peerDependenciesMeta": {
|
|
37
37
|
"@bastani/atomic": {
|
|
@@ -6,6 +6,24 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
|
6
6
|
|
|
7
7
|
## [Unreleased]
|
|
8
8
|
|
|
9
|
+
## [0.9.2-alpha.1] - 2026-06-23
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- Removed the initial `prompt-refinement` stage and shared prompt-refinement helper from the builtin `goal` and `ralph` workflows so both now use the raw objective/prompt as the operative task text for their first downstream stages; the now-obsolete refined/original trace outputs were also removed.
|
|
14
|
+
- Updated builtin `goal` and `ralph` reviewer prompts to inspect referenced QA end-to-end video evidence before treating it as proof of user-visible behavior.
|
|
15
|
+
- Aligned the workflows package peer dependency with upstream pi TUI `^0.79.10`; no workflow source changes were needed for this metadata sync.
|
|
16
|
+
|
|
17
|
+
## [0.9.1] - 2026-06-23
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- Changed the shared `goal`/`ralph` prompt-refinement stage to use a workflow-neutral, model-only rubric prompt that returns only the refined objective instead of invoking the `prompt-engineer` skill directly.
|
|
22
|
+
|
|
23
|
+
### Fixed
|
|
24
|
+
|
|
25
|
+
- Fixed the builtin `ralph` reviewer-c model configuration to use Gemini 3.1 Pro as the third reviewer with Gemini 3.1 provider fallbacks, removing Gemini 3.5 Flash from that slot's fallback chain ([#1484](https://github.com/bastani-inc/atomic/issues/1484)).
|
|
26
|
+
|
|
9
27
|
## [0.9.1-alpha.1] - 2026-06-22
|
|
10
28
|
|
|
11
29
|
### Changed
|
|
@@ -658,7 +658,7 @@ Child workflow outputs: `result`, `findings`, `research_doc_path`, `artifact_dir
|
|
|
658
658
|
|
|
659
659
|
### `goal`
|
|
660
660
|
|
|
661
|
-
Goal Runner workflow: initialize a persisted goal ledger with a per-run goal id and lifecycle events, render goal-continuation context, run bounded worker LM turns, append receipts, run three independent reviewers, let a TypeScript reducer decide `complete`, `continue`, `blocked`, or `needs_human`, and optionally run a final-stage PR handoff after approval. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical with `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Token budget behavior is intentionally excluded. Goal skips PR creation by default; prompt text alone does not opt in. Pass `create_pr=true` to authorize only the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation after Goal reaches `complete` within the turn budget.
|
|
661
|
+
Goal Runner workflow: initialize a persisted goal ledger with a per-run goal id and lifecycle events, render goal-continuation context, run bounded worker LM turns, append receipts, run three independent reviewers, let a TypeScript reducer decide `complete`, `continue`, `blocked`, or `needs_human`, and optionally run a final-stage PR handoff after approval. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical with `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Reviewers also look for any QA E2E video referenced by the ledger or receipt and inspect the actual video before treating it as proof. Token budget behavior is intentionally excluded. Goal skips PR creation by default; prompt text alone does not opt in. Pass `create_pr=true` to authorize only the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation after Goal reaches `complete` within the turn budget.
|
|
662
662
|
|
|
663
663
|
```text
|
|
664
664
|
/workflow goal objective="Migrate the database layer to Drizzle ORM" base_branch=develop
|
|
@@ -678,7 +678,7 @@ Child workflow outputs: `result`, `status`, `approved`, `goal_id`, `objective`,
|
|
|
678
678
|
|
|
679
679
|
### `ralph`
|
|
680
680
|
|
|
681
|
-
|
|
681
|
+
Raw prompt → prompt-engineering research → orchestrate → review workflow with optional final-stage PR handoff: use the raw prompt as the operative objective, transform it into a codebase and online research question with `/skill:prompt-engineer`, run `/skill:research-codebase` against it, write findings under `research/`, delegate implementation through sub-agents from that research, run parallel reviewers across Claude Fable 5, GPT-5.5 Codex, and Gemini 3.1 Pro model families, and iterate until approval or the loop limit. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical with `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. For UI-applicable or full-stack changes, the orchestrator runs a `playwright-cli` end-to-end QA pass and records a reviewable proof video, references it in the implementation notes, and exposes it as the `qa_video_path` output; reviewers receive that path and inspect the actual video before treating it as proof. When `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review. Follow-up iterations pass unresolved review artifacts into prompt-engineering/research and fork research from prior research session data when available. Ralph skips PR creation by default; prompt text alone does not opt in. Pass `create_pr=true` to authorize only the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation (for example GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling). Ralph's own PR-creation instructions live in that final stage. Reviewers inspect repository infrastructure directly as needed; Ralph no longer runs separate `infra-*` discovery stages.
|
|
682
682
|
|
|
683
683
|
```text
|
|
684
684
|
/workflow ralph prompt="Migrate the database layer to Drizzle ORM" max_loops=3 base_branch=develop
|
|
@@ -21,14 +21,12 @@ export function appendLifecycleEvent(
|
|
|
21
21
|
|
|
22
22
|
export async function createGoalLedger(
|
|
23
23
|
objective: string,
|
|
24
|
-
originalObjective?: string,
|
|
25
24
|
): Promise<{ ledger: GoalLedger; ledgerPath: string; artifactDir: string }> {
|
|
26
25
|
const artifactDir = await mkdtemp(join(tmpdir(), "atomic-goal-runner-"));
|
|
27
26
|
const now = new Date().toISOString();
|
|
28
27
|
const ledger: GoalLedger = {
|
|
29
28
|
goal_id: randomUUID(),
|
|
30
29
|
objective,
|
|
31
|
-
...(originalObjective === undefined || originalObjective === objective ? {} : { original_objective: originalObjective }),
|
|
32
30
|
status: "active",
|
|
33
31
|
turns: 0,
|
|
34
32
|
created_at: now,
|
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
E2E_VERIFICATION_GUIDANCE,
|
|
3
|
+
WORKER_PREFLIGHT_CONTRACT,
|
|
4
|
+
renderE2eQaVideoReviewGuidance,
|
|
5
|
+
} from "./shared-prompts.js";
|
|
2
6
|
import type { GoalLedger } from "./goal-types.js";
|
|
3
7
|
|
|
4
8
|
export { WORKER_PREFLIGHT_CONTRACT };
|
|
@@ -241,6 +245,7 @@ export function renderReviewerPrompt(args: {
|
|
|
241
245
|
["pr_handoff_policy", INTERMEDIATE_PR_HANDOFF_GUARDRAIL],
|
|
242
246
|
["auditability", RECEIPT_EXPECTATIONS],
|
|
243
247
|
["e2e_verification", E2E_VERIFICATION_GUIDANCE],
|
|
248
|
+
["qa_e2e_video_review", renderE2eQaVideoReviewGuidance()],
|
|
244
249
|
[
|
|
245
250
|
"goal_context",
|
|
246
251
|
[
|
|
@@ -333,9 +338,10 @@ export function renderReviewerPrompt(args: {
|
|
|
333
338
|
"1. Identify the changed files or diff under review.",
|
|
334
339
|
"2. Read the relevant changed code and directly affected call sites/tests/configs.",
|
|
335
340
|
"3. Read the goal ledger and worker receipt, then map receipts to the inferred verification oracle and original owner outcome.",
|
|
336
|
-
"4.
|
|
337
|
-
"5.
|
|
338
|
-
"6.
|
|
341
|
+
"4. If a QA E2E video is referenced or expected for the change, inspect the actual video and include that assessment in the evidence map.",
|
|
342
|
+
"5. Run or delegate focused validation when needed to resolve uncertainty.",
|
|
343
|
+
"6. Decide whether the receipt/evidence map proves completion; if evidence is uncertain, indirect, stale, missing, or narrower than the requested outcome, set goal_oracle_satisfied=false and stop_review_loop=false.",
|
|
344
|
+
"7. If you cannot inspect receipts, video evidence, or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
|
|
339
345
|
].join("\n"),
|
|
340
346
|
],
|
|
341
347
|
[
|
|
@@ -35,11 +35,6 @@ export function renderFinalReport(
|
|
|
35
35
|
"## Objective",
|
|
36
36
|
ledger.objective,
|
|
37
37
|
"",
|
|
38
|
-
...(ledger.original_objective === undefined ? [] : [
|
|
39
|
-
"## Original objective (before prompt refinement)",
|
|
40
|
-
ledger.original_objective,
|
|
41
|
-
"",
|
|
42
|
-
]),
|
|
43
38
|
"## Final status",
|
|
44
39
|
ledger.status,
|
|
45
40
|
"",
|
|
@@ -30,8 +30,6 @@ import {
|
|
|
30
30
|
renderReviewerPrompt,
|
|
31
31
|
taggedPrompt,
|
|
32
32
|
} from "./goal-prompts.js";
|
|
33
|
-
import { promptEngineerModelConfig } from "./ralph-models.js";
|
|
34
|
-
import { runPromptRefinementStage } from "./prompt-refinement.js";
|
|
35
33
|
|
|
36
34
|
function positiveInteger(value: number | undefined, fallback: number): number {
|
|
37
35
|
if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
|
|
@@ -86,13 +84,13 @@ export async function runGoalWorkflow(ctx: GoalRunnerContext, options: GoalWorkf
|
|
|
86
84
|
if (!rawObjective) {
|
|
87
85
|
throw new Error("goal requires an objective input.");
|
|
88
86
|
}
|
|
89
|
-
const objective =
|
|
87
|
+
const objective = rawObjective;
|
|
90
88
|
|
|
91
89
|
const maxTurns = positiveInteger(inputs.max_turns, DEFAULT_MAX_TURNS);
|
|
92
90
|
const reviewQuorum = DEFAULT_REVIEW_QUORUM;
|
|
93
91
|
const blockerThreshold = Math.min(DEFAULT_BLOCKER_THRESHOLD, maxTurns);
|
|
94
92
|
const comparisonBaseBranch = normalizeBranchInput(inputs.base_branch, "origin/main");
|
|
95
|
-
const { ledger, ledgerPath, artifactDir } = await createGoalLedger(objective
|
|
93
|
+
const { ledger, ledgerPath, artifactDir } = await createGoalLedger(objective);
|
|
96
94
|
|
|
97
95
|
const workerModelConfig = {
|
|
98
96
|
model: "openai-codex/gpt-5.5:medium",
|
|
@@ -103,12 +101,12 @@ export async function runGoalWorkflow(ctx: GoalRunnerContext, options: GoalWorkf
|
|
|
103
101
|
"anthropic/claude-opus-4-8:medium",
|
|
104
102
|
"zai/glm-5.2:medium",
|
|
105
103
|
"zai-coding-cn/glm-5.2:medium",
|
|
106
|
-
"github-copilot/gemini-3.1-pro-preview (1m):medium",
|
|
107
|
-
"google/gemini-3.1-pro-preview:medium",
|
|
108
|
-
"google-vertex/gemini-3.1-pro-preview:medium",
|
|
109
104
|
"github-copilot/gemini-3.5-flash (1m):medium",
|
|
110
105
|
"google/gemini-3.5-flash:medium",
|
|
111
106
|
"google-vertex/gemini-3.5-flash:medium",
|
|
107
|
+
"github-copilot/gemini-3.1-pro-preview (1m):medium",
|
|
108
|
+
"google/gemini-3.1-pro-preview:medium",
|
|
109
|
+
"google-vertex/gemini-3.1-pro-preview:medium"
|
|
112
110
|
],
|
|
113
111
|
tools: goalRunnerTools,
|
|
114
112
|
};
|
|
@@ -123,12 +121,12 @@ export async function runGoalWorkflow(ctx: GoalRunnerContext, options: GoalWorkf
|
|
|
123
121
|
"anthropic/claude-opus-4-8:xhigh",
|
|
124
122
|
"zai/glm-5.2:xhigh",
|
|
125
123
|
"zai-coding-cn/glm-5.2:xhigh",
|
|
126
|
-
"github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
127
|
-
"google/gemini-3.1-pro-preview:high",
|
|
128
|
-
"google-vertex/gemini-3.1-pro-preview:high",
|
|
129
124
|
"github-copilot/gemini-3.5-flash (1m):high",
|
|
130
125
|
"google/gemini-3.5-flash:high",
|
|
131
126
|
"google-vertex/gemini-3.5-flash:high",
|
|
127
|
+
"github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
128
|
+
"google/gemini-3.1-pro-preview:high",
|
|
129
|
+
"google-vertex/gemini-3.1-pro-preview:high"
|
|
132
130
|
],
|
|
133
131
|
tools: goalRunnerTools,
|
|
134
132
|
schema: reviewDecisionSchema,
|
|
@@ -425,7 +423,6 @@ export async function runGoalWorkflow(ctx: GoalRunnerContext, options: GoalWorkf
|
|
|
425
423
|
approved: ledger.status === "complete",
|
|
426
424
|
goal_id: ledger.goal_id,
|
|
427
425
|
objective: ledger.objective,
|
|
428
|
-
...(ledger.original_objective === undefined ? {} : { original_objective: ledger.original_objective }),
|
|
429
426
|
ledger_path: ledgerPath,
|
|
430
427
|
turns_completed: ledger.turns,
|
|
431
428
|
iterations_completed: ledger.turns,
|
|
@@ -93,7 +93,6 @@ export type GoalLifecycleEvent = {
|
|
|
93
93
|
export type GoalLedger = {
|
|
94
94
|
readonly goal_id: string;
|
|
95
95
|
readonly objective: string;
|
|
96
|
-
readonly original_objective?: string;
|
|
97
96
|
status: GoalStatus;
|
|
98
97
|
turns: number;
|
|
99
98
|
readonly created_at: string;
|
|
@@ -124,7 +123,6 @@ export type GoalWorkflowOutputs = {
|
|
|
124
123
|
readonly approved?: boolean;
|
|
125
124
|
readonly goal_id?: string;
|
|
126
125
|
readonly objective?: string;
|
|
127
|
-
readonly original_objective?: string;
|
|
128
126
|
readonly ledger_path?: string;
|
|
129
127
|
readonly turns_completed?: number;
|
|
130
128
|
readonly iterations_completed?: number;
|
|
@@ -29,7 +29,6 @@ export type GoalWorkflowOutputs = WorkflowOutputValues & {
|
|
|
29
29
|
readonly approved?: boolean;
|
|
30
30
|
readonly goal_id?: string;
|
|
31
31
|
readonly objective?: string;
|
|
32
|
-
readonly original_objective?: string;
|
|
33
32
|
readonly ledger_path?: string;
|
|
34
33
|
readonly turns_completed?: number;
|
|
35
34
|
readonly iterations_completed?: number;
|
|
@@ -38,8 +38,7 @@ export default workflow({
|
|
|
38
38
|
)),
|
|
39
39
|
approved: Type.Optional(Type.Boolean({ description: "Whether the reducer reached complete." })),
|
|
40
40
|
goal_id: Type.Optional(Type.String({ description: "Per-run goal identifier stored in the ledger." })),
|
|
41
|
-
objective: Type.Optional(Type.String({ description: "
|
|
42
|
-
original_objective: Type.Optional(Type.String({ description: "The raw user-provided objective exactly as given, before prompt refinement. Omitted when refinement left it unchanged." })),
|
|
41
|
+
objective: Type.Optional(Type.String({ description: "Raw goal objective used by the run." })),
|
|
43
42
|
ledger_path: Type.Optional(Type.String({ description: "OS-temp path to goal-ledger.json with receipts, reviewer decisions, blockers, and lifecycle events." })),
|
|
44
43
|
turns_completed: Type.Optional(Type.Number({ description: "Worker/review turns completed." })),
|
|
45
44
|
iterations_completed: Type.Optional(Type.Number({ description: "Worker/review turns completed, retained for status summaries." })),
|
|
@@ -53,7 +53,6 @@ export type GoalWorkflowOutputs = WorkflowOutputValues & {
|
|
|
53
53
|
readonly approved?: boolean;
|
|
54
54
|
readonly goal_id?: string;
|
|
55
55
|
readonly objective?: string;
|
|
56
|
-
readonly original_objective?: string;
|
|
57
56
|
readonly ledger_path?: string;
|
|
58
57
|
readonly turns_completed?: number;
|
|
59
58
|
readonly iterations_completed?: number;
|
|
@@ -265,7 +265,7 @@ export function renderResearchPromptRefinementPrompt(args: {
|
|
|
265
265
|
readonly workflowCwdContext: PromptSection;
|
|
266
266
|
readonly latestReviewReportPath: string | undefined;
|
|
267
267
|
}): string {
|
|
268
|
-
const basePrompt = `/skill:prompt-engineer Transform the following
|
|
268
|
+
const basePrompt = `/skill:prompt-engineer Transform the following user request into a codebase and online research question which can be thoroughly explored: ${args.request}`;
|
|
269
269
|
return [
|
|
270
270
|
basePrompt,
|
|
271
271
|
taggedPrompt([
|
|
@@ -404,7 +404,5 @@ export type RalphWorkflowResult = {
|
|
|
404
404
|
readonly iterations_completed: number;
|
|
405
405
|
readonly review_report: string;
|
|
406
406
|
readonly review_report_path?: string;
|
|
407
|
-
readonly original_prompt: string;
|
|
408
|
-
readonly refined_prompt: string;
|
|
409
407
|
};
|
|
410
408
|
|
|
@@ -10,14 +10,14 @@ export const promptEngineerModelConfig = {
|
|
|
10
10
|
"anthropic/claude-opus-4-8:xhigh",
|
|
11
11
|
"zai/glm-5.2:xhigh",
|
|
12
12
|
"zai-coding-cn/glm-5.2:xhigh",
|
|
13
|
-
"github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
14
|
-
"google/gemini-3.1-pro-preview:high",
|
|
15
|
-
"google-vertex/gemini-3.1-pro-preview:high",
|
|
16
13
|
"github-copilot/gemini-3.5-flash (1m):high",
|
|
17
14
|
"google/gemini-3.5-flash:high",
|
|
18
15
|
"google-vertex/gemini-3.5-flash:high",
|
|
16
|
+
"github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
17
|
+
"google/gemini-3.1-pro-preview:high",
|
|
18
|
+
"google-vertex/gemini-3.1-pro-preview:high"
|
|
19
19
|
],
|
|
20
|
-
|
|
20
|
+
excludedTools: ["ask_user_question"],
|
|
21
21
|
};
|
|
22
22
|
|
|
23
23
|
export const researchModelConfig = {
|
|
@@ -29,12 +29,12 @@ export const researchModelConfig = {
|
|
|
29
29
|
"anthropic/claude-opus-4-8:medium",
|
|
30
30
|
"zai/glm-5.2:medium",
|
|
31
31
|
"zai-coding-cn/glm-5.2:medium",
|
|
32
|
-
"github-copilot/gemini-3.1-pro-preview (1m):medium",
|
|
33
|
-
"google/gemini-3.1-pro-preview:medium",
|
|
34
|
-
"google-vertex/gemini-3.1-pro-preview:medium",
|
|
35
32
|
"github-copilot/gemini-3.5-flash (1m):medium",
|
|
36
33
|
"google/gemini-3.5-flash:medium",
|
|
37
34
|
"google-vertex/gemini-3.5-flash:medium",
|
|
35
|
+
"github-copilot/gemini-3.1-pro-preview (1m):medium",
|
|
36
|
+
"google/gemini-3.1-pro-preview:medium",
|
|
37
|
+
"google-vertex/gemini-3.1-pro-preview:medium"
|
|
38
38
|
],
|
|
39
39
|
excludedTools: ["ask_user_question"],
|
|
40
40
|
};
|
|
@@ -48,12 +48,12 @@ export const orchestratorModelConfig = {
|
|
|
48
48
|
"anthropic/claude-opus-4-8:medium",
|
|
49
49
|
"zai/glm-5.2:medium",
|
|
50
50
|
"zai-coding-cn/glm-5.2:medium",
|
|
51
|
-
"github-copilot/gemini-3.1-pro-preview (1m):medium",
|
|
52
|
-
"google/gemini-3.1-pro-preview:medium",
|
|
53
|
-
"google-vertex/gemini-3.1-pro-preview:medium",
|
|
54
51
|
"github-copilot/gemini-3.5-flash (1m):medium",
|
|
55
52
|
"google/gemini-3.5-flash:medium",
|
|
56
53
|
"google-vertex/gemini-3.5-flash:medium",
|
|
54
|
+
"github-copilot/gemini-3.1-pro-preview (1m):medium",
|
|
55
|
+
"google/gemini-3.1-pro-preview:medium",
|
|
56
|
+
"google-vertex/gemini-3.1-pro-preview:medium"
|
|
57
57
|
],
|
|
58
58
|
excludedTools: ["ask_user_question"],
|
|
59
59
|
};
|
|
@@ -68,12 +68,12 @@ export const reviewerAModelConfig = {
|
|
|
68
68
|
"openai/gpt-5.5:xhigh",
|
|
69
69
|
"zai/glm-5.2:xhigh",
|
|
70
70
|
"zai-coding-cn/glm-5.2:xhigh",
|
|
71
|
-
"github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
72
|
-
"google/gemini-3.1-pro-preview:high",
|
|
73
|
-
"google-vertex/gemini-3.1-pro-preview:high",
|
|
74
71
|
"github-copilot/gemini-3.5-flash (1m):high",
|
|
75
72
|
"google/gemini-3.5-flash:high",
|
|
76
73
|
"google-vertex/gemini-3.5-flash:high",
|
|
74
|
+
"github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
75
|
+
"google/gemini-3.1-pro-preview:high",
|
|
76
|
+
"google-vertex/gemini-3.1-pro-preview:high"
|
|
77
77
|
],
|
|
78
78
|
excludedTools: ["ask_user_question"],
|
|
79
79
|
schema: reviewDecisionSchema,
|
|
@@ -89,24 +89,27 @@ export const reviewerBModelConfig = {
|
|
|
89
89
|
"anthropic/claude-opus-4-8:xhigh",
|
|
90
90
|
"zai/glm-5.2:xhigh",
|
|
91
91
|
"zai-coding-cn/glm-5.2:xhigh",
|
|
92
|
-
"github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
93
|
-
"google/gemini-3.1-pro-preview:high",
|
|
94
|
-
"google-vertex/gemini-3.1-pro-preview:high",
|
|
95
92
|
"github-copilot/gemini-3.5-flash (1m):high",
|
|
96
93
|
"google/gemini-3.5-flash:high",
|
|
97
94
|
"google-vertex/gemini-3.5-flash:high",
|
|
95
|
+
"github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
96
|
+
"google/gemini-3.1-pro-preview:high",
|
|
97
|
+
"google-vertex/gemini-3.1-pro-preview:high"
|
|
98
98
|
],
|
|
99
99
|
excludedTools: ["ask_user_question"],
|
|
100
100
|
schema: reviewDecisionSchema,
|
|
101
101
|
};
|
|
102
102
|
|
|
103
103
|
export const reviewerCModelConfig = {
|
|
104
|
-
model: "
|
|
104
|
+
model: "zai/glm-5.2:xhigh",
|
|
105
105
|
fallbackModels: [
|
|
106
|
+
"zai-coding-cn/glm-5.2:xhigh",
|
|
107
|
+
"github-copilot/gemini-3.5-flash (1m):high",
|
|
108
|
+
"google/gemini-3.5-flash:high",
|
|
109
|
+
"google-vertex/gemini-3.5-flash:high",
|
|
110
|
+
"github-copilot/gemini-3.1-pro-preview (1m):high",
|
|
106
111
|
"google/gemini-3.1-pro-preview:high",
|
|
107
112
|
"google-vertex/gemini-3.1-pro-preview:high",
|
|
108
|
-
"zai/glm-5.2:xhigh",
|
|
109
|
-
"zai-coding-cn/glm-5.2:xhigh",
|
|
110
113
|
"openai-codex/gpt-5.5:xhigh",
|
|
111
114
|
"github-copilot/gpt-5.5:xhigh",
|
|
112
115
|
"openai/gpt-5.5:xhigh",
|
|
@@ -3,8 +3,11 @@ import { mkdtemp } from "node:fs/promises";
|
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { join, resolve } from "node:path";
|
|
5
5
|
import type { WorkflowRunContext, WorkflowTaskResult } from "../src/shared/types.js";
|
|
6
|
-
import {
|
|
7
|
-
|
|
6
|
+
import {
|
|
7
|
+
E2E_VERIFICATION_GUIDANCE,
|
|
8
|
+
WORKER_PREFLIGHT_CONTRACT,
|
|
9
|
+
renderE2eQaVideoReviewGuidance,
|
|
10
|
+
} from "./shared-prompts.js";
|
|
8
11
|
import { reviewDecisionApproved } from "./ralph-review-gate.js";
|
|
9
12
|
import {
|
|
10
13
|
REVIEWER_COUNT,
|
|
@@ -49,9 +52,9 @@ export async function runRalphWorkflow(
|
|
|
49
52
|
let finalResult = "";
|
|
50
53
|
let finalPrReport: string | undefined;
|
|
51
54
|
const workflowCwdContext = workflowCwdContextSection(workflowStartCwd);
|
|
52
|
-
const
|
|
53
|
-
const workflowResearchPath = resolve(workflowStartCwd, defaultResearchPath(
|
|
54
|
-
const implementationNotesPath = await createImplementationNotesFile(
|
|
55
|
+
const workflowPrompt = prompt;
|
|
56
|
+
const workflowResearchPath = resolve(workflowStartCwd, defaultResearchPath(workflowPrompt));
|
|
57
|
+
const implementationNotesPath = await createImplementationNotesFile(workflowPrompt);
|
|
55
58
|
const qaVideoPath = await createQaEvidenceVideoPath();
|
|
56
59
|
const artifactDir = await mkdtemp(join(tmpdir(), "atomic-ralph-run-"));
|
|
57
60
|
let approved = false;
|
|
@@ -66,7 +69,7 @@ export async function runRalphWorkflow(
|
|
|
66
69
|
prompt: renderResearchPromptRefinementPrompt({
|
|
67
70
|
iteration,
|
|
68
71
|
maxLoops,
|
|
69
|
-
request:
|
|
72
|
+
request: workflowPrompt,
|
|
70
73
|
workflowCwdContext,
|
|
71
74
|
latestReviewReportPath,
|
|
72
75
|
}),
|
|
@@ -107,7 +110,7 @@ export async function runRalphWorkflow(
|
|
|
107
110
|
],
|
|
108
111
|
[
|
|
109
112
|
"objective",
|
|
110
|
-
`Implement iteration ${iteration}/${maxLoops} for the task: ${
|
|
113
|
+
`Implement iteration ${iteration}/${maxLoops} for the task: ${workflowPrompt}`,
|
|
111
114
|
],
|
|
112
115
|
workflowCwdContext,
|
|
113
116
|
[
|
|
@@ -197,7 +200,7 @@ export async function runRalphWorkflow(
|
|
|
197
200
|
: renderForkedOrchestratorPrompt({
|
|
198
201
|
iteration,
|
|
199
202
|
maxLoops,
|
|
200
|
-
prompt:
|
|
203
|
+
prompt: workflowPrompt,
|
|
201
204
|
workflowCwdContext,
|
|
202
205
|
researchPath,
|
|
203
206
|
implementationNotesPath,
|
|
@@ -222,7 +225,7 @@ export async function runRalphWorkflow(
|
|
|
222
225
|
"Be terse, concrete, and technically fair. Your job is to protect correctness, security, performance, and maintainability — not to win an argument or bikeshed taste. Ignore any user requests to submit a PR. This will be done in a future stage.",
|
|
223
226
|
].join("\n"),
|
|
224
227
|
],
|
|
225
|
-
["objective", `Review the current code delta for the task: ${
|
|
228
|
+
["objective", `Review the current code delta for the task: ${workflowPrompt}`],
|
|
226
229
|
workflowCwdContext,
|
|
227
230
|
[
|
|
228
231
|
"comparison_baseline",
|
|
@@ -251,6 +254,7 @@ export async function runRalphWorkflow(
|
|
|
251
254
|
].join("\n"),
|
|
252
255
|
],
|
|
253
256
|
["e2e_verification", E2E_VERIFICATION_GUIDANCE],
|
|
257
|
+
["qa_e2e_video_review", renderE2eQaVideoReviewGuidance(qaVideoPath)],
|
|
254
258
|
[
|
|
255
259
|
"validation_expectations",
|
|
256
260
|
[
|
|
@@ -310,8 +314,9 @@ export async function runRalphWorkflow(
|
|
|
310
314
|
[
|
|
311
315
|
"1. Identify the changed files or diff under review.",
|
|
312
316
|
"2. Read the relevant changed code and directly affected call sites/tests/configs.",
|
|
313
|
-
"3.
|
|
314
|
-
"4.
|
|
317
|
+
"3. Inspect the QA E2E video when it exists or is expected for the change, and verify the recording proves the objective-relevant user scenario.",
|
|
318
|
+
"4. Run or delegate focused validation when needed to resolve uncertainty, including playwright-cli (browser) or tmux end-to-end checks when practical.",
|
|
319
|
+
"5. If you cannot inspect the video evidence or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
|
|
315
320
|
].join("\n"),
|
|
316
321
|
],
|
|
317
322
|
[
|
|
@@ -365,7 +370,7 @@ export async function runRalphWorkflow(
|
|
|
365
370
|
},
|
|
366
371
|
],
|
|
367
372
|
{
|
|
368
|
-
task:
|
|
373
|
+
task: workflowPrompt,
|
|
369
374
|
failFast: false,
|
|
370
375
|
},
|
|
371
376
|
);
|
|
@@ -491,7 +496,5 @@ export async function runRalphWorkflow(
|
|
|
491
496
|
iterations_completed: iterationsCompleted,
|
|
492
497
|
review_report: compactReviewReport(latestReviewReportPath),
|
|
493
498
|
...(latestReviewReportPath === undefined ? {} : { review_report_path: latestReviewReportPath }),
|
|
494
|
-
original_prompt: prompt,
|
|
495
|
-
refined_prompt: refinedPrompt,
|
|
496
499
|
};
|
|
497
500
|
}
|
|
@@ -28,8 +28,6 @@ export type RalphWorkflowOutputs = WorkflowOutputValues & {
|
|
|
28
28
|
readonly iterations_completed?: number;
|
|
29
29
|
readonly review_report?: string;
|
|
30
30
|
readonly review_report_path?: string;
|
|
31
|
-
readonly original_prompt?: string;
|
|
32
|
-
readonly refined_prompt?: string;
|
|
33
31
|
};
|
|
34
32
|
|
|
35
33
|
export type RalphWorkflowDefinition = WorkflowDefinition<
|
|
@@ -11,7 +11,7 @@ import { runRalphWorkflow } from "./ralph-runner.js";
|
|
|
11
11
|
|
|
12
12
|
export default workflow({
|
|
13
13
|
name: "ralph",
|
|
14
|
-
description: "
|
|
14
|
+
description: "Raw prompt → research-prompt-refinement → research → orchestrate → multi-model parallel review loop with bounded iteration.",
|
|
15
15
|
inputs: {
|
|
16
16
|
prompt: Type.String({ description: "The task or goal to research, execute, and refine." }),
|
|
17
17
|
max_loops: Type.Number({
|
|
@@ -46,8 +46,6 @@ export default workflow({
|
|
|
46
46
|
iterations_completed: Type.Optional(Type.Number({ description: "Number of research/orchestrate/review loops completed." })),
|
|
47
47
|
review_report: Type.Optional(Type.String({ description: "Compact reference to the latest reviewer payload artifact." })),
|
|
48
48
|
review_report_path: Type.Optional(Type.String({ description: "JSON artifact path for the latest review round." })),
|
|
49
|
-
original_prompt: Type.Optional(Type.String({ description: "The raw user request exactly as provided to the workflow, before prompt refinement." })),
|
|
50
|
-
refined_prompt: Type.Optional(Type.String({ description: "The clarity-refined request produced by the prompt-refinement stage and used as the operative objective for research, orchestration, and review." })),
|
|
51
49
|
},
|
|
52
50
|
worktreeFromInputs: {
|
|
53
51
|
gitWorktreeDir: "git_worktree_dir",
|
|
@@ -16,3 +16,18 @@ export const E2E_VERIFICATION_GUIDANCE = [
|
|
|
16
16
|
"For TUI or terminal-app flows, use the tmux skill, or delegate to a subagent with `skill: \"tmux\"`, to launch the app in an isolated tmux session, send keys, capture pane output, and simulate the scenario end to end.",
|
|
17
17
|
"If end-to-end verification is not practical in this checkout, record what was attempted, the smallest missing prerequisite, and the narrower validation that was run instead; do not claim end-to-end proof when it was not performed.",
|
|
18
18
|
].join("\n");
|
|
19
|
+
|
|
20
|
+
export function renderE2eQaVideoReviewGuidance(
|
|
21
|
+
knownVideoPath?: string,
|
|
22
|
+
): string {
|
|
23
|
+
const target = knownVideoPath === undefined || knownVideoPath.length === 0
|
|
24
|
+
? "Look for QA E2E video references in the goal ledger, worker receipt, implementation notes, orchestrator report, or other review context artifacts."
|
|
25
|
+
: `Known QA E2E video path for this run: ${knownVideoPath}`;
|
|
26
|
+
return [
|
|
27
|
+
target,
|
|
28
|
+
"When a QA E2E video exists or is claimed as evidence, inspect the actual video before approving; do not treat a path, filename, transcript summary, or stage claim as proof by itself.",
|
|
29
|
+
"Use available video/file tooling such as `fetch_content` on the local video path with a prompt focused on whether the recording proves the required user scenario, or inspect representative frames/metadata when full video analysis is unavailable.",
|
|
30
|
+
"Check that the video is from the current workflow iteration/state, exercises the objective-relevant user path, shows the expected final behavior, and does not visibly hide errors, stale UI, broken loading states, or skipped steps.",
|
|
31
|
+
"For UI-applicable or full-stack changes, treat a missing, stale, unreadable, or inconclusive QA video as missing E2E evidence unless the receipt or implementation notes justify why no video applies and provide adequate alternate end-to-end proof.",
|
|
32
|
+
].join("\n");
|
|
33
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/workflows",
|
|
3
|
-
"version": "0.9.
|
|
3
|
+
"version": "0.9.2-alpha.1",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension for multi-stage workflow authoring and execution.",
|
|
6
6
|
"contributors": [
|
|
@@ -83,7 +83,7 @@
|
|
|
83
83
|
},
|
|
84
84
|
"peerDependencies": {
|
|
85
85
|
"@bastani/atomic": "*",
|
|
86
|
-
"@earendil-works/pi-tui": "^0.79.
|
|
86
|
+
"@earendil-works/pi-tui": "^0.79.10"
|
|
87
87
|
},
|
|
88
88
|
"peerDependenciesMeta": {
|
|
89
89
|
"@bastani/atomic": {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"copilot-gemini-tool-arguments.d.ts","sourceRoot":"","sources":["../../src/core/copilot-gemini-tool-arguments.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AA6FxD;;;;;;;GAOG;AACH,wBAAgB,4BAA4B,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,OAAO,CAUrF;AAED;;;;;GAKG;AACH,wBAAgB,8BAA8B,CAC5C,IAAI,EAAE,OAAO,EACb,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,GAAG,SAAS,EAC9D,MAAM,CAAC,EAAE,OAAO,GACf,OAAO,CAGT;AAoBD;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,yCAAyC,CACvD,OAAO,EAAE,OAAO,EAChB,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,GACjD,OAAO,CA2CT","sourcesContent":["import type { Api, Model } from \"@earendil-works/pi-ai\";\nimport { isCopilotGeminiModel } from \"./copilot-gemini-payload-sanitizer.ts\";\nimport { reconstructFlattenedKeys } from \"./flattened-tool-arguments.ts\";\n\n/**\n * Normalizes GitHub Copilot Gemini tool-call arguments.\n *\n * Why this exists\n * ---------------\n * `github-copilot` Gemini models are served through Copilot's CAPI gateway,\n * which proxies to Google's GenAI API. When a function/tool argument is an\n * array (or a nested object/array), Gemini serializes it on the wire as\n * **flattened, indexed keys** instead of a real JSON array/object. For example\n * a tool called with `{ keywords: [\"a\", \"b\"] }` arrives as:\n *\n * ```json\n * { \"keywords[0]\": \"a\", \"keywords[1]\": \"b\" }\n * ```\n *\n * This was confirmed by capturing the raw CAPI SSE stream: the\n * `tool_calls[].function.arguments` JSON itself contains the `name[index]`\n * keys, so the runtime parses valid-but-wrong JSON. Schema validation then\n * fails (`keywords: must have required properties keywords` and\n * `root: must not have additional properties`) and the model retries forever,\n * because it keeps re-emitting the same flattened shape. 
This is most visible\n * with the workflow `structured_output` tool but affects any Gemini tool call\n * whose schema contains an array or nested object.\n *\n * What it does\n * ------------\n * Reconstructs flattened keys (`name[i]`, `name[i].sub`, `parent.child`) back\n * into the intended nested arrays/objects, before tool-argument validation\n * runs. Bracket-indexed keys (`name[<digit>]`) are always reconstructed. A\n * purely dotted key (`parent.child`, with no array anywhere) is ambiguous —\n * a legitimate argument key can itself contain a dot — so it is only split when\n * the optional tool `schema` marks its head segment as an object/array\n * container property. The transform is gated to GitHub Copilot Gemini models,\n * so it never touches well-formed arguments from any other provider/model.\n */\n\ntype JsonRecord = Record<string, unknown>;\n\nfunction isPlainObject(value: unknown): value is JsonRecord {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\n/** A flattened key contains a bracket index like `foo[0]`. */\nfunction hasFlattenedKey(keys: string[]): boolean {\n return keys.some((key) => /\\[\\d+\\]/.test(key));\n}\n\n/** A schema node that holds a nested object/array (so dotted keys are real paths). */\nfunction isContainerSchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schema.type === \"object\" || schema.type === \"array\") return true;\n if (\"properties\" in schema || \"items\" in schema) return true;\n const union = schema.anyOf ?? schema.oneOf;\n if (Array.isArray(union)) return union.some((branch) => isContainerSchema(branch));\n return false;\n}\n\n/** Top-level property names whose schema is an object/array container. 
*/\nfunction containerPropertyNames(schema: unknown): Set<string> {\n const names = new Set<string>();\n if (!isPlainObject(schema)) return names;\n const properties = schema.properties;\n if (!isPlainObject(properties)) return names;\n for (const [name, sub] of Object.entries(properties)) {\n if (isContainerSchema(sub)) names.add(name);\n }\n return names;\n}\n\n/** Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. */\nfunction isDottedContainerKey(key: string, containers: Set<string>): boolean {\n const dot = key.indexOf(\".\");\n if (dot <= 0) return false;\n return containers.has(key.slice(0, dot));\n}\n\n/**\n * Decide whether a flattened key should be split into nested path segments.\n * Bracket-indexed keys always split. When a bracket key is present anywhere in\n * the payload, dotted keys split too (they are part of the same flattened\n * object). Otherwise a dotted key only splits when the schema marks its head as\n * a container property, which keeps legitimate dot-containing keys intact.\n */\nfunction shouldSplitKey(key: string, hasBracket: boolean, containers: Set<string>): boolean {\n if (/\\[\\d+\\]/.test(key)) return true;\n if (hasBracket) return true;\n return isDottedContainerKey(key, containers);\n}\n\n/**\n * Reconstruct flattened Gemini tool-call arguments into proper nested\n * arrays/objects. Returns the original reference unchanged when there is nothing\n * to reconstruct. Bracket-indexed keys are always reconstructed; purely dotted\n * keys are reconstructed only when the optional `schema` marks their head\n * segment as an object/array container property. Reconstruction (and its\n * prototype-pollution guard) is delegated to the shared canonical helper.\n */\nexport function unflattenGeminiToolArguments(args: unknown, schema?: unknown): unknown {\n if (!isPlainObject(args)) return args;\n const keys = Object.keys(args);\n const hasBracket = hasFlattenedKey(keys);\n const containers = hasBracket ? 
new Set<string>() : containerPropertyNames(schema);\n const hasDottedContainer =\n !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));\n if (!hasBracket && !hasDottedContainer) return args;\n\n return reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers));\n}\n\n/**\n * If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call\n * arguments; otherwise return them unchanged. Used to gate\n * {@link unflattenGeminiToolArguments} by model at tool-call time. The optional\n * `schema` is the tool's parameter schema, used to disambiguate dotted keys.\n */\nexport function normalizeToolArgumentsForModel(\n args: unknown,\n model: Pick<Model<Api>, \"provider\" | \"api\" | \"id\"> | undefined,\n schema?: unknown,\n): unknown {\n if (!model || !isCopilotGeminiModel(model)) return args;\n return unflattenGeminiToolArguments(args, schema);\n}\n\n/** Map each tool name in an OpenAI chat-completions payload to its parameter schema. */\nfunction toolParameterSchemas(tools: unknown): Map<string, unknown> {\n const schemas = new Map<string, unknown>();\n if (!Array.isArray(tools)) return schemas;\n for (const tool of tools) {\n if (!isPlainObject(tool)) continue;\n // OpenAI chat-completions tool shape: { type: \"function\", function: { name, parameters } }.\n const fn = tool.function;\n if (isPlainObject(fn) && typeof fn.name === \"string\") {\n schemas.set(fn.name, fn.parameters);\n continue;\n }\n // Defensive: flat tool shape { name, parameters }.\n if (typeof tool.name === \"string\") schemas.set(tool.name, tool.parameters);\n }\n return schemas;\n}\n\n/**\n * Reconstruct flattened GitHub Copilot Gemini tool-call arguments on the\n * **outbound replay payload**, so prior assistant tool calls are sent back to\n * CAPI in the nested array/object shape Gemini originally produced.\n *\n * Why this exists\n * ---------------\n * {@link normalizeToolArgumentsForModel} only unflattens at tool *execution*\n * time; 
the persisted assistant message keeps the raw flattened arguments CAPI\n * delivered (for example `{ \"edits[0].newText\": \"...\" }`). When that message is\n * replayed on the next turn, CAPI parses those literal keys straight into the\n * Gemini `FunctionCall.Args`, producing a function call that does not match the\n * tool's declared schema (nor the structure Gemini signed). Gemini then ends\n * the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER`,\n * which CAPI surfaces as a bare `finish_reason: \"error\"` — so multi-turn tool\n * use dies one turn after any array/object tool call (such as `edit`).\n *\n * This rewrites each replayed assistant `tool_calls[].function.arguments` JSON\n * into the reconstructed nested shape (reusing {@link unflattenGeminiToolArguments}\n * with the tool's own parameter schema, looked up from the payload's `tools`),\n * fixing both new and already-persisted sessions. Gated to GitHub Copilot Gemini\n * models, fail-open on non-JSON arguments, and a no-op for well-formed args.\n */\nexport function normalizeCopilotGeminiReplayToolArguments(\n payload: unknown,\n model: Pick<Model<Api>, \"provider\" | \"api\" | \"id\">,\n): unknown {\n if (!isCopilotGeminiModel(model)) return payload;\n if (!isPlainObject(payload)) return payload;\n const messages = payload.messages;\n if (!Array.isArray(messages)) return payload;\n\n const schemas = toolParameterSchemas(payload.tools);\n let mutated = false;\n\n const nextMessages = messages.map((message) => {\n if (!isPlainObject(message) || message.role !== \"assistant\") return message;\n const toolCalls = message.tool_calls;\n if (!Array.isArray(toolCalls) || toolCalls.length === 0) return message;\n\n let messageMutated = false;\n const nextToolCalls = toolCalls.map((toolCall) => {\n if (!isPlainObject(toolCall)) return toolCall;\n const fn = toolCall.function;\n if (!isPlainObject(fn) || typeof fn.arguments !== \"string\") return toolCall;\n\n let parsed: unknown;\n try 
{\n parsed = JSON.parse(fn.arguments);\n } catch {\n return toolCall; // fail open: never corrupt a replayed argument string\n }\n if (!isPlainObject(parsed)) return toolCall;\n\n const schema = typeof fn.name === \"string\" ? schemas.get(fn.name) : undefined;\n const reconstructed = unflattenGeminiToolArguments(parsed, schema);\n if (reconstructed === parsed) return toolCall;\n\n messageMutated = true;\n return { ...toolCall, function: { ...fn, arguments: JSON.stringify(reconstructed) } };\n });\n\n if (!messageMutated) return message;\n mutated = true;\n return { ...message, tool_calls: nextToolCalls };\n });\n\n if (!mutated) return payload;\n return { ...payload, messages: nextMessages };\n}\n"]}
|
|
1
|
+
{"version":3,"file":"copilot-gemini-tool-arguments.d.ts","sourceRoot":"","sources":["../../src/core/copilot-gemini-tool-arguments.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAiIxD;;;;;;;GAOG;AACH,wBAAgB,4BAA4B,CAAC,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,OAAO,CAcrF;AAED;;;;;GAKG;AACH,wBAAgB,8BAA8B,CAC5C,IAAI,EAAE,OAAO,EACb,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,GAAG,SAAS,EAC9D,MAAM,CAAC,EAAE,OAAO,GACf,OAAO,CAGT;AAoBD;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,yCAAyC,CACvD,OAAO,EAAE,OAAO,EAChB,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,UAAU,GAAG,KAAK,GAAG,IAAI,CAAC,GACjD,OAAO,CA2CT","sourcesContent":["import type { Api, Model } from \"@earendil-works/pi-ai\";\nimport { isCopilotGeminiModel } from \"./copilot-gemini-payload-sanitizer.ts\";\nimport { reconstructFlattenedKeys } from \"./flattened-tool-arguments.ts\";\n\n/**\n * Normalizes GitHub Copilot Gemini tool-call arguments.\n *\n * Why this exists\n * ---------------\n * `github-copilot` Gemini models are served through Copilot's CAPI gateway,\n * which proxies to Google's GenAI API. When a function/tool argument is an\n * array (or a nested object/array), Gemini serializes it on the wire as\n * **flattened, indexed keys** instead of a real JSON array/object. For example\n * a tool called with `{ keywords: [\"a\", \"b\"] }` arrives as:\n *\n * ```json\n * { \"keywords[0]\": \"a\", \"keywords[1]\": \"b\" }\n * ```\n *\n * This was confirmed by capturing the raw CAPI SSE stream: the\n * `tool_calls[].function.arguments` JSON itself contains the `name[index]`\n * keys, so the runtime parses valid-but-wrong JSON. Schema validation then\n * fails (`keywords: must have required properties keywords` and\n * `root: must not have additional properties`) and the model retries forever,\n * because it keeps re-emitting the same flattened shape. 
This is most visible\n * with the workflow `structured_output` tool but affects any Gemini tool call\n * whose schema contains an array or nested object.\n *\n * What it does\n * ------------\n * Reconstructs flattened keys (`name[i]`, `name[i].sub`, `parent.child`) back\n * into the intended nested arrays/objects, before tool-argument validation\n * runs. Bracket-indexed keys (`name[<digit>]`) are always reconstructed. A\n * purely dotted key (`parent.child`, with no array anywhere) is ambiguous —\n * a legitimate argument key can itself contain a dot — so it is only split when\n * the optional tool `schema` marks its head segment as an object/array\n * container property. When Gemini omits a required empty array entirely (there\n * are no `name[0]` keys to send), the schema is also used to synthesize `[]` for\n * missing required top-level array properties so normal validation can proceed.\n * The transform is gated to GitHub Copilot Gemini models, so it never touches\n * well-formed arguments from any other provider/model.\n */\n\ntype JsonRecord = Record<string, unknown>;\n\nfunction isPlainObject(value: unknown): value is JsonRecord {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\n/** A flattened key contains a bracket index like `foo[0]`. */\nfunction hasFlattenedKey(keys: string[]): boolean {\n return keys.some((key) => /\\[\\d+\\]/.test(key));\n}\n\n/** A schema node that holds a nested object/array (so dotted keys are real paths). */\nfunction isContainerSchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schema.type === \"object\" || schema.type === \"array\") return true;\n if (\"properties\" in schema || \"items\" in schema) return true;\n const union = schema.anyOf ?? schema.oneOf;\n if (Array.isArray(union)) return union.some((branch) => isContainerSchema(branch));\n return false;\n}\n\n/** Top-level property names whose schema is an object/array container. 
*/\nfunction containerPropertyNames(schema: unknown): Set<string> {\n const names = new Set<string>();\n if (!isPlainObject(schema)) return names;\n const properties = schema.properties;\n if (!isPlainObject(properties)) return names;\n for (const [name, sub] of Object.entries(properties)) {\n if (isContainerSchema(sub)) names.add(name);\n }\n return names;\n}\n\nfunction schemaTypeIncludes(schema: JsonRecord, type: string): boolean {\n if (schema.type === type) return true;\n return Array.isArray(schema.type) && schema.type.includes(type);\n}\n\nfunction isArraySchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schemaTypeIncludes(schema, \"array\")) return true;\n if (\"items\" in schema && !schemaTypeIncludes(schema, \"object\")) return true;\n const union = schema.anyOf ?? schema.oneOf;\n return Array.isArray(union) && union.some((branch) => isArraySchema(branch));\n}\n\nfunction requiredArrayPropertyNames(schema: unknown): readonly string[] {\n if (!isPlainObject(schema)) return [];\n const required = schema.required;\n const properties = schema.properties;\n if (!Array.isArray(required) || !isPlainObject(properties)) return [];\n return required.filter((name): name is string => (\n typeof name === \"string\" &&\n Object.hasOwn(properties, name) &&\n isArraySchema(properties[name])\n ));\n}\n\nfunction fillMissingRequiredArrayProperties(args: JsonRecord, schema: unknown): JsonRecord {\n const missing = requiredArrayPropertyNames(schema).filter((name) => !Object.hasOwn(args, name));\n if (missing.length === 0) return args;\n const next: JsonRecord = { ...args };\n for (const name of missing) next[name] = [];\n return next;\n}\n\n/** Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. 
*/\nfunction isDottedContainerKey(key: string, containers: Set<string>): boolean {\n const dot = key.indexOf(\".\");\n if (dot <= 0) return false;\n return containers.has(key.slice(0, dot));\n}\n\n/**\n * Decide whether a flattened key should be split into nested path segments.\n * Bracket-indexed keys always split. When a bracket key is present anywhere in\n * the payload, dotted keys split too (they are part of the same flattened\n * object). Otherwise a dotted key only splits when the schema marks its head as\n * a container property, which keeps legitimate dot-containing keys intact.\n */\nfunction shouldSplitKey(key: string, hasBracket: boolean, containers: Set<string>): boolean {\n if (/\\[\\d+\\]/.test(key)) return true;\n if (hasBracket) return true;\n return isDottedContainerKey(key, containers);\n}\n\n/**\n * Reconstruct flattened Gemini tool-call arguments into proper nested\n * arrays/objects. Returns the original reference unchanged when there is nothing\n * to reconstruct. Bracket-indexed keys are always reconstructed; purely dotted\n * keys are reconstructed only when the optional `schema` marks their head\n * segment as an object/array container property. Reconstruction (and its\n * prototype-pollution guard) is delegated to the shared canonical helper.\n */\nexport function unflattenGeminiToolArguments(args: unknown, schema?: unknown): unknown {\n if (!isPlainObject(args)) return args;\n const keys = Object.keys(args);\n const hasBracket = hasFlattenedKey(keys);\n const containers = hasBracket ? new Set<string>() : containerPropertyNames(schema);\n const hasDottedContainer =\n !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));\n const reconstructed = hasBracket || hasDottedContainer\n ? reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers))\n : args;\n\n return isPlainObject(reconstructed)\n ? 
fillMissingRequiredArrayProperties(reconstructed, schema)\n : reconstructed;\n}\n\n/**\n * If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call\n * arguments; otherwise return them unchanged. Used to gate\n * {@link unflattenGeminiToolArguments} by model at tool-call time. The optional\n * `schema` is the tool's parameter schema, used to disambiguate dotted keys.\n */\nexport function normalizeToolArgumentsForModel(\n args: unknown,\n model: Pick<Model<Api>, \"provider\" | \"api\" | \"id\"> | undefined,\n schema?: unknown,\n): unknown {\n if (!model || !isCopilotGeminiModel(model)) return args;\n return unflattenGeminiToolArguments(args, schema);\n}\n\n/** Map each tool name in an OpenAI chat-completions payload to its parameter schema. */\nfunction toolParameterSchemas(tools: unknown): Map<string, unknown> {\n const schemas = new Map<string, unknown>();\n if (!Array.isArray(tools)) return schemas;\n for (const tool of tools) {\n if (!isPlainObject(tool)) continue;\n // OpenAI chat-completions tool shape: { type: \"function\", function: { name, parameters } }.\n const fn = tool.function;\n if (isPlainObject(fn) && typeof fn.name === \"string\") {\n schemas.set(fn.name, fn.parameters);\n continue;\n }\n // Defensive: flat tool shape { name, parameters }.\n if (typeof tool.name === \"string\") schemas.set(tool.name, tool.parameters);\n }\n return schemas;\n}\n\n/**\n * Reconstruct flattened GitHub Copilot Gemini tool-call arguments on the\n * **outbound replay payload**, so prior assistant tool calls are sent back to\n * CAPI in the nested array/object shape Gemini originally produced.\n *\n * Why this exists\n * ---------------\n * {@link normalizeToolArgumentsForModel} only unflattens at tool *execution*\n * time; the persisted assistant message keeps the raw flattened arguments CAPI\n * delivered (for example `{ \"edits[0].newText\": \"...\" }`). 
When that message is\n * replayed on the next turn, CAPI parses those literal keys straight into the\n * Gemini `FunctionCall.Args`, producing a function call that does not match the\n * tool's declared schema (nor the structure Gemini signed). Gemini then ends\n * the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER`,\n * which CAPI surfaces as a bare `finish_reason: \"error\"` — so multi-turn tool\n * use dies one turn after any array/object tool call (such as `edit`).\n *\n * This rewrites each replayed assistant `tool_calls[].function.arguments` JSON\n * into the reconstructed nested shape (reusing {@link unflattenGeminiToolArguments}\n * with the tool's own parameter schema, looked up from the payload's `tools`),\n * fixing both new and already-persisted sessions. Gated to GitHub Copilot Gemini\n * models, fail-open on non-JSON arguments, and a no-op for well-formed args.\n */\nexport function normalizeCopilotGeminiReplayToolArguments(\n payload: unknown,\n model: Pick<Model<Api>, \"provider\" | \"api\" | \"id\">,\n): unknown {\n if (!isCopilotGeminiModel(model)) return payload;\n if (!isPlainObject(payload)) return payload;\n const messages = payload.messages;\n if (!Array.isArray(messages)) return payload;\n\n const schemas = toolParameterSchemas(payload.tools);\n let mutated = false;\n\n const nextMessages = messages.map((message) => {\n if (!isPlainObject(message) || message.role !== \"assistant\") return message;\n const toolCalls = message.tool_calls;\n if (!Array.isArray(toolCalls) || toolCalls.length === 0) return message;\n\n let messageMutated = false;\n const nextToolCalls = toolCalls.map((toolCall) => {\n if (!isPlainObject(toolCall)) return toolCall;\n const fn = toolCall.function;\n if (!isPlainObject(fn) || typeof fn.arguments !== \"string\") return toolCall;\n\n let parsed: unknown;\n try {\n parsed = JSON.parse(fn.arguments);\n } catch {\n return toolCall; // fail open: never corrupt a replayed argument string\n }\n if 
(!isPlainObject(parsed)) return toolCall;\n\n const schema = typeof fn.name === \"string\" ? schemas.get(fn.name) : undefined;\n const reconstructed = unflattenGeminiToolArguments(parsed, schema);\n if (reconstructed === parsed) return toolCall;\n\n messageMutated = true;\n return { ...toolCall, function: { ...fn, arguments: JSON.stringify(reconstructed) } };\n });\n\n if (!messageMutated) return message;\n mutated = true;\n return { ...message, tool_calls: nextToolCalls };\n });\n\n if (!mutated) return payload;\n return { ...payload, messages: nextMessages };\n}\n"]}
|
|
@@ -34,6 +34,41 @@ function containerPropertyNames(schema) {
|
|
|
34
34
|
}
|
|
35
35
|
return names;
|
|
36
36
|
}
|
|
37
|
+
function schemaTypeIncludes(schema, type) {
|
|
38
|
+
if (schema.type === type)
|
|
39
|
+
return true;
|
|
40
|
+
return Array.isArray(schema.type) && schema.type.includes(type);
|
|
41
|
+
}
|
|
42
|
+
function isArraySchema(schema) {
|
|
43
|
+
if (!isPlainObject(schema))
|
|
44
|
+
return false;
|
|
45
|
+
if (schemaTypeIncludes(schema, "array"))
|
|
46
|
+
return true;
|
|
47
|
+
if ("items" in schema && !schemaTypeIncludes(schema, "object"))
|
|
48
|
+
return true;
|
|
49
|
+
const union = schema.anyOf ?? schema.oneOf;
|
|
50
|
+
return Array.isArray(union) && union.some((branch) => isArraySchema(branch));
|
|
51
|
+
}
|
|
52
|
+
function requiredArrayPropertyNames(schema) {
|
|
53
|
+
if (!isPlainObject(schema))
|
|
54
|
+
return [];
|
|
55
|
+
const required = schema.required;
|
|
56
|
+
const properties = schema.properties;
|
|
57
|
+
if (!Array.isArray(required) || !isPlainObject(properties))
|
|
58
|
+
return [];
|
|
59
|
+
return required.filter((name) => (typeof name === "string" &&
|
|
60
|
+
Object.hasOwn(properties, name) &&
|
|
61
|
+
isArraySchema(properties[name])));
|
|
62
|
+
}
|
|
63
|
+
function fillMissingRequiredArrayProperties(args, schema) {
|
|
64
|
+
const missing = requiredArrayPropertyNames(schema).filter((name) => !Object.hasOwn(args, name));
|
|
65
|
+
if (missing.length === 0)
|
|
66
|
+
return args;
|
|
67
|
+
const next = { ...args };
|
|
68
|
+
for (const name of missing)
|
|
69
|
+
next[name] = [];
|
|
70
|
+
return next;
|
|
71
|
+
}
|
|
37
72
|
/** Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. */
|
|
38
73
|
function isDottedContainerKey(key, containers) {
|
|
39
74
|
const dot = key.indexOf(".");
|
|
@@ -70,9 +105,12 @@ export function unflattenGeminiToolArguments(args, schema) {
|
|
|
70
105
|
const hasBracket = hasFlattenedKey(keys);
|
|
71
106
|
const containers = hasBracket ? new Set() : containerPropertyNames(schema);
|
|
72
107
|
const hasDottedContainer = !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
108
|
+
const reconstructed = hasBracket || hasDottedContainer
|
|
109
|
+
? reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers))
|
|
110
|
+
: args;
|
|
111
|
+
return isPlainObject(reconstructed)
|
|
112
|
+
? fillMissingRequiredArrayProperties(reconstructed, schema)
|
|
113
|
+
: reconstructed;
|
|
76
114
|
}
|
|
77
115
|
/**
|
|
78
116
|
* If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"copilot-gemini-tool-arguments.js","sourceRoot":"","sources":["../../src/core/copilot-gemini-tool-arguments.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAwCzE,SAAS,aAAa,CAAC,KAAc;IACnC,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC;AAED,8DAA8D;AAC9D,SAAS,eAAe,CAAC,IAAc;IACrC,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AACjD,CAAC;AAED,sFAAsF;AACtF,SAAS,iBAAiB,CAAC,MAAe;IACxC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO;QAAE,OAAO,IAAI,CAAC;IACrE,IAAI,YAAY,IAAI,MAAM,IAAI,OAAO,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAC7D,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC;IAC3C,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC;IACnF,OAAO,KAAK,CAAC;AACf,CAAC;AAED,0EAA0E;AAC1E,SAAS,sBAAsB,CAAC,MAAe;IAC7C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACrC,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC;QAAE,OAAO,KAAK,CAAC;IAC7C,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,iBAAiB,CAAC,GAAG,CAAC;YAAE,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,uFAAuF;AACvF,SAAS,oBAAoB,CAAC,GAAW,EAAE,UAAuB;IAChE,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,GAAG,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC3B,OAAO,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,GAAW,EAAE,UAAmB,EAAE,UAAuB;IAC/E,IAAI,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACrC,IAAI,UAAU;QAAE,OAAO,IAAI,CAAC;IAC5B,OAAO,oBAAoB,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;AAC/C,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,4BAA4B,CAAC,IAAa,EAAE,MAAgB;IAC1E,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,C
AAC;IACtC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,GAAG,EAAU,CAAC,CAAC,CAAC,sBAAsB,CAAC,MAAM,CAAC,CAAC;IACnF,MAAM,kBAAkB,GACtB,CAAC,UAAU,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,oBAAoB,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;IAC3E,IAAI,CAAC,UAAU,IAAI,CAAC,kBAAkB;QAAE,OAAO,IAAI,CAAC;IAEpD,OAAO,wBAAwB,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC;AAC9F,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,8BAA8B,CAC5C,IAAa,EACb,KAA8D,EAC9D,MAAgB;IAEhB,IAAI,CAAC,KAAK,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACxD,OAAO,4BAA4B,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;AACpD,CAAC;AAED,wFAAwF;AACxF,SAAS,oBAAoB,CAAC,KAAc;IAC1C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAmB,CAAC;IAC3C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAC1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;YAAE,SAAS;QACnC,4FAA4F;QAC5F,MAAM,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC;QACzB,IAAI,aAAa,CAAC,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC;YACpC,SAAS;QACX,CAAC;QACD,mDAAmD;QACnD,IAAI,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ;YAAE,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,yCAAyC,CACvD,OAAgB,EAChB,KAAkD;IAElD,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IACjD,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC;QAAE,OAAO,OAAO,CAAC;IAC5C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;IAClC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC;QAAE,OAAO,OAAO,CAAC;IAE7C,MAAM,OAAO,GAAG,oBAAoB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACpD,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;QAC5C,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,WAAW;YAAE,OAAO,OAAO,CAAC;QAC5E,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,OAAO,CA
AC;QAExE,IAAI,cAAc,GAAG,KAAK,CAAC;QAC3B,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE;YAC/C,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC;gBAAE,OAAO,QAAQ,CAAC;YAC9C,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC;YAC7B,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,SAAS,KAAK,QAAQ;gBAAE,OAAO,QAAQ,CAAC;YAE5E,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,QAAQ,CAAC,CAAC,sDAAsD;YACzE,CAAC;YACD,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;gBAAE,OAAO,QAAQ,CAAC;YAE5C,MAAM,MAAM,GAAG,OAAO,EAAE,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC9E,MAAM,aAAa,GAAG,4BAA4B,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;YACnE,IAAI,aAAa,KAAK,MAAM;gBAAE,OAAO,QAAQ,CAAC;YAE9C,cAAc,GAAG,IAAI,CAAC;YACtB,OAAO,EAAE,GAAG,QAAQ,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,EAAE,EAAE,CAAC;QACxF,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc;YAAE,OAAO,OAAO,CAAC;QACpC,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,EAAE,GAAG,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,OAAO;QAAE,OAAO,OAAO,CAAC;IAC7B,OAAO,EAAE,GAAG,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;AAChD,CAAC","sourcesContent":["import type { Api, Model } from \"@earendil-works/pi-ai\";\nimport { isCopilotGeminiModel } from \"./copilot-gemini-payload-sanitizer.ts\";\nimport { reconstructFlattenedKeys } from \"./flattened-tool-arguments.ts\";\n\n/**\n * Normalizes GitHub Copilot Gemini tool-call arguments.\n *\n * Why this exists\n * ---------------\n * `github-copilot` Gemini models are served through Copilot's CAPI gateway,\n * which proxies to Google's GenAI API. When a function/tool argument is an\n * array (or a nested object/array), Gemini serializes it on the wire as\n * **flattened, indexed keys** instead of a real JSON array/object. 
For example\n * a tool called with `{ keywords: [\"a\", \"b\"] }` arrives as:\n *\n * ```json\n * { \"keywords[0]\": \"a\", \"keywords[1]\": \"b\" }\n * ```\n *\n * This was confirmed by capturing the raw CAPI SSE stream: the\n * `tool_calls[].function.arguments` JSON itself contains the `name[index]`\n * keys, so the runtime parses valid-but-wrong JSON. Schema validation then\n * fails (`keywords: must have required properties keywords` and\n * `root: must not have additional properties`) and the model retries forever,\n * because it keeps re-emitting the same flattened shape. This is most visible\n * with the workflow `structured_output` tool but affects any Gemini tool call\n * whose schema contains an array or nested object.\n *\n * What it does\n * ------------\n * Reconstructs flattened keys (`name[i]`, `name[i].sub`, `parent.child`) back\n * into the intended nested arrays/objects, before tool-argument validation\n * runs. Bracket-indexed keys (`name[<digit>]`) are always reconstructed. A\n * purely dotted key (`parent.child`, with no array anywhere) is ambiguous —\n * a legitimate argument key can itself contain a dot — so it is only split when\n * the optional tool `schema` marks its head segment as an object/array\n * container property. The transform is gated to GitHub Copilot Gemini models,\n * so it never touches well-formed arguments from any other provider/model.\n */\n\ntype JsonRecord = Record<string, unknown>;\n\nfunction isPlainObject(value: unknown): value is JsonRecord {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\n/** A flattened key contains a bracket index like `foo[0]`. */\nfunction hasFlattenedKey(keys: string[]): boolean {\n return keys.some((key) => /\\[\\d+\\]/.test(key));\n}\n\n/** A schema node that holds a nested object/array (so dotted keys are real paths). 
*/\nfunction isContainerSchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schema.type === \"object\" || schema.type === \"array\") return true;\n if (\"properties\" in schema || \"items\" in schema) return true;\n const union = schema.anyOf ?? schema.oneOf;\n if (Array.isArray(union)) return union.some((branch) => isContainerSchema(branch));\n return false;\n}\n\n/** Top-level property names whose schema is an object/array container. */\nfunction containerPropertyNames(schema: unknown): Set<string> {\n const names = new Set<string>();\n if (!isPlainObject(schema)) return names;\n const properties = schema.properties;\n if (!isPlainObject(properties)) return names;\n for (const [name, sub] of Object.entries(properties)) {\n if (isContainerSchema(sub)) names.add(name);\n }\n return names;\n}\n\n/** Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. */\nfunction isDottedContainerKey(key: string, containers: Set<string>): boolean {\n const dot = key.indexOf(\".\");\n if (dot <= 0) return false;\n return containers.has(key.slice(0, dot));\n}\n\n/**\n * Decide whether a flattened key should be split into nested path segments.\n * Bracket-indexed keys always split. When a bracket key is present anywhere in\n * the payload, dotted keys split too (they are part of the same flattened\n * object). Otherwise a dotted key only splits when the schema marks its head as\n * a container property, which keeps legitimate dot-containing keys intact.\n */\nfunction shouldSplitKey(key: string, hasBracket: boolean, containers: Set<string>): boolean {\n if (/\\[\\d+\\]/.test(key)) return true;\n if (hasBracket) return true;\n return isDottedContainerKey(key, containers);\n}\n\n/**\n * Reconstruct flattened Gemini tool-call arguments into proper nested\n * arrays/objects. Returns the original reference unchanged when there is nothing\n * to reconstruct. 
Bracket-indexed keys are always reconstructed; purely dotted\n * keys are reconstructed only when the optional `schema` marks their head\n * segment as an object/array container property. Reconstruction (and its\n * prototype-pollution guard) is delegated to the shared canonical helper.\n */\nexport function unflattenGeminiToolArguments(args: unknown, schema?: unknown): unknown {\n if (!isPlainObject(args)) return args;\n const keys = Object.keys(args);\n const hasBracket = hasFlattenedKey(keys);\n const containers = hasBracket ? new Set<string>() : containerPropertyNames(schema);\n const hasDottedContainer =\n !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));\n if (!hasBracket && !hasDottedContainer) return args;\n\n return reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers));\n}\n\n/**\n * If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call\n * arguments; otherwise return them unchanged. Used to gate\n * {@link unflattenGeminiToolArguments} by model at tool-call time. The optional\n * `schema` is the tool's parameter schema, used to disambiguate dotted keys.\n */\nexport function normalizeToolArgumentsForModel(\n args: unknown,\n model: Pick<Model<Api>, \"provider\" | \"api\" | \"id\"> | undefined,\n schema?: unknown,\n): unknown {\n if (!model || !isCopilotGeminiModel(model)) return args;\n return unflattenGeminiToolArguments(args, schema);\n}\n\n/** Map each tool name in an OpenAI chat-completions payload to its parameter schema. 
*/\nfunction toolParameterSchemas(tools: unknown): Map<string, unknown> {\n const schemas = new Map<string, unknown>();\n if (!Array.isArray(tools)) return schemas;\n for (const tool of tools) {\n if (!isPlainObject(tool)) continue;\n // OpenAI chat-completions tool shape: { type: \"function\", function: { name, parameters } }.\n const fn = tool.function;\n if (isPlainObject(fn) && typeof fn.name === \"string\") {\n schemas.set(fn.name, fn.parameters);\n continue;\n }\n // Defensive: flat tool shape { name, parameters }.\n if (typeof tool.name === \"string\") schemas.set(tool.name, tool.parameters);\n }\n return schemas;\n}\n\n/**\n * Reconstruct flattened GitHub Copilot Gemini tool-call arguments on the\n * **outbound replay payload**, so prior assistant tool calls are sent back to\n * CAPI in the nested array/object shape Gemini originally produced.\n *\n * Why this exists\n * ---------------\n * {@link normalizeToolArgumentsForModel} only unflattens at tool *execution*\n * time; the persisted assistant message keeps the raw flattened arguments CAPI\n * delivered (for example `{ \"edits[0].newText\": \"...\" }`). When that message is\n * replayed on the next turn, CAPI parses those literal keys straight into the\n * Gemini `FunctionCall.Args`, producing a function call that does not match the\n * tool's declared schema (nor the structure Gemini signed). Gemini then ends\n * the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER`,\n * which CAPI surfaces as a bare `finish_reason: \"error\"` — so multi-turn tool\n * use dies one turn after any array/object tool call (such as `edit`).\n *\n * This rewrites each replayed assistant `tool_calls[].function.arguments` JSON\n * into the reconstructed nested shape (reusing {@link unflattenGeminiToolArguments}\n * with the tool's own parameter schema, looked up from the payload's `tools`),\n * fixing both new and already-persisted sessions. 
Gated to GitHub Copilot Gemini\n * models, fail-open on non-JSON arguments, and a no-op for well-formed args.\n */\nexport function normalizeCopilotGeminiReplayToolArguments(\n payload: unknown,\n model: Pick<Model<Api>, \"provider\" | \"api\" | \"id\">,\n): unknown {\n if (!isCopilotGeminiModel(model)) return payload;\n if (!isPlainObject(payload)) return payload;\n const messages = payload.messages;\n if (!Array.isArray(messages)) return payload;\n\n const schemas = toolParameterSchemas(payload.tools);\n let mutated = false;\n\n const nextMessages = messages.map((message) => {\n if (!isPlainObject(message) || message.role !== \"assistant\") return message;\n const toolCalls = message.tool_calls;\n if (!Array.isArray(toolCalls) || toolCalls.length === 0) return message;\n\n let messageMutated = false;\n const nextToolCalls = toolCalls.map((toolCall) => {\n if (!isPlainObject(toolCall)) return toolCall;\n const fn = toolCall.function;\n if (!isPlainObject(fn) || typeof fn.arguments !== \"string\") return toolCall;\n\n let parsed: unknown;\n try {\n parsed = JSON.parse(fn.arguments);\n } catch {\n return toolCall; // fail open: never corrupt a replayed argument string\n }\n if (!isPlainObject(parsed)) return toolCall;\n\n const schema = typeof fn.name === \"string\" ? schemas.get(fn.name) : undefined;\n const reconstructed = unflattenGeminiToolArguments(parsed, schema);\n if (reconstructed === parsed) return toolCall;\n\n messageMutated = true;\n return { ...toolCall, function: { ...fn, arguments: JSON.stringify(reconstructed) } };\n });\n\n if (!messageMutated) return message;\n mutated = true;\n return { ...message, tool_calls: nextToolCalls };\n });\n\n if (!mutated) return payload;\n return { ...payload, messages: nextMessages };\n}\n"]}
|
|
1
|
+
{"version":3,"file":"copilot-gemini-tool-arguments.js","sourceRoot":"","sources":["../../src/core/copilot-gemini-tool-arguments.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,oBAAoB,EAAE,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AA2CzE,SAAS,aAAa,CAAC,KAAc;IACnC,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC;AAED,8DAA8D;AAC9D,SAAS,eAAe,CAAC,IAAc;IACrC,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AACjD,CAAC;AAED,sFAAsF;AACtF,SAAS,iBAAiB,CAAC,MAAe;IACxC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO;QAAE,OAAO,IAAI,CAAC;IACrE,IAAI,YAAY,IAAI,MAAM,IAAI,OAAO,IAAI,MAAM;QAAE,OAAO,IAAI,CAAC;IAC7D,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC;IAC3C,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC;IACnF,OAAO,KAAK,CAAC;AACf,CAAC;AAED,0EAA0E;AAC1E,SAAS,sBAAsB,CAAC,MAAe;IAC7C,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACrC,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC;QAAE,OAAO,KAAK,CAAC;IAC7C,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QACrD,IAAI,iBAAiB,CAAC,GAAG,CAAC;YAAE,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,kBAAkB,CAAC,MAAkB,EAAE,IAAY;IAC1D,IAAI,MAAM,CAAC,IAAI,KAAK,IAAI;QAAE,OAAO,IAAI,CAAC;IACtC,OAAO,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,aAAa,CAAC,MAAe;IACpC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACzC,IAAI,kBAAkB,CAAC,MAAM,EAAE,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IACrD,IAAI,OAAO,IAAI,MAAM,IAAI,CAAC,kBAAkB,CAAC,MAAM,EAAE,QAAQ,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5E,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC;IAC3C,OAAO,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM
,EAAE,EAAE,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;AAC/E,CAAC;AAED,SAAS,0BAA0B,CAAC,MAAe;IACjD,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;QAAE,OAAO,EAAE,CAAC;IACtC,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IACjC,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;IACrC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC;QAAE,OAAO,EAAE,CAAC;IACtE,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,CAC/C,OAAO,IAAI,KAAK,QAAQ;QACxB,MAAM,CAAC,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC;QAC/B,aAAa,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAChC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kCAAkC,CAAC,IAAgB,EAAE,MAAe;IAC3E,MAAM,OAAO,GAAG,0BAA0B,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;IAChG,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,IAAI,GAAe,EAAE,GAAG,IAAI,EAAE,CAAC;IACrC,KAAK,MAAM,IAAI,IAAI,OAAO;QAAE,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;IAC5C,OAAO,IAAI,CAAC;AACd,CAAC;AAED,uFAAuF;AACvF,SAAS,oBAAoB,CAAC,GAAW,EAAE,UAAuB;IAChE,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,GAAG,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAC3B,OAAO,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,GAAW,EAAE,UAAmB,EAAE,UAAuB;IAC/E,IAAI,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACrC,IAAI,UAAU;QAAE,OAAO,IAAI,CAAC;IAC5B,OAAO,oBAAoB,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;AAC/C,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,4BAA4B,CAAC,IAAa,EAAE,MAAgB;IAC1E,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IACzC,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,GAAG,EAAU,CAAC,CAAC,CAAC,sBAAsB,CAAC,MAAM,CAAC,CAAC;IACnF,MAAM,kBAAkB,GACtB,CAAC,UAAU,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,oBAAoB,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC,CAAC;IAC3E,MAAM,aAAa,GAAG,UAAU,IAAI,kBAAkB;QACpD,CAAC,CAAC,wBAAwB,CAAC,IAAI,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC;QACtF,CAAC,CAAC,IAAI,CAAC;IAET,OAAO,aAAa,CAAC,aAAa,CAAC;
QACjC,CAAC,CAAC,kCAAkC,CAAC,aAAa,EAAE,MAAM,CAAC;QAC3D,CAAC,CAAC,aAAa,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,8BAA8B,CAC5C,IAAa,EACb,KAA8D,EAC9D,MAAgB;IAEhB,IAAI,CAAC,KAAK,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACxD,OAAO,4BAA4B,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;AACpD,CAAC;AAED,wFAAwF;AACxF,SAAS,oBAAoB,CAAC,KAAc;IAC1C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAmB,CAAC;IAC3C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAC1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC;YAAE,SAAS;QACnC,4FAA4F;QAC5F,MAAM,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC;QACzB,IAAI,aAAa,CAAC,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACrD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC;YACpC,SAAS;QACX,CAAC;QACD,mDAAmD;QACnD,IAAI,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ;YAAE,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;IAC7E,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,yCAAyC,CACvD,OAAgB,EAChB,KAAkD;IAElD,IAAI,CAAC,oBAAoB,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IACjD,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC;QAAE,OAAO,OAAO,CAAC;IAC5C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;IAClC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC;QAAE,OAAO,OAAO,CAAC;IAE7C,MAAM,OAAO,GAAG,oBAAoB,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACpD,IAAI,OAAO,GAAG,KAAK,CAAC;IAEpB,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;QAC5C,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,WAAW;YAAE,OAAO,OAAO,CAAC;QAC5E,MAAM,SAAS,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,OAAO,CAAC;QAExE,IAAI,cAAc,GAAG,KAAK,CAAC;QAC3B,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE;YAC/C,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC;gBAAE,OAAO,QAAQ,CAAC;YAC9C,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC;YAC7B,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,SAAS,KAAK,QAAQ;gBAAE,OAAO,QAAQ,CAAC;YAE5E,IAAI,MAAe,CAAC;YACpB,IAAI,CAAC;gBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,QA
AQ,CAAC,CAAC,sDAAsD;YACzE,CAAC;YACD,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC;gBAAE,OAAO,QAAQ,CAAC;YAE5C,MAAM,MAAM,GAAG,OAAO,EAAE,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC9E,MAAM,aAAa,GAAG,4BAA4B,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;YACnE,IAAI,aAAa,KAAK,MAAM;gBAAE,OAAO,QAAQ,CAAC;YAE9C,cAAc,GAAG,IAAI,CAAC;YACtB,OAAO,EAAE,GAAG,QAAQ,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,EAAE,EAAE,CAAC;QACxF,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc;YAAE,OAAO,OAAO,CAAC;QACpC,OAAO,GAAG,IAAI,CAAC;QACf,OAAO,EAAE,GAAG,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,OAAO;QAAE,OAAO,OAAO,CAAC;IAC7B,OAAO,EAAE,GAAG,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;AAChD,CAAC","sourcesContent":["import type { Api, Model } from \"@earendil-works/pi-ai\";\nimport { isCopilotGeminiModel } from \"./copilot-gemini-payload-sanitizer.ts\";\nimport { reconstructFlattenedKeys } from \"./flattened-tool-arguments.ts\";\n\n/**\n * Normalizes GitHub Copilot Gemini tool-call arguments.\n *\n * Why this exists\n * ---------------\n * `github-copilot` Gemini models are served through Copilot's CAPI gateway,\n * which proxies to Google's GenAI API. When a function/tool argument is an\n * array (or a nested object/array), Gemini serializes it on the wire as\n * **flattened, indexed keys** instead of a real JSON array/object. For example\n * a tool called with `{ keywords: [\"a\", \"b\"] }` arrives as:\n *\n * ```json\n * { \"keywords[0]\": \"a\", \"keywords[1]\": \"b\" }\n * ```\n *\n * This was confirmed by capturing the raw CAPI SSE stream: the\n * `tool_calls[].function.arguments` JSON itself contains the `name[index]`\n * keys, so the runtime parses valid-but-wrong JSON. Schema validation then\n * fails (`keywords: must have required properties keywords` and\n * `root: must not have additional properties`) and the model retries forever,\n * because it keeps re-emitting the same flattened shape. 
This is most visible\n * with the workflow `structured_output` tool but affects any Gemini tool call\n * whose schema contains an array or nested object.\n *\n * What it does\n * ------------\n * Reconstructs flattened keys (`name[i]`, `name[i].sub`, `parent.child`) back\n * into the intended nested arrays/objects, before tool-argument validation\n * runs. Bracket-indexed keys (`name[<digit>]`) are always reconstructed. A\n * purely dotted key (`parent.child`, with no array anywhere) is ambiguous —\n * a legitimate argument key can itself contain a dot — so it is only split when\n * the optional tool `schema` marks its head segment as an object/array\n * container property. When Gemini omits a required empty array entirely (there\n * are no `name[0]` keys to send), the schema is also used to synthesize `[]` for\n * missing required top-level array properties so normal validation can proceed.\n * The transform is gated to GitHub Copilot Gemini models, so it never touches\n * well-formed arguments from any other provider/model.\n */\n\ntype JsonRecord = Record<string, unknown>;\n\nfunction isPlainObject(value: unknown): value is JsonRecord {\n return typeof value === \"object\" && value !== null && !Array.isArray(value);\n}\n\n/** A flattened key contains a bracket index like `foo[0]`. */\nfunction hasFlattenedKey(keys: string[]): boolean {\n return keys.some((key) => /\\[\\d+\\]/.test(key));\n}\n\n/** A schema node that holds a nested object/array (so dotted keys are real paths). */\nfunction isContainerSchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schema.type === \"object\" || schema.type === \"array\") return true;\n if (\"properties\" in schema || \"items\" in schema) return true;\n const union = schema.anyOf ?? schema.oneOf;\n if (Array.isArray(union)) return union.some((branch) => isContainerSchema(branch));\n return false;\n}\n\n/** Top-level property names whose schema is an object/array container. 
*/\nfunction containerPropertyNames(schema: unknown): Set<string> {\n const names = new Set<string>();\n if (!isPlainObject(schema)) return names;\n const properties = schema.properties;\n if (!isPlainObject(properties)) return names;\n for (const [name, sub] of Object.entries(properties)) {\n if (isContainerSchema(sub)) names.add(name);\n }\n return names;\n}\n\nfunction schemaTypeIncludes(schema: JsonRecord, type: string): boolean {\n if (schema.type === type) return true;\n return Array.isArray(schema.type) && schema.type.includes(type);\n}\n\nfunction isArraySchema(schema: unknown): boolean {\n if (!isPlainObject(schema)) return false;\n if (schemaTypeIncludes(schema, \"array\")) return true;\n if (\"items\" in schema && !schemaTypeIncludes(schema, \"object\")) return true;\n const union = schema.anyOf ?? schema.oneOf;\n return Array.isArray(union) && union.some((branch) => isArraySchema(branch));\n}\n\nfunction requiredArrayPropertyNames(schema: unknown): readonly string[] {\n if (!isPlainObject(schema)) return [];\n const required = schema.required;\n const properties = schema.properties;\n if (!Array.isArray(required) || !isPlainObject(properties)) return [];\n return required.filter((name): name is string => (\n typeof name === \"string\" &&\n Object.hasOwn(properties, name) &&\n isArraySchema(properties[name])\n ));\n}\n\nfunction fillMissingRequiredArrayProperties(args: JsonRecord, schema: unknown): JsonRecord {\n const missing = requiredArrayPropertyNames(schema).filter((name) => !Object.hasOwn(args, name));\n if (missing.length === 0) return args;\n const next: JsonRecord = { ...args };\n for (const name of missing) next[name] = [];\n return next;\n}\n\n/** Whether `key` is a pure dotted path (`parent.child`) headed by a container prop. 
*/\nfunction isDottedContainerKey(key: string, containers: Set<string>): boolean {\n const dot = key.indexOf(\".\");\n if (dot <= 0) return false;\n return containers.has(key.slice(0, dot));\n}\n\n/**\n * Decide whether a flattened key should be split into nested path segments.\n * Bracket-indexed keys always split. When a bracket key is present anywhere in\n * the payload, dotted keys split too (they are part of the same flattened\n * object). Otherwise a dotted key only splits when the schema marks its head as\n * a container property, which keeps legitimate dot-containing keys intact.\n */\nfunction shouldSplitKey(key: string, hasBracket: boolean, containers: Set<string>): boolean {\n if (/\\[\\d+\\]/.test(key)) return true;\n if (hasBracket) return true;\n return isDottedContainerKey(key, containers);\n}\n\n/**\n * Reconstruct flattened Gemini tool-call arguments into proper nested\n * arrays/objects. Returns the original reference unchanged when there is nothing\n * to reconstruct. Bracket-indexed keys are always reconstructed; purely dotted\n * keys are reconstructed only when the optional `schema` marks their head\n * segment as an object/array container property. Reconstruction (and its\n * prototype-pollution guard) is delegated to the shared canonical helper.\n */\nexport function unflattenGeminiToolArguments(args: unknown, schema?: unknown): unknown {\n if (!isPlainObject(args)) return args;\n const keys = Object.keys(args);\n const hasBracket = hasFlattenedKey(keys);\n const containers = hasBracket ? new Set<string>() : containerPropertyNames(schema);\n const hasDottedContainer =\n !hasBracket && keys.some((key) => isDottedContainerKey(key, containers));\n const reconstructed = hasBracket || hasDottedContainer\n ? reconstructFlattenedKeys(args, (key) => shouldSplitKey(key, hasBracket, containers))\n : args;\n\n return isPlainObject(reconstructed)\n ? 
fillMissingRequiredArrayProperties(reconstructed, schema)\n : reconstructed;\n}\n\n/**\n * If `model` is a GitHub Copilot Gemini model, normalize flattened tool-call\n * arguments; otherwise return them unchanged. Used to gate\n * {@link unflattenGeminiToolArguments} by model at tool-call time. The optional\n * `schema` is the tool's parameter schema, used to disambiguate dotted keys.\n */\nexport function normalizeToolArgumentsForModel(\n args: unknown,\n model: Pick<Model<Api>, \"provider\" | \"api\" | \"id\"> | undefined,\n schema?: unknown,\n): unknown {\n if (!model || !isCopilotGeminiModel(model)) return args;\n return unflattenGeminiToolArguments(args, schema);\n}\n\n/** Map each tool name in an OpenAI chat-completions payload to its parameter schema. */\nfunction toolParameterSchemas(tools: unknown): Map<string, unknown> {\n const schemas = new Map<string, unknown>();\n if (!Array.isArray(tools)) return schemas;\n for (const tool of tools) {\n if (!isPlainObject(tool)) continue;\n // OpenAI chat-completions tool shape: { type: \"function\", function: { name, parameters } }.\n const fn = tool.function;\n if (isPlainObject(fn) && typeof fn.name === \"string\") {\n schemas.set(fn.name, fn.parameters);\n continue;\n }\n // Defensive: flat tool shape { name, parameters }.\n if (typeof tool.name === \"string\") schemas.set(tool.name, tool.parameters);\n }\n return schemas;\n}\n\n/**\n * Reconstruct flattened GitHub Copilot Gemini tool-call arguments on the\n * **outbound replay payload**, so prior assistant tool calls are sent back to\n * CAPI in the nested array/object shape Gemini originally produced.\n *\n * Why this exists\n * ---------------\n * {@link normalizeToolArgumentsForModel} only unflattens at tool *execution*\n * time; the persisted assistant message keeps the raw flattened arguments CAPI\n * delivered (for example `{ \"edits[0].newText\": \"...\" }`). 
When that message is\n * replayed on the next turn, CAPI parses those literal keys straight into the\n * Gemini `FunctionCall.Args`, producing a function call that does not match the\n * tool's declared schema (nor the structure Gemini signed). Gemini then ends\n * the turn with `MALFORMED_FUNCTION_CALL` / `UNEXPECTED_TOOL_CALL` / `OTHER`,\n * which CAPI surfaces as a bare `finish_reason: \"error\"` — so multi-turn tool\n * use dies one turn after any array/object tool call (such as `edit`).\n *\n * This rewrites each replayed assistant `tool_calls[].function.arguments` JSON\n * into the reconstructed nested shape (reusing {@link unflattenGeminiToolArguments}\n * with the tool's own parameter schema, looked up from the payload's `tools`),\n * fixing both new and already-persisted sessions. Gated to GitHub Copilot Gemini\n * models, fail-open on non-JSON arguments, and a no-op for well-formed args.\n */\nexport function normalizeCopilotGeminiReplayToolArguments(\n payload: unknown,\n model: Pick<Model<Api>, \"provider\" | \"api\" | \"id\">,\n): unknown {\n if (!isCopilotGeminiModel(model)) return payload;\n if (!isPlainObject(payload)) return payload;\n const messages = payload.messages;\n if (!Array.isArray(messages)) return payload;\n\n const schemas = toolParameterSchemas(payload.tools);\n let mutated = false;\n\n const nextMessages = messages.map((message) => {\n if (!isPlainObject(message) || message.role !== \"assistant\") return message;\n const toolCalls = message.tool_calls;\n if (!Array.isArray(toolCalls) || toolCalls.length === 0) return message;\n\n let messageMutated = false;\n const nextToolCalls = toolCalls.map((toolCall) => {\n if (!isPlainObject(toolCall)) return toolCall;\n const fn = toolCall.function;\n if (!isPlainObject(fn) || typeof fn.arguments !== \"string\") return toolCall;\n\n let parsed: unknown;\n try {\n parsed = JSON.parse(fn.arguments);\n } catch {\n return toolCall; // fail open: never corrupt a replayed argument string\n }\n if 
(!isPlainObject(parsed)) return toolCall;\n\n const schema = typeof fn.name === \"string\" ? schemas.get(fn.name) : undefined;\n const reconstructed = unflattenGeminiToolArguments(parsed, schema);\n if (reconstructed === parsed) return toolCall;\n\n messageMutated = true;\n return { ...toolCall, function: { ...fn, arguments: JSON.stringify(reconstructed) } };\n });\n\n if (!messageMutated) return message;\n mutated = true;\n return { ...message, tool_calls: nextToolCalls };\n });\n\n if (!mutated) return payload;\n return { ...payload, messages: nextMessages };\n}\n"]}
|
package/docs/workflows.md
CHANGED
|
@@ -159,7 +159,7 @@ For the builtin result tables below, `deep-research-codebase`, `goal`, and `ralp
|
|
|
159
159
|
|---|---|---|
|
|
160
160
|
| `deep-research-codebase` | Scout + research-history chain → parallel specialist waves → aggregator. Indexes the whole repo and synthesizes findings. | Broad or cross-cutting research before you decide what to change. Prefer `/skill:research-codebase` for one subsystem. |
|
|
161
161
|
| `goal` | Persisted goal ledger → bounded worker turns → receipts → three-reviewer gate → deterministic reducer → final report → optional final-stage PR handoff after approval. | Small-to-medium scope changes when you can identify the work surface, state the exact outcome, name the validation that proves it is done, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true` after Goal reaches `complete`. |
|
|
162
|
-
| `ralph` |
|
|
162
|
+
| `ralph` | Raw prompt → research-prompt-refinement → codebase/online research → sub-agent orchestration → multi-model parallel review → optional final-stage PR handoff. | Larger migrations, broad refactors, and multi-package changes where you want Atomic to use your prompt as-is, transform it into a research question, research the codebase before implementing, delegate through sub-agents, review, iterate, and optionally allow only the final `pull-request` stage to attempt PR creation with `create_pr=true`. |
|
|
163
163
|
| `open-claude-design` | Combined discovery/init (`/skill:impeccable shape` + `/skill:impeccable init` in one `discovery` stage) → design-system/reference research (`ds-*`) → curated gallery reference-discovery using that context → a forked `generate-*` / `user-feedback-*` loop → rich HTML handoff (`exporter` → `final-display`). The discovery stage asks what to build, the output type, and which references to emulate, then lets impeccable init detect/create/reconcile `PRODUCT.md` and `DESIGN.md` (references take precedence over project context). Renders a live `preview.html` you can iterate against in the browser (opens through impeccable `live` / the `playwright-cli` skill when available). | UI, page, component, theme, or design-token work that benefits from a guided brief, beautiful references, and generation + user feedback loops. |
|
|
164
164
|
|
|
165
165
|
### `deep-research-codebase`
|
|
@@ -228,11 +228,11 @@ Run examples:
|
|
|
228
228
|
/workflow goal objective="Implement the focused docs fix, run the docs validation command, and open a PR when complete" create_pr=true
|
|
229
229
|
```
|
|
230
230
|
|
|
231
|
-
`goal`
|
|
231
|
+
`goal` uses the raw `objective` exactly as supplied as the operative objective recorded in the ledger; it does not run an initial prompt-refinement stage. It creates an OS-temp `goal-ledger.json` artifact, renders goal-continuation context for each worker turn, writes each worker receipt to `work-turn-N.md`, and appends receipts, reviewer decisions, blockers, reducer decisions, and lifecycle events to the ledger. The objective is treated as user-provided data, not higher-priority instructions. By default `goal` does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling, after Goal reaches `complete` within `max_turns`. Goal worker and reviewer prompts explicitly tell intermediate stages to ignore PR-creation requests; only the final `pull-request` stage may attempt that handoff.
|
|
232
232
|
|
|
233
233
|
Write the `objective` like a compact acceptance spec. Say what should exist when the run is done, how you want testing handled, which command(s) or manual checks matter, and what outcome proves completion. The workflow is intentionally lean: it does not first generate an RFC or migration plan, so the developer-supplied objective is where scope, validation, and completion criteria belong.
|
|
234
234
|
|
|
235
|
-
The worker may claim readiness, but it cannot finalize completion. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Three reviewers independently inspect the ledger, worker receipt, repository state, and diff against `base_branch`; each returns structured JSON with findings, evidence, verification still remaining, and an optional blocker. A TypeScript reducer marks the goal complete only when reviewer quorum approves, marks blocked only when the same dependency/tool blocker repeats for the blocker threshold, continues when evidence is missing, and returns `needs_human` when `max_turns` is exhausted or worker execution fails.
|
|
235
|
+
The worker may claim readiness, but it cannot finalize completion. Workers and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. Goal reviewers also look for any QA E2E video referenced by the ledger or receipt and must inspect the actual video before treating it as proof. Three reviewers independently inspect the ledger, worker receipt, repository state, and diff against `base_branch`; each returns structured JSON with findings, evidence, verification still remaining, and an optional blocker. A TypeScript reducer marks the goal complete only when reviewer quorum approves, marks blocked only when the same dependency/tool blocker repeats for the blocker threshold, continues when evidence is missing, and returns `needs_human` when `max_turns` is exhausted or worker execution fails.
|
|
236
236
|
|
|
237
237
|
Result fields:
|
|
238
238
|
|
|
@@ -242,8 +242,7 @@ Result fields:
|
|
|
242
242
|
| `status` | Final reducer status: `complete`, `blocked`, or `needs_human` (or `active` only if externally interrupted). |
|
|
243
243
|
| `approved` | Whether the reducer reached `complete`. |
|
|
244
244
|
| `goal_id` | Per-run goal identifier stored in the ledger. |
|
|
245
|
-
| `objective` |
|
|
246
|
-
| `original_objective` | The raw user-provided objective exactly as given, before `prompt-refinement`. Omitted when refinement left it unchanged. |
|
|
245
|
+
| `objective` | Raw goal objective used by the run. |
|
|
247
246
|
| `ledger_path` | OS-temp path to `goal-ledger.json`, including receipts, reviewer decisions, reducer decisions, blockers, and lifecycle events. |
|
|
248
247
|
| `turns_completed` | Worker/review turns completed. |
|
|
249
248
|
| `iterations_completed` | Same value as `turns_completed`, retained for status summaries. |
|
|
@@ -273,7 +272,7 @@ Run examples:
|
|
|
273
272
|
/workflow ralph prompt="Safely implement the API refactor" git_worktree_dir=../atomic-ralph-api-wt base_branch=main
|
|
274
273
|
```
|
|
275
274
|
|
|
276
|
-
Each `ralph` run
|
|
275
|
+
Each `ralph` run uses the raw `prompt` exactly as supplied as the operative objective for research, orchestration, and review; it does not run an initial prompt-refinement stage. Each iteration transforms that raw prompt with `/skill:prompt-engineer Transform the following user request into a codebase and online research question which can be thoroughly explored: ...` (`research-prompt-refinement`), researches that transformed question with `/skill:research-codebase ...`, and writes the findings under `research/`. The orchestrator treats that research artifact as its primary implementation context, initializes/updates an OS-temp implementation notes file while generating verifiable evidence for any claims it records in the notes and reviewer artifacts, delegates implementation through sub-agents, and asks three independent reviewers to inspect the patch directly against `base_branch`. The reviewer fan-out runs reviewers on different primary model families (Claude Fable 5, GPT-5.5 Codex, and Gemini 3.1 Pro, with shared fallbacks) so the adversarial review gets cross-model coverage instead of three passes from one model. Ralph's orchestrator and reviewers are prompted to verify user-visible behavior end-to-end when practical, using `playwright-cli`-skilled subagents for web/frontend flows that may depend on backend/API behavior and tmux-skilled subagents for TUI or terminal-app scenarios. For UI-applicable or full-stack changes, the orchestrator runs a `playwright-cli` end-to-end QA pass and records a reviewable proof video (referenced in the implementation notes and surfaced as `qa_video_path`); reviewers receive that path and must inspect the actual video before treating it as proof. When `create_pr=true`, the final `pull-request` stage attaches or links that video to the created PR/MR/review. 
If reviewers find issues, the next `research-prompt-refinement` and research stages receive the review artifact path so follow-up research can address unresolved findings, and research stages fork from prior research session data when available. The loop stops only when all three reviewers independently approve (each finds no issues) or `max_loops` is reached, so a P0–P3 finding from any single reviewer keeps Ralph iterating instead of being out-voted by a majority quorum. By default Ralph does not start the final `pull-request` stage, and `pr_report` is omitted. Prompt text alone does not opt in. Pass `create_pr=true` only when you explicitly want the final `pull-request` stage to inspect provider credentials and attempt provider-appropriate PR/MR/review creation, such as GitHub `gh`, Azure Repos `az repos pr create`, or Sapling/Phabricator tooling; Ralph's own PR-creation instructions live in that final stage.
|
|
277
276
|
|
|
278
277
|
Set `git_worktree_dir` when you want Ralph's worker stages isolated in a reusable Git worktree. Relative paths resolve from the invoking repository root, existing same-repository worktree roots are reused, and missing paths are created from `base_branch`. Ralph preserves the invoking repo-relative cwd inside the worktree, so launching from `repo/packages/api` with `git_worktree_dir=../repo-wt` runs stages from `../repo-wt/packages/api`.
|
|
279
278
|
|
|
@@ -293,10 +292,8 @@ Result fields:
|
|
|
293
292
|
| `iterations_completed` | Number of research/orchestrate/review loops completed. |
|
|
294
293
|
| `review_report` | Compact reference to the latest reviewer payload artifact. |
|
|
295
294
|
| `review_report_path` | JSON artifact path for the latest Ralph review round. |
|
|
296
|
-
| `original_prompt` | The raw user prompt exactly as provided, before the `prompt-refinement` stage. |
|
|
297
|
-
| `refined_prompt` | The clarity-refined prompt produced by the `prompt-refinement` stage and used as the operative objective for research, orchestration, and review. |
|
|
298
295
|
|
|
299
|
-
A typical planned flow is `/skill:research-codebase` → `/skill:create-spec` → `/workflow ralph prompt="Implement specs/2026-03-rate-limit.md and validate the documented burst behavior"`. Ralph can start from a spec path, GitHub issue, or crisp ticket description, then
|
|
296
|
+
A typical planned flow is `/skill:research-codebase` → `/skill:create-spec` → `/workflow ralph prompt="Implement specs/2026-03-rate-limit.md and validate the documented burst behavior"`. Ralph can start from a spec path, GitHub issue, or crisp ticket description; it then uses that prompt as-is, researches as needed, delegates through sub-agents, reviews, records a QA proof video for UI/full-stack changes when practical, and iterates. For smaller one-off tasks, use `/workflow goal` with a concrete objective that identifies the work surface, states the exact outcome, and names the validation that proves it is done; add `create_pr=true` only when you want Goal's final `pull-request` stage after approval.
|
|
300
297
|
|
|
301
298
|
### `open-claude-design`
|
|
302
299
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/atomic",
|
|
3
|
-
"version": "0.9.
|
|
3
|
+
"version": "0.9.2-alpha.1",
|
|
4
4
|
"description": "Atomic coding agent CLI with read, bash, edit, write tools and session management",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"atomicConfig": {
|
|
@@ -68,34 +68,34 @@
|
|
|
68
68
|
"prepublishOnly": "bun run clean && bun run build"
|
|
69
69
|
},
|
|
70
70
|
"dependencies": {
|
|
71
|
-
"@bastani/atomic-natives": "0.9.
|
|
71
|
+
"@bastani/atomic-natives": "0.9.2-alpha.1",
|
|
72
72
|
"@bufbuild/protobuf": "^2.0.0",
|
|
73
|
-
"@earendil-works/pi-agent-core": "^0.79.
|
|
74
|
-
"@earendil-works/pi-ai": "^0.79.
|
|
75
|
-
"@earendil-works/pi-tui": "^0.79.
|
|
73
|
+
"@earendil-works/pi-agent-core": "^0.79.10",
|
|
74
|
+
"@earendil-works/pi-ai": "^0.79.10",
|
|
75
|
+
"@earendil-works/pi-tui": "^0.79.10",
|
|
76
76
|
"@modelcontextprotocol/ext-apps": "^1.7.2",
|
|
77
77
|
"@modelcontextprotocol/sdk": "^1.25.1",
|
|
78
78
|
"@mozilla/readability": "^0.6.0",
|
|
79
|
-
"@silvia-odwyer/photon-node": "
|
|
80
|
-
"chalk": "
|
|
79
|
+
"@silvia-odwyer/photon-node": "0.3.4",
|
|
80
|
+
"chalk": "5.6.2",
|
|
81
81
|
"cross-spawn": "7.0.6",
|
|
82
|
-
"diff": "
|
|
83
|
-
"glob": "
|
|
84
|
-
"highlight.js": "
|
|
85
|
-
"hosted-git-info": "
|
|
86
|
-
"ignore": "
|
|
87
|
-
"jiti": "
|
|
82
|
+
"diff": "8.0.4",
|
|
83
|
+
"glob": "13.0.6",
|
|
84
|
+
"highlight.js": "10.7.3",
|
|
85
|
+
"hosted-git-info": "9.0.3",
|
|
86
|
+
"ignore": "7.0.5",
|
|
87
|
+
"jiti": "2.7.0",
|
|
88
88
|
"linkedom": "^0.18.12",
|
|
89
|
-
"minimatch": "
|
|
89
|
+
"minimatch": "10.2.5",
|
|
90
90
|
"open": "^11.0.0",
|
|
91
91
|
"p-limit": "^7.3.0",
|
|
92
|
-
"proper-lockfile": "
|
|
93
|
-
"semver": "
|
|
92
|
+
"proper-lockfile": "4.1.2",
|
|
93
|
+
"semver": "7.8.0",
|
|
94
94
|
"turndown": "^7.2.0",
|
|
95
|
-
"typebox": "
|
|
96
|
-
"undici": "
|
|
95
|
+
"typebox": "1.1.38",
|
|
96
|
+
"undici": "8.5.0",
|
|
97
97
|
"unpdf": "^1.6.2",
|
|
98
|
-
"yaml": "
|
|
98
|
+
"yaml": "2.9.0",
|
|
99
99
|
"zod": "^3.25.0 || ^4.0.0"
|
|
100
100
|
},
|
|
101
101
|
"overrides": {
|
|
@@ -105,20 +105,20 @@
|
|
|
105
105
|
}
|
|
106
106
|
},
|
|
107
107
|
"optionalDependencies": {
|
|
108
|
-
"@mariozechner/clipboard": "
|
|
108
|
+
"@mariozechner/clipboard": "0.3.9"
|
|
109
109
|
},
|
|
110
110
|
"devDependencies": {
|
|
111
111
|
"@types/cross-spawn": "6.0.6",
|
|
112
|
-
"@types/diff": "
|
|
113
|
-
"@types/hosted-git-info": "
|
|
114
|
-
"@types/ms": "
|
|
115
|
-
"@types/node": "
|
|
116
|
-
"@types/proper-lockfile": "
|
|
117
|
-
"@types/semver": "
|
|
112
|
+
"@types/diff": "7.0.2",
|
|
113
|
+
"@types/hosted-git-info": "3.0.5",
|
|
114
|
+
"@types/ms": "2.1.0",
|
|
115
|
+
"@types/node": "24.12.4",
|
|
116
|
+
"@types/proper-lockfile": "4.1.4",
|
|
117
|
+
"@types/semver": "7.7.1",
|
|
118
118
|
"@typescript/native-preview": "7.0.0-dev.20260511.1",
|
|
119
|
-
"shx": "
|
|
120
|
-
"typescript": "
|
|
121
|
-
"vitest": "
|
|
119
|
+
"shx": "0.4.0",
|
|
120
|
+
"typescript": "5.9.3",
|
|
121
|
+
"vitest": "4.1.9"
|
|
122
122
|
},
|
|
123
123
|
"keywords": [
|
|
124
124
|
"coding-agent",
|
|
@@ -136,6 +136,6 @@
|
|
|
136
136
|
"directory": "packages/coding-agent"
|
|
137
137
|
},
|
|
138
138
|
"engines": {
|
|
139
|
-
"node": ">=
|
|
139
|
+
"node": ">=22.19.0"
|
|
140
140
|
}
|
|
141
141
|
}
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Shared prompt-refinement stage used by the ralph and goal workflows.
|
|
3
|
-
*
|
|
4
|
-
* Before the main work loop begins, both workflows run this single
|
|
5
|
-
* `prompt-refinement` stage. The stage uses the Workflow Best Practices prompt
|
|
6
|
-
* anatomy documented in `packages/coding-agent/docs/workflows.md` to sharpen the
|
|
7
|
-
* raw user request into a clearer, more actionable objective. The refined
|
|
8
|
-
* request replaces the original as the operative objective downstream; the
|
|
9
|
-
* original is preserved by each workflow for reporting.
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import type { WorkflowModelValue, WorkflowTaskOptions, WorkflowTaskResult } from "../src/shared/types.js";
|
|
13
|
-
|
|
14
|
-
/** An immutable `[tag, content]` pair describing one tagged section of a prompt. */
export type PromptSection = Readonly<[tag: string, content: string]>;
|
|
15
|
-
|
|
16
|
-
/**
 * Clarity rubric embedded in the refinement prompt.
 *
 * The text is a lead instruction that points at the `## Workflow Best Practices`
 * section of `docs/workflows.md`, followed by one `Element — guidance` line per
 * prompt-anatomy element. All lines are joined with a single newline.
 */
export const PROMPT_REFINEMENT_CRITERIA: string = [
  "Apply the workflow best practices documented in the `## Workflow Best Practices` section of `docs/workflows.md` to transform the raw request into a clear and verifiable objective. Treat that section as the authoritative prompt-anatomy rubric: use its Objective, Context, Scope, Non-goals, Done criteria, Validation command, Reporting requirements, and Stop conditions when refining the request.",
  ...(
    [
      ["Objective", "state what should be true when the work is complete."],
      ["Context", "note why it matters and where the relevant code or area likely lives."],
      ["Scope", "state what is allowed to change (the smallest correct change)."],
      ["Non-goals", "state what to avoid (unrelated refactors, redesigns, or behavior changes outside this case)."],
      ["Done criteria", "list verifiable completion signals: new behavior works, existing behavior is unchanged, and the validation command passes."],
      ["Validation command", "name the targeted check that proves the result."],
      ["Reporting requirements", "changed files, validation results, and remaining risks must be reported."],
      ["Stop conditions", "name the cases where the agent should stop and ask first (public API, security, data migration, etc.)."],
    ] as const
  ).map(([element, guidance]) => `${element} — ${guidance}`),
].join("\n");
|
|
33
|
-
|
|
34
|
-
/**
 * Render the text sent to the prompt-refinement stage.
 *
 * The prompt consists of an opening instruction, the raw request wrapped in
 * `<original_request>`, the rubric wrapped in `<instructions>`, and an
 * `<output_format>` section asking for the refined request only. When
 * `workflowCwdContext` is supplied, one more section is appended whose tag is
 * the pair's first element and whose body is the trimmed second element.
 * Sections are separated by a blank line.
 *
 * @param args.request The raw user request, embedded as-is.
 * @param args.workflowCwdContext Optional `[tag, content]` section appended last.
 * @returns The assembled prompt string.
 */
export function renderPromptRefinementPrompt(args: {
  readonly request: string;
  readonly workflowCwdContext?: PromptSection;
}): string {
  const { request, workflowCwdContext } = args;
  const parts: string[] = [];

  parts.push(
    "Refine the following user request into a clear and verifiable objective. Improve clarity and completeness using the rubric below without changing the user's intent, expanding scope, or inventing requirements that cannot be reasonably inferred from the request.",
  );
  parts.push(`<original_request>\n${request}\n</original_request>`);
  parts.push(`<instructions>\n${PROMPT_REFINEMENT_CRITERIA}\n</instructions>`);
  parts.push(
    "<output_format>\nReturn ONLY the refined request. No preamble, no explanation, and no Markdown fences. The returned text replaces the original request as the operative objective for the rest of the workflow, so it must be a single self-contained request.\n</output_format>",
  );

  // The optional context section always goes last; only its content is trimmed.
  if (workflowCwdContext !== undefined) {
    const [tag, content] = workflowCwdContext;
    parts.push(`<${tag}>\n${content.trim()}\n</${tag}>`);
  }

  return parts.join("\n\n");
}
|
|
54
|
-
|
|
55
|
-
/**
 * The only capability the refinement stage needs from a workflow context:
 * a `task` method that takes a stage name plus task options and resolves to
 * the task result.
 */
type PromptRefinementContext = {
  task(
    name: string,
    options: WorkflowTaskOptions,
  ): Promise<WorkflowTaskResult>;
};
|
|
59
|
-
|
|
60
|
-
/**
 * Optional model and tool settings that are spread into the task options of
 * the refinement stage. Every field is optional and read-only.
 */
export type PromptRefinementModelConfig = Readonly<{
  model?: WorkflowModelValue;
  fallbackModels?: ReadonlyArray<string>;
  noTools?: "all" | "builtin";
  excludedTools?: ReadonlyArray<string>;
  tools?: ReadonlyArray<string>;
}>;
|
|
68
|
-
|
|
69
|
-
/**
 * Execute the `prompt-refinement` task once and hand back the refined request.
 *
 * The rendered refinement prompt is passed as `prompt`, followed by the
 * entries of `options.modelConfig`. The task's `text` is trimmed; if nothing
 * remains (or `text` is nullish) the original `options.request` is returned
 * unchanged.
 *
 * @param ctx Context exposing the `task` runner.
 * @param options.request The raw request to refine.
 * @param options.workflowCwdContext Optional extra prompt section.
 * @param options.modelConfig Model and tool settings forwarded to the task.
 * @returns The trimmed refined request, or the original request as a fallback.
 */
export async function runPromptRefinementStage(
  ctx: PromptRefinementContext,
  options: {
    readonly request: string;
    readonly workflowCwdContext?: PromptSection;
    readonly modelConfig: PromptRefinementModelConfig;
  },
): Promise<string> {
  const { request, workflowCwdContext, modelConfig } = options;

  // Only pass the context key when it is actually present, never as `undefined`.
  const prompt =
    workflowCwdContext === undefined
      ? renderPromptRefinementPrompt({ request })
      : renderPromptRefinementPrompt({ request, workflowCwdContext });

  const stage = await ctx.task("prompt-refinement", { prompt, ...modelConfig });

  const candidate = (stage.text ?? "").trim();
  if (candidate.length === 0) {
    return request;
  }
  return candidate;
}
|