@gajae-code/coding-agent 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/dist/types/async/job-manager.d.ts +44 -1
- package/dist/types/cli/setup-cli.d.ts +14 -1
- package/dist/types/commands/coordinator.d.ts +19 -0
- package/dist/types/commands/mcp-serve.d.ts +24 -0
- package/dist/types/commands/setup.d.ts +41 -0
- package/dist/types/commit/model-selection.d.ts +1 -1
- package/dist/types/config/model-registry.d.ts +3 -1
- package/dist/types/config/model-resolver.d.ts +1 -19
- package/dist/types/config/models-config-schema.d.ts +12 -0
- package/dist/types/config/settings-schema.d.ts +15 -1
- package/dist/types/coordinator/contract.d.ts +4 -0
- package/dist/types/coordinator-mcp/policy.d.ts +24 -0
- package/dist/types/coordinator-mcp/safety.d.ts +26 -0
- package/dist/types/coordinator-mcp/server.d.ts +52 -0
- package/dist/types/extensibility/extensions/types.d.ts +13 -0
- package/dist/types/gjc-runtime/goal-mode-request.d.ts +8 -1
- package/dist/types/gjc-runtime/session-state-sidecar.d.ts +13 -0
- package/dist/types/harness-control-plane/types.d.ts +7 -2
- package/dist/types/modes/acp/acp-event-mapper.d.ts +2 -0
- package/dist/types/modes/components/custom-editor.d.ts +7 -0
- package/dist/types/modes/components/hook-selector.d.ts +11 -0
- package/dist/types/modes/shared/agent-wire/command-contract.d.ts +18 -0
- package/dist/types/modes/shared/agent-wire/event-contract.d.ts +84 -0
- package/dist/types/modes/shared/agent-wire/event-envelope.d.ts +14 -7
- package/dist/types/modes/shared/agent-wire/event-observation.d.ts +37 -0
- package/dist/types/modes/shared/agent-wire/protocol.d.ts +13 -34
- package/dist/types/session/agent-session.d.ts +12 -1
- package/dist/types/session/session-manager.d.ts +1 -1
- package/dist/types/setup/hermes-setup.d.ts +71 -0
- package/dist/types/task/render.d.ts +7 -1
- package/dist/types/tools/bash.d.ts +2 -0
- package/dist/types/tools/browser/actions.d.ts +54 -0
- package/dist/types/tools/browser.d.ts +80 -0
- package/dist/types/tools/image-gen.d.ts +1 -0
- package/dist/types/tools/index.d.ts +3 -1
- package/dist/types/tools/job.d.ts +1 -1
- package/dist/types/tools/subagent-render.d.ts +25 -0
- package/dist/types/tools/subagent.d.ts +5 -1
- package/package.json +7 -7
- package/src/async/job-manager.ts +163 -2
- package/src/cli/setup-cli.ts +86 -2
- package/src/cli.ts +2 -0
- package/src/commands/coordinator.ts +70 -0
- package/src/commands/mcp-serve.ts +62 -0
- package/src/commands/setup.ts +30 -1
- package/src/commands/ultragoal.ts +7 -1
- package/src/commit/agentic/index.ts +2 -2
- package/src/commit/model-selection.ts +7 -22
- package/src/commit/pipeline.ts +2 -2
- package/src/config/model-registry.ts +17 -9
- package/src/config/model-resolver.ts +14 -84
- package/src/config/models-config-schema.ts +2 -0
- package/src/config/settings-schema.ts +14 -1
- package/src/coordinator/contract.ts +20 -0
- package/src/coordinator-mcp/policy.ts +160 -0
- package/src/coordinator-mcp/safety.ts +80 -0
- package/src/coordinator-mcp/server.ts +1316 -0
- package/src/extensibility/extensions/types.ts +13 -0
- package/src/gjc-runtime/goal-mode-request.ts +21 -1
- package/src/gjc-runtime/session-state-sidecar.ts +79 -0
- package/src/harness-control-plane/owner.ts +3 -3
- package/src/harness-control-plane/rpc-adapter.ts +7 -1
- package/src/harness-control-plane/types.ts +8 -11
- package/src/internal-urls/docs-index.generated.ts +6 -5
- package/src/memories/index.ts +1 -1
- package/src/modes/acp/acp-agent.ts +17 -9
- package/src/modes/acp/acp-event-mapper.ts +33 -1
- package/src/modes/components/custom-editor.ts +19 -3
- package/src/modes/components/hook-selector.ts +109 -5
- package/src/modes/controllers/extension-ui-controller.ts +16 -1
- package/src/modes/controllers/input-controller.ts +27 -7
- package/src/modes/controllers/selector-controller.ts +7 -1
- package/src/modes/interactive-mode.ts +3 -1
- package/src/modes/rpc/rpc-client.ts +16 -3
- package/src/modes/rpc/rpc-mode.ts +5 -2
- package/src/modes/shared/agent-wire/command-contract.ts +18 -0
- package/src/modes/shared/agent-wire/event-contract.ts +147 -0
- package/src/modes/shared/agent-wire/event-envelope.ts +35 -16
- package/src/modes/shared/agent-wire/event-observation.ts +397 -0
- package/src/modes/shared/agent-wire/protocol.ts +24 -81
- package/src/modes/utils/context-usage.ts +2 -2
- package/src/prompts/agents/architect.md +6 -0
- package/src/prompts/agents/critic.md +6 -0
- package/src/prompts/agents/explore.md +1 -1
- package/src/prompts/agents/plan.md +1 -1
- package/src/prompts/agents/planner.md +8 -1
- package/src/prompts/agents/reviewer.md +1 -1
- package/src/prompts/tools/browser.md +3 -2
- package/src/runtime-mcp/manager.ts +15 -2
- package/src/sdk.ts +3 -1
- package/src/session/agent-session.ts +66 -4
- package/src/session/session-manager.ts +1 -1
- package/src/setup/hermes/templates/operator-instructions.v1.md +29 -0
- package/src/setup/hermes-setup.ts +429 -0
- package/src/task/agents.ts +1 -1
- package/src/task/index.ts +2 -0
- package/src/task/render.ts +14 -0
- package/src/tools/ask.ts +30 -10
- package/src/tools/bash.ts +6 -1
- package/src/tools/browser/actions.ts +189 -0
- package/src/tools/browser.ts +91 -1
- package/src/tools/image-gen.ts +42 -15
- package/src/tools/index.ts +7 -1
- package/src/tools/inspect-image.ts +10 -8
- package/src/tools/job.ts +12 -2
- package/src/tools/monitor.ts +98 -17
- package/src/tools/renderers.ts +2 -0
- package/src/tools/subagent-render.ts +160 -0
- package/src/tools/subagent.ts +49 -7
- package/src/utils/commit-message-generator.ts +6 -13
- package/src/utils/title-generator.ts +1 -1
- package/dist/types/harness-control-plane/frame-mapper.d.ts +0 -29
- package/src/harness-control-plane/frame-mapper.ts +0 -286
- package/src/priority.json +0 -37
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
 * Structured browser action space.
 *
 * Adapts the SOTA computer-use / browser-use pattern: instead of authoring raw
 * JavaScript for every interaction, the model emits a list of structured verbs
 * (navigate / click / type / …) that reference elements by the numeric `id`
 * returned from {@link Observation}. Each verb is compiled onto the existing
 * in-tab `tab.*` helpers and executed through the same worker `run` path, so the
 * worker protocol is unchanged and the raw-JS `run` escape hatch still works.
 */

/**
 * Every verb the interpreter understands. This tuple is the single source of
 * truth: both the `BrowserActionVerb` type and the runtime `VERBS` set are
 * derived from it, so the two can never drift apart.
 */
const VERB_LIST = [
	"navigate",
	"click",
	"type",
	"fill",
	"select",
	"press",
	"scroll",
	"back",
	"wait",
	"observe",
	"extract",
	"screenshot",
] as const;

export type BrowserActionVerb = (typeof VERB_LIST)[number];

/** One structured step of a browser `act` call. Which fields are required depends on `verb`. */
export interface BrowserActionStep {
	verb: BrowserActionVerb;
	/** Element id from a prior `observe` (preferred for click/type). */
	id?: number;
	/** CSS / puppeteer selector when not addressing by `id`. */
	selector?: string;
	/** Text to type. */
	text?: string;
	/** Value for `fill`. */
	value?: string;
	/** Option value(s) for `select`. */
	values?: string[];
	/** URL for `navigate`. */
	url?: string;
	/** Key for `press` (e.g. "Enter"). */
	key?: string;
	/** Horizontal scroll delta. */
	dx?: number;
	/** Vertical scroll delta. */
	dy?: number;
	/** Sleep duration for `wait` when no selector is given. */
	ms?: number;
	/** Extract format. */
	format?: "markdown" | "text" | "html";
	/** Navigation wait condition for `navigate`. */
	wait_until?: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
	/** `observe`: forwarded to `tab.observe` as `viewportOnly`. */
	viewport_only?: boolean;
	/** `observe`: forwarded to `tab.observe` as `includeAll`. */
	include_all?: boolean;
}

const VERBS: ReadonlySet<BrowserActionVerb> = new Set(VERB_LIST);

/** True when `value` is not a string or contains only whitespace. */
function isBlank(value: unknown): boolean {
	return typeof value !== "string" || value.trim() === "";
}

/**
 * Validate a single step's required fields.
 *
 * `null` is treated exactly like a missing field, matching how the compiled
 * interpreter reads the step, so a step can never pass validation and then be
 * dispatched with an absent target.
 *
 * @param step  The step to check.
 * @param index Position of the step in the `actions` list (used in messages).
 * @returns An error string, or `undefined` when the step is well-formed.
 */
export function validateActionStep(step: BrowserActionStep, index: number): string | undefined {
	if (step === null || typeof step !== "object") return `actions[${index}]: step must be an object`;

	const where = `actions[${index}] (${step.verb})`;
	if (!VERBS.has(step.verb)) return `${where}: unknown verb`;

	// click/type address an element either by observed id or by selector.
	const missingTarget = step.id == null && isBlank(step.selector);

	switch (step.verb) {
		case "navigate":
			return isBlank(step.url) ? `${where}: 'url' is required` : undefined;
		case "click":
			return missingTarget ? `${where}: 'id' or 'selector' is required` : undefined;
		case "type":
			if (missingTarget) return `${where}: 'id' or 'selector' is required`;
			// An empty string is legitimate text; only a missing value is an error.
			if (step.text == null) return `${where}: 'text' is required`;
			return undefined;
		case "fill":
			if (isBlank(step.selector)) return `${where}: 'selector' is required`;
			if (step.value == null) return `${where}: 'value' is required`;
			return undefined;
		case "select":
			if (isBlank(step.selector)) return `${where}: 'selector' is required`;
			if (!step.values?.length) return `${where}: 'values' is required`;
			return undefined;
		case "press":
			return isBlank(step.key) ? `${where}: 'key' is required` : undefined;
		case "scroll":
			if (step.dx == null && step.dy == null) return `${where}: 'dx' or 'dy' is required`;
			return undefined;
		case "wait":
			// A usable selector wins; `ms` is only consulted for a plain sleep.
			if (!isBlank(step.selector)) return undefined;
			if (step.ms == null) return `${where}: 'selector' or 'ms' is required`;
			// NaN/Infinity serialise to null and negatives collapse to 0, silently
			// turning the wait into a no-op, so reject them up front.
			if (!Number.isFinite(step.ms) || step.ms < 0) {
				return `${where}: 'ms' must be a finite, non-negative number`;
			}
			return undefined;
		default:
			// back / observe / extract / screenshot take no required fields
			return undefined;
	}
}

/**
 * Validate the full step list.
 *
 * @throws Error when the list is empty, or with the message of the first invalid step.
 */
export function validateActionSteps(steps: readonly BrowserActionStep[]): void {
	if (steps.length === 0) throw new Error("browser 'act' requires a non-empty 'actions' list");
	for (const [index, step] of steps.entries()) {
		const error = validateActionStep(step, index);
		if (error) throw new Error(error);
	}
}

/**
 * Compile structured steps into a JS program for the in-tab `run` worker. Steps
 * are embedded as parsed JSON (no string interpolation, so values cannot inject
 * code) and dispatched by a fixed interpreter against the `tab` / `page` helpers.
 *
 * The program is a function *body*: it uses `await` and ends with
 * `return __results`, one result record per executed step.
 *
 * @throws Error when `steps` is empty or any step fails validation.
 */
export function compileActionSteps(steps: readonly BrowserActionStep[]): string {
	validateActionSteps(steps);
	// Double stringify: the inner call produces JSON text, the outer call turns
	// that text into a quoted JS string literal that is safe to splice in.
	const stepsLiteral = JSON.stringify(JSON.stringify(steps));
	return `
const __steps = JSON.parse(${stepsLiteral});
const __results = [];
for (const s of __steps) {
	const __hasSelector = typeof s.selector === "string" && s.selector.trim() !== "";
	switch (s.verb) {
		case "navigate":
			await tab.goto(s.url, s.wait_until ? { waitUntil: s.wait_until } : undefined);
			__results.push({ verb: "navigate", url: s.url });
			break;
		case "click":
			if (s.id !== undefined && s.id !== null) { await (await tab.id(s.id)).click(); }
			else { await tab.click(s.selector); }
			__results.push({ verb: "click", id: s.id ?? null, selector: s.selector ?? null });
			break;
		case "type":
			if (s.id !== undefined && s.id !== null) { await (await tab.id(s.id)).type(s.text); }
			else { await tab.type(s.selector, s.text); }
			__results.push({ verb: "type", id: s.id ?? null, selector: s.selector ?? null });
			break;
		case "fill":
			await tab.fill(s.selector, s.value);
			__results.push({ verb: "fill", selector: s.selector });
			break;
		case "select":
			__results.push({ verb: "select", selected: await tab.select(s.selector, ...(s.values || [])) });
			break;
		case "press":
			await tab.press(s.key, __hasSelector ? { selector: s.selector } : undefined);
			__results.push({ verb: "press", key: s.key });
			break;
		case "scroll":
			await tab.scroll(s.dx || 0, s.dy || 0);
			__results.push({ verb: "scroll", dx: s.dx || 0, dy: s.dy || 0 });
			break;
		case "back":
			await page.goBack();
			__results.push({ verb: "back" });
			break;
		case "wait":
			if (__hasSelector) { await tab.waitFor(s.selector); }
			else { await new Promise(r => setTimeout(r, s.ms)); }
			__results.push({ verb: "wait", selector: s.selector ?? null, ms: s.ms ?? null });
			break;
		case "observe":
			__results.push({ verb: "observe", observation: await tab.observe({ viewportOnly: s.viewport_only === true, includeAll: s.include_all === true }) });
			break;
		case "extract":
			__results.push({ verb: "extract", content: await tab.extract(s.format || "markdown") });
			break;
		case "screenshot":
			await tab.screenshot({});
			__results.push({ verb: "screenshot" });
			break;
		default:
			throw new Error("Unknown browser action verb: " + s.verb);
	}
}
return __results;
`;
}
|
package/src/tools/browser.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { prompt, untilAborted } from "@gajae-code/utils";
|
|
|
3
3
|
import * as z from "zod/v4";
|
|
4
4
|
import browserDescription from "../prompts/tools/browser.md" with { type: "text" };
|
|
5
5
|
import type { ToolSession } from "../sdk";
|
|
6
|
+
import { type BrowserActionStep, compileActionSteps } from "./browser/actions";
|
|
6
7
|
import { acquireBrowser, type BrowserHandle, type BrowserKind, type BrowserKindTag } from "./browser/registry";
|
|
7
8
|
import type { Observation, ScreenshotResult } from "./browser/tab-protocol";
|
|
8
9
|
import { acquireTab, dropHeadlessTabs, getTab, releaseAllTabs, releaseTab, runInTab } from "./browser/tab-supervisor";
|
|
@@ -24,8 +25,44 @@ const appSchema = z.object({
|
|
|
24
25
|
target: z.string().describe("substring to pick a window").optional(),
|
|
25
26
|
});
|
|
26
27
|
|
|
28
|
+
/** Verbs accepted by a structured `act` step. */
const ACTION_STEP_VERBS = [
	"navigate",
	"click",
	"type",
	"fill",
	"select",
	"press",
	"scroll",
	"back",
	"wait",
	"observe",
	"extract",
	"screenshot",
] as const;

/** Output formats accepted by the `extract` verb. */
const EXTRACT_FORMATS = ["markdown", "text", "html"] as const;

/** Navigation wait conditions accepted by the `navigate` verb. */
const NAVIGATION_WAIT_CONDITIONS = ["load", "domcontentloaded", "networkidle0", "networkidle2"] as const;

/**
 * Schema for one structured step of the browser tool's `act` action. Only
 * `verb` is mandatory here; every other field is optional at the schema level.
 */
const actionStepSchema = z.object({
	verb: z.enum(ACTION_STEP_VERBS).describe("structured action verb"),
	id: z.number().describe("element id from a prior observe").optional(),
	selector: z.string().describe("css/puppeteer selector").optional(),
	text: z.string().describe("text to type").optional(),
	value: z.string().describe("value for fill").optional(),
	values: z.array(z.string()).describe("option value(s) for select").optional(),
	url: z.string().describe("url for navigate").optional(),
	key: z.string().describe("key for press, e.g. Enter").optional(),
	dx: z.number().describe("horizontal scroll delta").optional(),
	dy: z.number().describe("vertical scroll delta").optional(),
	ms: z.number().describe("sleep ms for wait without selector").optional(),
	format: z.enum(EXTRACT_FORMATS).describe("extract format").optional(),
	wait_until: z.enum(NAVIGATION_WAIT_CONDITIONS).describe("navigation wait condition for navigate").optional(),
	viewport_only: z.boolean().describe("observe: only viewport elements").optional(),
	include_all: z.boolean().describe("observe: include non-interactive elements").optional(),
});
|
|
63
|
+
|
|
27
64
|
const browserSchema = z.object({
|
|
28
|
-
action: z.enum(["open", "close", "run"] as const).describe("operation"),
|
|
65
|
+
action: z.enum(["open", "close", "run", "act"] as const).describe("operation"),
|
|
29
66
|
name: z.string().describe("tab id (default 'main')").optional(),
|
|
30
67
|
url: z.string().describe("url to open").optional(),
|
|
31
68
|
app: appSchema.optional(),
|
|
@@ -45,6 +82,7 @@ const browserSchema = z.object({
|
|
|
45
82
|
.describe("auto-handle dialogs")
|
|
46
83
|
.optional(),
|
|
47
84
|
code: z.string().describe("js body to run in tab").optional(),
|
|
85
|
+
actions: z.array(actionStepSchema).describe("structured action steps for action 'act'").optional(),
|
|
48
86
|
timeout: z.number().default(30).describe("timeout in seconds (default 30, max 300)").optional(),
|
|
49
87
|
all: z.boolean().describe("close every tab").optional(),
|
|
50
88
|
kill: z.boolean().describe("also kill spawned-app browsers").optional(),
|
|
@@ -126,6 +164,8 @@ export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolD
|
|
|
126
164
|
return await this.#close(name, params, details, signal);
|
|
127
165
|
case "run":
|
|
128
166
|
return await this.#run(name, params, details, timeoutMs, signal);
|
|
167
|
+
case "act":
|
|
168
|
+
return await this.#act(name, params, details, timeoutMs, signal);
|
|
129
169
|
default:
|
|
130
170
|
throw new ToolError(`Unsupported action: ${(params as BrowserParams).action}`);
|
|
131
171
|
}
|
|
@@ -259,6 +299,56 @@ export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolD
|
|
|
259
299
|
details.result = textOnly;
|
|
260
300
|
return toolResult(details).content(content).done();
|
|
261
301
|
}
|
|
302
|
+
|
|
303
|
+
/**
 * Handle the `act` action: compile the structured steps in `params.actions`
 * into a program and execute it in the named tab through `runInTab`.
 *
 * Fills `details.browser`, `details.url`, `details.screenshots` (when any were
 * produced) and `details.result` (the newline-joined text parts of the output).
 *
 * @throws ToolError when `actions` is missing or empty, when no tab is
 *         registered under `name`, or when a step fails compilation.
 */
async #act(
	name: string,
	params: BrowserParams,
	details: BrowserToolDetails,
	timeoutMs: number,
	signal?: AbortSignal,
): Promise<AgentToolResult<BrowserToolDetails>> {
	const requestedSteps = (params.actions ?? []) as BrowserActionStep[];
	if (requestedSteps.length === 0) {
		throw new ToolError("Missing required parameter 'actions' for action 'act'.");
	}

	const targetTab = getTab(name);
	if (!targetTab) {
		throw new ToolError(`No tab named ${JSON.stringify(name)}. Open it first with action 'open'.`);
	}
	details.browser = targetTab.browser.kind.kind;
	details.url = targetTab.info.url;

	// Compilation validates every step; surface its failures as tool errors.
	const program = (() => {
		try {
			return compileActionSteps(requestedSteps);
		} catch (cause) {
			throw new ToolError(cause instanceof Error ? cause.message : String(cause));
		}
	})();

	const outcome = await runInTab(name, {
		code: program,
		timeoutMs,
		signal,
		session: this.session,
	});
	const { displays, returnValue, screenshots } = outcome;

	if (screenshots.length) details.screenshots = screenshots;

	const content = [...displays];
	if (returnValue !== undefined) {
		content.push({ type: "text", text: stringifyReturnValue(returnValue) });
	}
	// Guarantee at least one content part so the caller always sees a result.
	if (content.length === 0) {
		content.push({ type: "text", text: `Ran ${requestedSteps.length} action(s) on tab ${JSON.stringify(name)}` });
	}

	details.result = content
		.filter((part): part is { type: "text"; text: string } => part.type === "text")
		.map(part => part.text)
		.join("\n");
	return toolResult(details).content(content).done();
}
|
|
262
352
|
}
|
|
263
353
|
|
|
264
354
|
function describeBrowser(handle: BrowserHandle): string {
|
package/src/tools/image-gen.ts
CHANGED
|
@@ -401,23 +401,39 @@ export function setPreferredImageProvider(provider: ImageProvider | "auto"): voi
|
|
|
401
401
|
|
|
402
402
|
/** Credentials extracted from a stored google-antigravity API key value. */
interface ParsedAntigravityCredentials {
	accessToken: string;
	projectId?: string;
}

/**
 * Parse a stored google-antigravity credential.
 *
 * Two encodings are accepted:
 * - a JSON object carrying the token under `token` or `accessToken`, plus an
 *   optional `projectId`;
 * - anything else, which is treated as a raw bearer token.
 *
 * @param raw The stored credential string.
 * @returns The trimmed token (and `projectId` when it is a non-empty string),
 *          or `null` when no usable token can be found.
 */
function parseAntigravityCredentials(raw: string): ParsedAntigravityCredentials | null {
	let parsed: unknown;
	let isJson = true;
	try {
		parsed = JSON.parse(raw);
	} catch {
		isJson = false;
	}

	// Only a JSON *object* is the structured form. A bare token that happens to
	// be valid JSON (all digits, `true`, `null`, …) is still a raw bearer token.
	if (isJson && typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
		const fields = parsed as { token?: unknown; accessToken?: unknown; projectId?: unknown };
		// Take the first field that holds a non-blank string, so an empty `token`
		// does not shadow a valid `accessToken`.
		const token = [fields.token, fields.accessToken].find(
			(candidate): candidate is string => typeof candidate === "string" && candidate.trim().length > 0,
		);
		// Parsed as a JSON object but no usable token field.
		if (token === undefined) return null;
		const projectId =
			typeof fields.projectId === "string" && fields.projectId.length > 0 ? fields.projectId : undefined;
		return { accessToken: token.trim(), projectId };
	}

	// Not a JSON object: treat the value as a raw bearer token.
	const rawToken = raw.trim();
	return rawToken.length > 0 ? { accessToken: rawToken } : null;
}
|
|
418
422
|
|
|
419
|
-
async function findAntigravityCredentials(
|
|
420
|
-
|
|
423
|
+
async function findAntigravityCredentials(
|
|
424
|
+
modelRegistry: ModelRegistry,
|
|
425
|
+
sessionId?: string,
|
|
426
|
+
): Promise<ImageApiKey | null> {
|
|
427
|
+
const oauthAccess = await modelRegistry.authStorage.getOAuthAccess("google-antigravity", sessionId);
|
|
428
|
+
if (oauthAccess?.accessToken) {
|
|
429
|
+
return {
|
|
430
|
+
provider: "antigravity",
|
|
431
|
+
apiKey: oauthAccess.accessToken,
|
|
432
|
+
projectId: oauthAccess.projectId,
|
|
433
|
+
};
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
const apiKey = await modelRegistry.getApiKeyForProvider("google-antigravity", sessionId);
|
|
421
437
|
if (!apiKey) return null;
|
|
422
438
|
|
|
423
439
|
const parsed = parseAntigravityCredentials(apiKey);
|
|
@@ -457,7 +473,7 @@ async function findImageApiKey(
|
|
|
457
473
|
if (openAI) return openAI;
|
|
458
474
|
// Fall through to auto-detect if preferred provider key not found.
|
|
459
475
|
} else if (preferredImageProvider === "antigravity" && modelRegistry) {
|
|
460
|
-
const antigravity = await findAntigravityCredentials(modelRegistry);
|
|
476
|
+
const antigravity = await findAntigravityCredentials(modelRegistry, sessionId);
|
|
461
477
|
if (antigravity) return antigravity;
|
|
462
478
|
// Fall through to auto-detect if preferred provider key not found.
|
|
463
479
|
} else if (preferredImageProvider === "gemini") {
|
|
@@ -477,7 +493,7 @@ async function findImageApiKey(
|
|
|
477
493
|
if (openAI) return openAI;
|
|
478
494
|
|
|
479
495
|
if (modelRegistry) {
|
|
480
|
-
const antigravity = await findAntigravityCredentials(modelRegistry);
|
|
496
|
+
const antigravity = await findAntigravityCredentials(modelRegistry, sessionId);
|
|
481
497
|
if (antigravity) return antigravity;
|
|
482
498
|
}
|
|
483
499
|
|
|
@@ -589,12 +605,21 @@ function collectInlineImages(parts: GeminiPart[]): InlineImageData[] {
|
|
|
589
605
|
return images;
|
|
590
606
|
}
|
|
591
607
|
|
|
592
|
-
/**
 * Decide whether `model` can drive image generation through the hosted
 * `image_generation` tool.
 *
 * A model qualifies when it speaks one of the Responses APIs and either
 * declares `image` among its outputs, or is a first-party `openai` /
 * `openai-codex` model whose id is `o3`, or starts with `gpt-` or `o3-`
 * (case-insensitive).
 */
export function isOpenAIHostedImageModel(model: Model | undefined): model is Model {
	if (!model) return false;

	// The hosted tool exists only on the Responses API transports.
	const usesResponsesApi = model.api === "openai-responses" || model.api === "openai-codex-responses";
	if (!usesResponsesApi) return false;

	// Declared capability: a model advertising image output is accepted
	// regardless of which provider serves it.
	if (model.output?.includes("image")) return true;

	// Otherwise fall back to the first-party id heuristic.
	const isFirstParty = model.provider === "openai" || model.provider === "openai-codex";
	if (!isFirstParty) return false;

	const normalizedId = model.id.toLowerCase();
	return normalizedId.startsWith("gpt-") || normalizedId === "o3" || normalizedId.startsWith("o3-");
}
|
|
599
624
|
|
|
600
625
|
function getOpenAIHostedImageProvider(model: Model): ImageProvider {
|
|
@@ -995,7 +1020,9 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
|
|
|
995
1020
|
|
|
996
1021
|
if (provider === "antigravity") {
|
|
997
1022
|
if (!apiKey.projectId) {
|
|
998
|
-
throw new Error(
|
|
1023
|
+
throw new Error(
|
|
1024
|
+
"Antigravity image generation requires a projectId, but the stored google-antigravity credential only contains an access token. Run the google-antigravity login flow again so the projectId is stored, then retry.",
|
|
1025
|
+
);
|
|
999
1026
|
}
|
|
1000
1027
|
|
|
1001
1028
|
const prompt = assemblePrompt(params);
|
package/src/tools/index.ts
CHANGED
|
@@ -13,7 +13,11 @@ import { LspTool } from "../lsp";
|
|
|
13
13
|
import type { WorkflowGateEmitter } from "../modes/shared/agent-wire/unattended-session";
|
|
14
14
|
import type { PlanModeState } from "../plan-mode/state";
|
|
15
15
|
import type { AgentRegistry } from "../registry/agent-registry";
|
|
16
|
-
import type {
|
|
16
|
+
import type {
|
|
17
|
+
ForkContextSeed,
|
|
18
|
+
ForkContextSeedOptions,
|
|
19
|
+
PurgeQueuedCustomMessagesResult,
|
|
20
|
+
} from "../session/agent-session";
|
|
17
21
|
import type { ArtifactManager } from "../session/artifacts";
|
|
18
22
|
import type { ClientBridge } from "../session/client-bridge";
|
|
19
23
|
import type { CustomMessage } from "../session/messages";
|
|
@@ -162,6 +166,8 @@ export interface ToolSession {
|
|
|
162
166
|
/** Agent identity used for IRC routing. Returns the registry id (e.g. "0-Main", "0-AuthLoader"). */
|
|
163
167
|
getAgentId?: () => string | null;
|
|
164
168
|
/** Look up a registered tool by name (used by the eval js backend's tool bridge). */
|
|
169
|
+
/** Purge undelivered queued custom messages matching the predicate. Returns counts. */
|
|
170
|
+
purgeQueuedCustomMessages?: (predicate: (message: CustomMessage) => boolean) => PurgeQueuedCustomMessagesResult;
|
|
165
171
|
getToolByName?: (name: string) => AgentTool | undefined;
|
|
166
172
|
/** Agent registry for IRC routing across live sessions. */
|
|
167
173
|
agentRegistry?: AgentRegistry;
|
|
@@ -78,19 +78,21 @@ export class InspectImageTool implements AgentTool<typeof inspectImageSchema, In
|
|
|
78
78
|
};
|
|
79
79
|
|
|
80
80
|
const activeModelPattern = this.session.getActiveModelString?.() ?? this.session.getModelString?.();
|
|
81
|
-
|
|
82
|
-
resolvePattern("pi/vision") ??
|
|
83
|
-
resolvePattern("pi/default") ??
|
|
84
|
-
resolvePattern(activeModelPattern) ??
|
|
85
|
-
availableModels[0];
|
|
81
|
+
let model = resolvePattern("pi/default") ?? resolvePattern(activeModelPattern) ?? availableModels[0];
|
|
86
82
|
if (!model) {
|
|
87
83
|
throw new ToolError("Unable to resolve a model for inspect_image.");
|
|
88
84
|
}
|
|
89
85
|
|
|
86
|
+
// inspect_image requires image input; if the resolved model is text-only,
|
|
87
|
+
// fall back to any available vision-capable model before failing.
|
|
90
88
|
if (!model.input.includes("image")) {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
89
|
+
const visionModel = availableModels.find(candidate => candidate.input.includes("image"));
|
|
90
|
+
if (!visionModel) {
|
|
91
|
+
throw new ToolError(
|
|
92
|
+
`Resolved model ${model.provider}/${model.id} does not support image input, and no vision-capable model is available. Configure a vision-capable model.`,
|
|
93
|
+
);
|
|
94
|
+
}
|
|
95
|
+
model = visionModel;
|
|
94
96
|
}
|
|
95
97
|
|
|
96
98
|
const apiKey = await modelRegistry.getApiKey(model);
|
package/src/tools/job.ts
CHANGED
|
@@ -52,7 +52,7 @@ interface JobSnapshot {
|
|
|
52
52
|
errorText?: string;
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
-
type CancelStatus = "cancelled" | "not_found" | "already_completed";
|
|
55
|
+
type CancelStatus = "cancelled" | "not_found" | "already_completed" | "already_cancelled";
|
|
56
56
|
|
|
57
57
|
interface CancelOutcome {
|
|
58
58
|
id: string;
|
|
@@ -115,10 +115,20 @@ export class JobTool implements AgentTool<typeof jobSchema, JobToolDetails> {
|
|
|
115
115
|
for (const id of cancelIds) {
|
|
116
116
|
const existing = manager.getJob(id);
|
|
117
117
|
if (!existing || (ownerId && existing.ownerId !== ownerId)) {
|
|
118
|
-
|
|
118
|
+
const tombstone = manager.purgeMonitorTombstone(id, ownerFilter);
|
|
119
|
+
cancelOutcomes.push(
|
|
120
|
+
tombstone.found
|
|
121
|
+
? {
|
|
122
|
+
id,
|
|
123
|
+
status: tombstone.status === "cancelled" ? "already_cancelled" : "already_completed",
|
|
124
|
+
message: `Monitor job ${id} already gone; purged queued notifications.`,
|
|
125
|
+
}
|
|
126
|
+
: { id, status: "not_found", message: `Background job not found: ${id}` },
|
|
127
|
+
);
|
|
119
128
|
continue;
|
|
120
129
|
}
|
|
121
130
|
if (existing.status !== "running") {
|
|
131
|
+
if (existing.metadata?.monitor) manager.purgeMonitorTombstone(id, ownerFilter);
|
|
122
132
|
cancelOutcomes.push({
|
|
123
133
|
id,
|
|
124
134
|
status: "already_completed",
|
package/src/tools/monitor.ts
CHANGED
|
@@ -47,6 +47,7 @@ export interface MonitorToolDetails {
|
|
|
47
47
|
}
|
|
48
48
|
|
|
49
49
|
const MONITOR_LABEL_MAX = 120;
|
|
50
|
+
const MAX_PENDING_MONITOR_NOTIFICATIONS = 3;
|
|
50
51
|
|
|
51
52
|
function buildMonitorLabel(params: MonitorParams): string {
|
|
52
53
|
const base = `[monitor:${params.kind}] ${params.description}`;
|
|
@@ -89,36 +90,116 @@ export class MonitorTool implements AgentTool<typeof monitorSchema, MonitorToolD
|
|
|
89
90
|
const ownerId = this.session.getAgentId?.() ?? undefined;
|
|
90
91
|
const bash = new BashTool(this.session);
|
|
91
92
|
let deliveredFirstLine = false;
|
|
93
|
+
const controller = { closed: false };
|
|
94
|
+
let currentJobId = "";
|
|
95
|
+
let sequence = 0;
|
|
96
|
+
let latestLine: string | undefined;
|
|
97
|
+
let coalescedCount = 0;
|
|
98
|
+
let flushScheduled = false;
|
|
99
|
+
// Count of notification *sends* (not live queue depth): once it exceeds the
|
|
100
|
+
// cap, each new send first purges older queued notifications for this task,
|
|
101
|
+
// keeping the queue bounded and latest-biased.
|
|
102
|
+
let pendingNotifications = 0;
|
|
103
|
+
const isMonitorMessage = (message: { customType?: string; details?: unknown }) =>
|
|
104
|
+
message.customType === "task-notification" &&
|
|
105
|
+
(message.details as { taskId?: string } | undefined)?.taskId === currentJobId;
|
|
106
|
+
const flushLatest = () => {
|
|
107
|
+
if (!persistent || latestLine === undefined) return;
|
|
108
|
+
const line = latestLine;
|
|
109
|
+
const count = coalescedCount;
|
|
110
|
+
latestLine = undefined;
|
|
111
|
+
coalescedCount = 0;
|
|
112
|
+
flushScheduled = false;
|
|
113
|
+
sendNotification(line, currentJobId, count);
|
|
114
|
+
};
|
|
115
|
+
const closeMonitor = (mode: "purge" | "flush") => {
|
|
116
|
+
// "flush" (natural process exit): deliver the newest pending line so the
|
|
117
|
+
// final state is never lost, then stop. "purge" (explicit cancel / registry
|
|
118
|
+
// eviction): drop the queued backlog. Non-persistent monitors keep their one
|
|
119
|
+
// notification, so they never purge.
|
|
120
|
+
if (mode === "flush") {
|
|
121
|
+
flushLatest();
|
|
122
|
+
controller.closed = true;
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
125
|
+
controller.closed = true;
|
|
126
|
+
if (!persistent) return;
|
|
127
|
+
return this.session.purgeQueuedCustomMessages?.(isMonitorMessage);
|
|
128
|
+
};
|
|
129
|
+
const sendNotification = (line: string, jobId: string, count: number) => {
|
|
130
|
+
if (controller.closed) return;
|
|
131
|
+
const notificationId = `${jobId}:${sequence}`;
|
|
132
|
+
const suffix = count > 0 ? `\n(+${count} earlier lines)` : "";
|
|
133
|
+
const content = `<task-notification>\nMonitor task ${jobId} (${params.kind}: ${params.description}) emitted latest state:\n${line}${suffix}\n</task-notification>`;
|
|
134
|
+
const details = {
|
|
135
|
+
taskId: jobId,
|
|
136
|
+
kind: params.kind,
|
|
137
|
+
description: params.description,
|
|
138
|
+
monitor: true,
|
|
139
|
+
notificationId,
|
|
140
|
+
sequence,
|
|
141
|
+
coalescedCount: count,
|
|
142
|
+
};
|
|
143
|
+
pendingNotifications += 1;
|
|
144
|
+
if (pendingNotifications > MAX_PENDING_MONITOR_NOTIFICATIONS) {
|
|
145
|
+
this.session.purgeQueuedCustomMessages?.(
|
|
146
|
+
m =>
|
|
147
|
+
m.customType === "task-notification" &&
|
|
148
|
+
(m.details as { taskId?: string; notificationId?: string } | undefined)?.taskId === jobId &&
|
|
149
|
+
(m.details as { notificationId?: string } | undefined)?.notificationId !== notificationId,
|
|
150
|
+
);
|
|
151
|
+
pendingNotifications = MAX_PENDING_MONITOR_NOTIFICATIONS;
|
|
152
|
+
}
|
|
153
|
+
const sendPromise = this.session.sendCustomMessage?.(
|
|
154
|
+
{ customType: "task-notification", content, display: false, attribution: "agent", details },
|
|
155
|
+
{ triggerTurn: true, deliverAs: "followUp" },
|
|
156
|
+
);
|
|
157
|
+
if (sendPromise) {
|
|
158
|
+
void sendPromise.catch(error => {
|
|
159
|
+
logger.warn("Monitor task-notification delivery failed", {
|
|
160
|
+
error: error instanceof Error ? error.message : String(error),
|
|
161
|
+
});
|
|
162
|
+
});
|
|
163
|
+
} else {
|
|
164
|
+
this.session.steer?.({ customType: "task-notification", content, details });
|
|
165
|
+
}
|
|
166
|
+
};
|
|
167
|
+
const schedulePersistentNotification = (line: string) => {
|
|
168
|
+
latestLine = line;
|
|
169
|
+
sequence += 1;
|
|
170
|
+
coalescedCount += flushScheduled ? 1 : 0;
|
|
171
|
+
if (flushScheduled) return;
|
|
172
|
+
flushScheduled = true;
|
|
173
|
+
queueMicrotask(flushLatest);
|
|
174
|
+
};
|
|
92
175
|
const monitorJob = await bash.startMonitorJob(
|
|
93
176
|
{ command: params.command, timeout: params.timeout },
|
|
94
177
|
{
|
|
95
178
|
ownerId,
|
|
96
179
|
label,
|
|
97
180
|
ctx: context,
|
|
181
|
+
shouldAcceptRawLine: () => !controller.closed,
|
|
182
|
+
lifecycle: {
|
|
183
|
+
onCancel: () => closeMonitor("purge"),
|
|
184
|
+
onTerminal: () => closeMonitor("flush"),
|
|
185
|
+
onEvict: () => closeMonitor("purge"),
|
|
186
|
+
onTombstonePurge: () => closeMonitor("purge"),
|
|
187
|
+
},
|
|
98
188
|
onRawLine: (line, jobId) => {
|
|
189
|
+
if (controller.closed) return;
|
|
190
|
+
currentJobId = jobId;
|
|
99
191
|
if (!persistent && deliveredFirstLine) return;
|
|
100
192
|
deliveredFirstLine = true;
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
{ customType: "task-notification", content, display: false, attribution: "agent", details },
|
|
105
|
-
{ triggerTurn: true, deliverAs: "followUp" },
|
|
106
|
-
);
|
|
107
|
-
if (sendPromise) {
|
|
108
|
-
void sendPromise.catch(error => {
|
|
109
|
-
logger.warn("Monitor task-notification delivery failed", {
|
|
110
|
-
error: error instanceof Error ? error.message : String(error),
|
|
111
|
-
});
|
|
112
|
-
});
|
|
113
|
-
} else {
|
|
114
|
-
this.session.steer?.({ customType: "task-notification", content, details });
|
|
115
|
-
}
|
|
116
|
-
if (!persistent) {
|
|
117
|
-
manager.cancel(jobId, ownerId ? { ownerId } : undefined);
|
|
193
|
+
if (persistent) {
|
|
194
|
+
schedulePersistentNotification(line);
|
|
195
|
+
return;
|
|
118
196
|
}
|
|
197
|
+
sendNotification(line, jobId, 0);
|
|
198
|
+
manager.cancel(jobId, ownerId ? { ownerId } : undefined);
|
|
119
199
|
},
|
|
120
200
|
},
|
|
121
201
|
);
|
|
202
|
+
currentJobId = monitorJob.jobId;
|
|
122
203
|
|
|
123
204
|
const startedText = `Monitor started · task ${monitorJob.jobId} · persistent: ${persistent}`;
|
|
124
205
|
|
package/src/tools/renderers.ts
CHANGED
|
@@ -29,6 +29,7 @@ import { resolveToolRenderer } from "./resolve";
|
|
|
29
29
|
import { searchToolRenderer } from "./search";
|
|
30
30
|
import { searchToolBm25Renderer } from "./search-tool-bm25";
|
|
31
31
|
import { sshToolRenderer } from "./ssh";
|
|
32
|
+
import { subagentToolRenderer } from "./subagent-render";
|
|
32
33
|
import { todoWriteToolRenderer } from "./todo-write";
|
|
33
34
|
import { writeToolRenderer } from "./write";
|
|
34
35
|
|
|
@@ -66,6 +67,7 @@ export const toolRenderers: Record<string, ToolRenderer> = {
|
|
|
66
67
|
resolve: resolveToolRenderer as ToolRenderer,
|
|
67
68
|
search_tool_bm25: searchToolBm25Renderer as ToolRenderer,
|
|
68
69
|
ssh: sshToolRenderer as ToolRenderer,
|
|
70
|
+
subagent: subagentToolRenderer as ToolRenderer,
|
|
69
71
|
task: taskToolRenderer as ToolRenderer,
|
|
70
72
|
todo_write: todoWriteToolRenderer as ToolRenderer,
|
|
71
73
|
github: githubToolRenderer as ToolRenderer,
|