@gajae-code/coding-agent 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/dist/types/async/job-manager.d.ts +25 -0
  3. package/dist/types/commit/model-selection.d.ts +1 -1
  4. package/dist/types/config/model-registry.d.ts +3 -1
  5. package/dist/types/config/model-resolver.d.ts +1 -19
  6. package/dist/types/config/models-config-schema.d.ts +12 -0
  7. package/dist/types/config/settings-schema.d.ts +15 -1
  8. package/dist/types/gjc-runtime/goal-mode-request.d.ts +8 -1
  9. package/dist/types/harness-control-plane/types.d.ts +7 -2
  10. package/dist/types/modes/acp/acp-event-mapper.d.ts +2 -0
  11. package/dist/types/modes/components/custom-editor.d.ts +7 -0
  12. package/dist/types/modes/shared/agent-wire/command-contract.d.ts +18 -0
  13. package/dist/types/modes/shared/agent-wire/event-contract.d.ts +84 -0
  14. package/dist/types/modes/shared/agent-wire/event-envelope.d.ts +14 -7
  15. package/dist/types/modes/shared/agent-wire/event-observation.d.ts +37 -0
  16. package/dist/types/modes/shared/agent-wire/protocol.d.ts +13 -34
  17. package/dist/types/session/agent-session.d.ts +12 -1
  18. package/dist/types/session/session-manager.d.ts +1 -1
  19. package/dist/types/tools/bash.d.ts +2 -0
  20. package/dist/types/tools/browser/actions.d.ts +54 -0
  21. package/dist/types/tools/browser.d.ts +80 -0
  22. package/dist/types/tools/image-gen.d.ts +1 -0
  23. package/dist/types/tools/index.d.ts +3 -1
  24. package/dist/types/tools/job.d.ts +1 -1
  25. package/package.json +7 -7
  26. package/src/async/job-manager.ts +120 -1
  27. package/src/commands/ultragoal.ts +7 -1
  28. package/src/commit/agentic/index.ts +2 -2
  29. package/src/commit/model-selection.ts +7 -22
  30. package/src/commit/pipeline.ts +2 -2
  31. package/src/config/model-registry.ts +17 -9
  32. package/src/config/model-resolver.ts +14 -84
  33. package/src/config/models-config-schema.ts +2 -0
  34. package/src/config/settings-schema.ts +14 -1
  35. package/src/gjc-runtime/goal-mode-request.ts +21 -1
  36. package/src/harness-control-plane/owner.ts +3 -3
  37. package/src/harness-control-plane/rpc-adapter.ts +7 -1
  38. package/src/harness-control-plane/types.ts +8 -11
  39. package/src/internal-urls/docs-index.generated.ts +3 -3
  40. package/src/memories/index.ts +1 -1
  41. package/src/modes/acp/acp-agent.ts +17 -9
  42. package/src/modes/acp/acp-event-mapper.ts +33 -1
  43. package/src/modes/components/custom-editor.ts +19 -3
  44. package/src/modes/controllers/input-controller.ts +27 -7
  45. package/src/modes/controllers/selector-controller.ts +7 -1
  46. package/src/modes/interactive-mode.ts +3 -1
  47. package/src/modes/rpc/rpc-client.ts +16 -3
  48. package/src/modes/rpc/rpc-mode.ts +5 -2
  49. package/src/modes/shared/agent-wire/command-contract.ts +18 -0
  50. package/src/modes/shared/agent-wire/event-contract.ts +147 -0
  51. package/src/modes/shared/agent-wire/event-envelope.ts +35 -16
  52. package/src/modes/shared/agent-wire/event-observation.ts +397 -0
  53. package/src/modes/shared/agent-wire/protocol.ts +24 -81
  54. package/src/modes/utils/context-usage.ts +2 -2
  55. package/src/prompts/agents/explore.md +1 -1
  56. package/src/prompts/agents/plan.md +1 -1
  57. package/src/prompts/agents/reviewer.md +1 -1
  58. package/src/prompts/tools/browser.md +3 -2
  59. package/src/runtime-mcp/manager.ts +15 -2
  60. package/src/sdk.ts +3 -1
  61. package/src/session/agent-session.ts +60 -4
  62. package/src/session/session-manager.ts +1 -1
  63. package/src/task/agents.ts +1 -1
  64. package/src/tools/bash.ts +6 -1
  65. package/src/tools/browser/actions.ts +189 -0
  66. package/src/tools/browser.ts +91 -1
  67. package/src/tools/image-gen.ts +42 -15
  68. package/src/tools/index.ts +7 -1
  69. package/src/tools/inspect-image.ts +10 -8
  70. package/src/tools/job.ts +12 -2
  71. package/src/tools/monitor.ts +98 -17
  72. package/src/utils/commit-message-generator.ts +6 -13
  73. package/src/utils/title-generator.ts +1 -1
  74. package/dist/types/harness-control-plane/frame-mapper.d.ts +0 -29
  75. package/src/harness-control-plane/frame-mapper.ts +0 -286
  76. package/src/priority.json +0 -37
@@ -89,6 +89,15 @@ export interface ForkContextSeedMetadata {
89
89
  skippedReasons: Record<string, number>;
90
90
  }
91
91
 
92
+ export interface PurgeQueuedCustomMessagesResult {
93
+ agentSteering: number;
94
+ agentFollowUp: number;
95
+ pendingNextTurn: number;
96
+ displaySteering: number;
97
+ displayFollowUp: number;
98
+ totalExecutable: number;
99
+ }
100
+
92
101
  export interface ForkContextSeed {
93
102
  messages: Message[];
94
103
  agentMessages: AgentMessage[];
@@ -4363,7 +4372,10 @@ export class AgentSession {
4363
4372
 
4364
4373
  async #activatePendingGjcGoalModeRequest(): Promise<boolean> {
4365
4374
  if (!this.settings.get("goal.enabled")) return false;
4366
- const pendingGoal = await consumePendingGoalModeRequest(this.sessionManager.getCwd());
4375
+ const pendingGoal = await consumePendingGoalModeRequest(
4376
+ this.sessionManager.getCwd(),
4377
+ this.sessionManager.getSessionId(),
4378
+ );
4367
4379
  if (!pendingGoal) return false;
4368
4380
  const currentState = this.getGoalModeState();
4369
4381
  if (currentState?.goal && currentState.goal.status !== "complete" && currentState.goal.status !== "dropped") {
@@ -5058,6 +5070,10 @@ export class AgentSession {
5058
5070
  this.#queueHiddenNextTurnMessage(message, true);
5059
5071
  }
5060
5072
 
5073
+ queueDeferredMessageForTests(message: CustomMessage, triggerTurn = true): void {
5074
+ this.#queueHiddenNextTurnMessage(message, triggerTurn);
5075
+ }
5076
+
5061
5077
  #queueHiddenNextTurnMessage(message: CustomMessage, triggerTurn: boolean): void {
5062
5078
  this.#pendingNextTurnMessages.push(message);
5063
5079
  if (!triggerTurn) return;
@@ -5230,6 +5246,46 @@ export class AgentSession {
5230
5246
  );
5231
5247
  }
5232
5248
 
5249
/**
 * Remove undelivered queued custom messages matching `predicate` from the
 * executable queues and from the tagged display mirrors.
 *
 * Executable sources purged: the agent's steering and follow-up queues (only
 * entries whose `role` is `"custom"` are offered to `predicate`) and this
 * session's pending next-turn messages. Every purged message that carries a
 * pending-display tag also has the entries with that tag dropped from the
 * steering / follow-up display lists.
 *
 * @param predicate - Returns true for messages that must be removed. It is
 *   evaluated at most once per message object, so the set of collected display
 *   tags always agrees with the set of messages actually removed, even when the
 *   predicate is stateful.
 * @returns Per-queue removal counts; `totalExecutable` is the agent total plus
 *   the pending next-turn count (display mirrors are not included).
 */
purgeQueuedCustomMessages(predicate: (message: CustomMessage) => boolean): PurgeQueuedCustomMessagesResult {
  // One verdict per message object: the same message is inspected both when
  // collecting tags and when removing, and both passes must reach the same answer.
  const verdicts = new WeakMap<CustomMessage, boolean>();
  const matches = (message: CustomMessage): boolean => {
    const cached = verdicts.get(message);
    if (cached !== undefined) return cached;
    const verdict = Boolean(predicate(message));
    verdicts.set(message, verdict);
    return verdict;
  };
  const isMatch = (m: AgentMessage): boolean => m.role === "custom" && matches(m as CustomMessage);

  const removedTags = new Set<string>();
  const rememberTag = (message: CustomMessage): void => {
    const tag = readPendingDisplayTag(message.details);
    if (tag) removedTags.add(tag);
  };

  // Tags must be read before removal, while the messages are still queued.
  for (const m of [...this.agent.snapshotSteering(), ...this.agent.snapshotFollowUp()]) {
    if (isMatch(m)) rememberTag(m as CustomMessage);
  }
  const agentRemoved = this.agent.removeQueuedMessages(isMatch);

  // Single pass over the pending next-turn list: keep non-matches, record tags of matches.
  const beforeNext = this.#pendingNextTurnMessages.length;
  this.#pendingNextTurnMessages = this.#pendingNextTurnMessages.filter(m => {
    if (!matches(m)) return true;
    rememberTag(m);
    return false;
  });
  const pendingNextTurn = beforeNext - this.#pendingNextTurnMessages.length;

  let displaySteering = 0;
  let displayFollowUp = 0;
  if (removedTags.size > 0) {
    const beforeS = this.#steeringMessages.length;
    this.#steeringMessages = this.#steeringMessages.filter(e => !(e.tag && removedTags.has(e.tag)));
    displaySteering = beforeS - this.#steeringMessages.length;
    const beforeF = this.#followUpMessages.length;
    this.#followUpMessages = this.#followUpMessages.filter(e => !(e.tag && removedTags.has(e.tag)));
    displayFollowUp = beforeF - this.#followUpMessages.length;
  }
  return {
    agentSteering: agentRemoved.steering,
    agentFollowUp: agentRemoved.followUp,
    pendingNextTurn,
    displaySteering,
    displayFollowUp,
    totalExecutable: agentRemoved.total + pendingNextTurn,
  };
}
5288
+
5233
5289
  /**
5234
5290
  * Send a user message to the agent.
5235
5291
  * When deliverAs is set, queue the message instead of starting a new turn.
@@ -5704,7 +5760,7 @@ export class AgentSession {
5704
5760
  /**
5705
5761
  * Cycle through configured role models in a fixed order.
5706
5762
  * Skips missing roles.
5707
- * @param roleOrder - Order of roles to cycle through (e.g., ["slow", "default", "smol"])
5763
+ * @param roleOrder - Order of roles to cycle through (e.g., ["default"])
5708
5764
  * @param options - Optional settings: `temporary` to not persist to settings
5709
5765
  */
5710
5766
  async cycleRoleModels(
@@ -6450,7 +6506,7 @@ export class AgentSession {
6450
6506
  if (pruneResult) {
6451
6507
  contextTokens = Math.max(0, contextTokens - pruneResult.tokensSaved);
6452
6508
  }
6453
- if (shouldCompact(contextTokens, contextWindow, compactionSettings)) {
6509
+ if (shouldCompact(contextTokens, contextWindow, compactionSettings, this.model?.maxTokens ?? 0)) {
6454
6510
  // Try promotion first — if a larger model is available, switch instead of compacting
6455
6511
  const promoted = await this.#tryContextPromotion(assistantMessage);
6456
6512
  if (!promoted) {
@@ -7086,7 +7142,7 @@ export class AgentSession {
7086
7142
  }
7087
7143
  return new Error(
7088
7144
  `Compaction requires usable credentials for ${currentModel.provider}/${currentModel.id}. ` +
7089
- `Configure ${currentModel.provider} credentials or assign an authenticated fallback role such as modelRoles.smol.`,
7145
+ `Configure ${currentModel.provider} credentials or assign an authenticated fallback via modelRoles.default.`,
7090
7146
  );
7091
7147
  }
7092
7148
 
@@ -104,7 +104,7 @@ export interface ModelChangeEntry extends SessionEntryBase {
104
104
  type: "model_change";
105
105
  /** Model in "provider/modelId" format */
106
106
  model: string;
107
- /** Role: "default", "smol", "slow", etc. Undefined treated as "default" */
107
+ /** Role: "default" or an agent role. Undefined treated as "default" */
108
108
  role?: string;
109
109
  }
110
110
 
@@ -59,7 +59,7 @@ const EMBEDDED_AGENT_DEFS: EmbeddedAgentDef[] = [
59
59
  name: "task",
60
60
  description: "General-purpose subagent with full capabilities for delegated multi-step tasks",
61
61
  spawns: "*",
62
- model: "pi/task",
62
+ model: "pi/default",
63
63
  thinkingLevel: Effort.Medium,
64
64
  hide: true,
65
65
  },
package/src/tools/bash.ts CHANGED
@@ -609,6 +609,8 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
609
609
  label?: string;
610
610
  ctx?: AgentToolContext;
611
611
  onRawLine?: (line: string, jobId: string) => void;
612
+ shouldAcceptRawLine?: (jobId: string) => boolean;
613
+ lifecycle?: import("../async").AsyncJobLifecycleCleanup;
612
614
  } = {},
613
615
  ): Promise<{ jobId: string; label: string; commandCwd: string }> {
614
616
  const manager = AsyncJobManager.instance();
@@ -624,12 +626,14 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
624
626
  let cursorOffset = 0;
625
627
  let lineBuffer = "";
626
628
  const dispatchLines = (chunk: string) => {
629
+ if (opts.shouldAcceptRawLine?.(currentJobId) === false) return;
627
630
  if (!onRawLine) return;
628
631
  lineBuffer += chunk;
629
632
  let newlineIndex = lineBuffer.indexOf("\n");
630
633
  while (newlineIndex !== -1) {
631
634
  const line = lineBuffer.slice(0, newlineIndex);
632
635
  lineBuffer = lineBuffer.slice(newlineIndex + 1);
636
+ if (opts.shouldAcceptRawLine?.(currentJobId) === false) return;
633
637
  try {
634
638
  onRawLine(line, currentJobId);
635
639
  } catch (error) {
@@ -642,6 +646,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
642
646
  };
643
647
  const flushTrailingLine = () => {
644
648
  if (!onRawLine) return;
649
+ if (opts.shouldAcceptRawLine?.(currentJobId) === false) return;
645
650
  if (lineBuffer.length === 0) return;
646
651
  const remainder = lineBuffer;
647
652
  lineBuffer = "";
@@ -693,7 +698,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
693
698
  throw error instanceof Error ? error : new Error(String(error));
694
699
  }
695
700
  },
696
- { ownerId, metadata: { monitor: true } },
701
+ { ownerId, metadata: { monitor: true }, lifecycle: opts.lifecycle },
697
702
  );
698
703
  currentJobId = jobId;
699
704
  return { jobId, label, commandCwd: prepared.commandCwd };
@@ -0,0 +1,189 @@
1
+ /**
2
+ * Structured browser action space.
3
+ *
4
+ * Adapts the SOTA computer-use / browser-use pattern: instead of authoring raw
5
+ * JavaScript for every interaction, the model emits a list of structured verbs
6
+ * (navigate / click / type / …) that reference elements by the numeric `id`
7
+ * returned from {@link Observation}. Each verb is compiled onto the existing
8
+ * in-tab `tab.*` helpers and executed through the same worker `run` path, so the
9
+ * worker protocol is unchanged and the raw-JS `run` escape hatch still works.
10
+ */
11
+
12
+ export type BrowserActionVerb =
13
+ | "navigate"
14
+ | "click"
15
+ | "type"
16
+ | "fill"
17
+ | "select"
18
+ | "press"
19
+ | "scroll"
20
+ | "back"
21
+ | "wait"
22
+ | "observe"
23
+ | "extract"
24
+ | "screenshot";
25
+
26
+ export interface BrowserActionStep {
27
+ verb: BrowserActionVerb;
28
+ /** Element id from a prior `observe` (preferred for click/type). */
29
+ id?: number;
30
+ /** CSS / puppeteer selector when not addressing by `id`. */
31
+ selector?: string;
32
+ /** Text to type. */
33
+ text?: string;
34
+ /** Value for `fill`. */
35
+ value?: string;
36
+ /** Option value(s) for `select`. */
37
+ values?: string[];
38
+ /** URL for `navigate`. */
39
+ url?: string;
40
+ /** Key for `press` (e.g. "Enter"). */
41
+ key?: string;
42
+ /** Horizontal scroll delta. */
43
+ dx?: number;
44
+ /** Vertical scroll delta. */
45
+ dy?: number;
46
+ /** Sleep duration for `wait` when no selector is given. */
47
+ ms?: number;
48
+ /** Extract format. */
49
+ format?: "markdown" | "text" | "html";
50
+ /** Navigation wait condition for `navigate`. */
51
+ wait_until?: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
52
+ /** Only return interactive/viewport elements for `observe`. */
53
+ viewport_only?: boolean;
54
+ include_all?: boolean;
55
+ }
56
+
57
+ const VERBS: ReadonlySet<BrowserActionVerb> = new Set([
58
+ "navigate",
59
+ "click",
60
+ "type",
61
+ "fill",
62
+ "select",
63
+ "press",
64
+ "scroll",
65
+ "back",
66
+ "wait",
67
+ "observe",
68
+ "extract",
69
+ "screenshot",
70
+ ]);
71
+
72
/**
 * Validate a single step's required fields.
 *
 * The checks mirror what survives the JSON round trip performed when steps are
 * compiled: `null`, `NaN` and infinite numbers all serialize to `null`, and the
 * interpreter treats a `null` id as absent, so those values count as missing
 * here instead of slipping through to the in-tab helpers.
 *
 * @param step - Step to validate; may originate from untyped JSON.
 * @param index - Position in the `actions` list, used only for the message prefix.
 * @returns An error string, or `undefined` when the step is well-formed.
 */
export function validateActionStep(step: BrowserActionStep, index: number): string | undefined {
  // Holes / null entries would otherwise fail with an opaque TypeError on `.verb`.
  if (typeof step !== "object" || step === null) return `actions[${index}]: step must be an object`;

  const where = `actions[${index}] (${step.verb})`;
  if (!VERBS.has(step.verb)) return `${where}: unknown verb`;

  const isFiniteNumber = (value: unknown): boolean => typeof value === "number" && Number.isFinite(value);
  const isNonBlank = (value: unknown): boolean => typeof value === "string" && value.trim().length > 0;
  // click/type may address the element either by observed id or by selector.
  const hasTarget = isFiniteNumber(step.id) || isNonBlank(step.selector);

  switch (step.verb) {
    case "navigate":
      return isNonBlank(step.url) ? undefined : `${where}: 'url' is required`;
    case "click":
      return hasTarget ? undefined : `${where}: 'id' or 'selector' is required`;
    case "type":
      if (!hasTarget) return `${where}: 'id' or 'selector' is required`;
      // An empty string is valid text; only a missing/non-string value is rejected.
      if (typeof step.text !== "string") return `${where}: 'text' is required`;
      return undefined;
    case "fill":
      if (!isNonBlank(step.selector)) return `${where}: 'selector' is required`;
      if (typeof step.value !== "string") return `${where}: 'value' is required`;
      return undefined;
    case "select":
      if (!isNonBlank(step.selector)) return `${where}: 'selector' is required`;
      if (!Array.isArray(step.values) || step.values.length === 0) return `${where}: 'values' is required`;
      return undefined;
    case "press":
      return isNonBlank(step.key) ? undefined : `${where}: 'key' is required`;
    case "scroll":
      if (!isFiniteNumber(step.dx) && !isFiniteNumber(step.dy)) return `${where}: 'dx' or 'dy' is required`;
      return undefined;
    case "wait":
      if (!isNonBlank(step.selector) && !isFiniteNumber(step.ms)) return `${where}: 'selector' or 'ms' is required`;
      return undefined;
    default:
      // back / observe / extract / screenshot take no required fields
      return undefined;
  }
}
112
+
113
/**
 * Validate the full step list.
 *
 * @param steps - Ordered steps for the browser `act` action.
 * @throws Error when the list is empty, when an entry is not an object (array
 *   hole, `null`, primitive), or with the first message reported by
 *   {@link validateActionStep}.
 */
export function validateActionSteps(steps: readonly BrowserActionStep[]): void {
  if (steps.length === 0) throw new Error("browser 'act' requires a non-empty 'actions' list");
  for (const [index, step] of steps.entries()) {
    // `entries()` also visits holes (as undefined), so malformed lists get an
    // indexed message rather than a TypeError from reading `.verb`.
    if (typeof step !== "object" || step === null) {
      throw new Error(`actions[${index}]: step must be an object`);
    }
    const error = validateActionStep(step, index);
    if (error) throw new Error(error);
  }
}
121
+
122
/**
 * Compile structured steps into a JS program for the in-tab `run` worker.
 *
 * Steps are embedded as parsed JSON (no string interpolation of individual
 * values, so values cannot inject code) and dispatched by a fixed interpreter
 * against the `tab` / `page` helpers. The program returns one result record per
 * executed step.
 *
 * The interpreter treats a blank (empty or whitespace-only) `selector` as
 * absent, matching how step validation trims selectors; otherwise a step such
 * as `{ verb: "wait", selector: " ", ms: 100 }` would validate as a sleep but
 * execute as a selector wait.
 *
 * @param steps - Steps to compile; validated first via `validateActionSteps`.
 * @returns Source text of the program body.
 * @throws Error when validation rejects the step list.
 */
export function compileActionSteps(steps: readonly BrowserActionStep[]): string {
  validateActionSteps(steps);
  // Double-encode: the inner call serializes the steps, the outer call turns
  // that JSON text into a quoted JS string literal consumed by JSON.parse below.
  const stepsLiteral = JSON.stringify(JSON.stringify(steps));
  return `
const __steps = JSON.parse(${stepsLiteral});
const __results = [];
const __selectorOf = s => (typeof s.selector === "string" && s.selector.trim() !== "" ? s.selector : undefined);
for (const s of __steps) {
  const __selector = __selectorOf(s);
  const __hasId = s.id !== undefined && s.id !== null;
  switch (s.verb) {
    case "navigate":
      await tab.goto(s.url, s.wait_until ? { waitUntil: s.wait_until } : undefined);
      __results.push({ verb: "navigate", url: s.url });
      break;
    case "click":
      if (__hasId) { await (await tab.id(s.id)).click(); }
      else { await tab.click(__selector); }
      __results.push({ verb: "click", id: s.id ?? null, selector: s.selector ?? null });
      break;
    case "type":
      if (__hasId) { await (await tab.id(s.id)).type(s.text); }
      else { await tab.type(__selector, s.text); }
      __results.push({ verb: "type", id: s.id ?? null, selector: s.selector ?? null });
      break;
    case "fill":
      await tab.fill(s.selector, s.value);
      __results.push({ verb: "fill", selector: s.selector });
      break;
    case "select":
      __results.push({ verb: "select", selected: await tab.select(s.selector, ...(s.values || [])) });
      break;
    case "press":
      await tab.press(s.key, __selector ? { selector: __selector } : undefined);
      __results.push({ verb: "press", key: s.key });
      break;
    case "scroll":
      await tab.scroll(s.dx || 0, s.dy || 0);
      __results.push({ verb: "scroll", dx: s.dx || 0, dy: s.dy || 0 });
      break;
    case "back":
      await page.goBack();
      __results.push({ verb: "back" });
      break;
    case "wait":
      if (__selector) { await tab.waitFor(__selector); }
      else { await new Promise(r => setTimeout(r, s.ms)); }
      __results.push({ verb: "wait", selector: s.selector ?? null, ms: s.ms ?? null });
      break;
    case "observe":
      __results.push({ verb: "observe", observation: await tab.observe({ viewportOnly: s.viewport_only === true, includeAll: s.include_all === true }) });
      break;
    case "extract":
      __results.push({ verb: "extract", content: await tab.extract(s.format || "markdown") });
      break;
    case "screenshot":
      await tab.screenshot({});
      __results.push({ verb: "screenshot" });
      break;
    default:
      throw new Error("Unknown browser action verb: " + s.verb);
  }
}
return __results;
`;
}
@@ -3,6 +3,7 @@ import { prompt, untilAborted } from "@gajae-code/utils";
3
3
  import * as z from "zod/v4";
4
4
  import browserDescription from "../prompts/tools/browser.md" with { type: "text" };
5
5
  import type { ToolSession } from "../sdk";
6
+ import { type BrowserActionStep, compileActionSteps } from "./browser/actions";
6
7
  import { acquireBrowser, type BrowserHandle, type BrowserKind, type BrowserKindTag } from "./browser/registry";
7
8
  import type { Observation, ScreenshotResult } from "./browser/tab-protocol";
8
9
  import { acquireTab, dropHeadlessTabs, getTab, releaseAllTabs, releaseTab, runInTab } from "./browser/tab-supervisor";
@@ -24,8 +25,44 @@ const appSchema = z.object({
24
25
  target: z.string().describe("substring to pick a window").optional(),
25
26
  });
26
27
 
28
+ const actionStepSchema = z.object({
29
+ verb: z
30
+ .enum([
31
+ "navigate",
32
+ "click",
33
+ "type",
34
+ "fill",
35
+ "select",
36
+ "press",
37
+ "scroll",
38
+ "back",
39
+ "wait",
40
+ "observe",
41
+ "extract",
42
+ "screenshot",
43
+ ])
44
+ .describe("structured action verb"),
45
+ id: z.number().describe("element id from a prior observe").optional(),
46
+ selector: z.string().describe("css/puppeteer selector").optional(),
47
+ text: z.string().describe("text to type").optional(),
48
+ value: z.string().describe("value for fill").optional(),
49
+ values: z.array(z.string()).describe("option value(s) for select").optional(),
50
+ url: z.string().describe("url for navigate").optional(),
51
+ key: z.string().describe("key for press, e.g. Enter").optional(),
52
+ dx: z.number().describe("horizontal scroll delta").optional(),
53
+ dy: z.number().describe("vertical scroll delta").optional(),
54
+ ms: z.number().describe("sleep ms for wait without selector").optional(),
55
+ format: z.enum(["markdown", "text", "html"]).describe("extract format").optional(),
56
+ wait_until: z
57
+ .enum(["load", "domcontentloaded", "networkidle0", "networkidle2"])
58
+ .describe("navigation wait condition for navigate")
59
+ .optional(),
60
+ viewport_only: z.boolean().describe("observe: only viewport elements").optional(),
61
+ include_all: z.boolean().describe("observe: include non-interactive elements").optional(),
62
+ });
63
+
27
64
  const browserSchema = z.object({
28
- action: z.enum(["open", "close", "run"] as const).describe("operation"),
65
+ action: z.enum(["open", "close", "run", "act"] as const).describe("operation"),
29
66
  name: z.string().describe("tab id (default 'main')").optional(),
30
67
  url: z.string().describe("url to open").optional(),
31
68
  app: appSchema.optional(),
@@ -45,6 +82,7 @@ const browserSchema = z.object({
45
82
  .describe("auto-handle dialogs")
46
83
  .optional(),
47
84
  code: z.string().describe("js body to run in tab").optional(),
85
+ actions: z.array(actionStepSchema).describe("structured action steps for action 'act'").optional(),
48
86
  timeout: z.number().default(30).describe("timeout in seconds (default 30, max 300)").optional(),
49
87
  all: z.boolean().describe("close every tab").optional(),
50
88
  kill: z.boolean().describe("also kill spawned-app browsers").optional(),
@@ -126,6 +164,8 @@ export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolD
126
164
  return await this.#close(name, params, details, signal);
127
165
  case "run":
128
166
  return await this.#run(name, params, details, timeoutMs, signal);
167
+ case "act":
168
+ return await this.#act(name, params, details, timeoutMs, signal);
129
169
  default:
130
170
  throw new ToolError(`Unsupported action: ${(params as BrowserParams).action}`);
131
171
  }
@@ -259,6 +299,56 @@ export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolD
259
299
  details.result = textOnly;
260
300
  return toolResult(details).content(content).done();
261
301
  }
302
+
303
/**
 * Handle action 'act': compile the structured `actions` steps and execute the
 * resulting program in the named tab through the same `runInTab` path used for
 * raw code.
 *
 * @param name - Tab name to run against; the tab must already be open.
 * @param params - Tool parameters; `params.actions` holds the steps.
 * @param details - Result details, filled in with browser kind, url, screenshots and text result.
 * @param timeoutMs - Timeout forwarded to `runInTab`.
 * @param signal - Optional abort signal forwarded to `runInTab`.
 * @throws ToolError when `actions` is missing/empty, the tab does not exist, or
 *   a step fails validation during compilation.
 */
async #act(
  name: string,
  params: BrowserParams,
  details: BrowserToolDetails,
  timeoutMs: number,
  signal?: AbortSignal,
): Promise<AgentToolResult<BrowserToolDetails>> {
  const actionSteps = (params.actions ?? []) as BrowserActionStep[];
  if (actionSteps.length === 0) {
    throw new ToolError("Missing required parameter 'actions' for action 'act'.");
  }

  const targetTab = getTab(name);
  if (!targetTab) {
    throw new ToolError(`No tab named ${JSON.stringify(name)}. Open it first with action 'open'.`);
  }
  details.browser = targetTab.browser.kind.kind;
  details.url = targetTab.info.url;

  // Compilation validates every step and embeds them as parsed JSON, so the
  // program handed to the in-tab worker is injection-safe. Validation failures
  // are re-raised as ToolError so they surface as tool errors.
  const program = ((): string => {
    try {
      return compileActionSteps(actionSteps);
    } catch (cause) {
      throw new ToolError(cause instanceof Error ? cause.message : String(cause));
    }
  })();

  const outcome = await runInTab(name, {
    code: program,
    timeoutMs,
    signal,
    session: this.session,
  });

  if (outcome.screenshots.length > 0) details.screenshots = outcome.screenshots;

  const content = [...outcome.displays];
  if (outcome.returnValue !== undefined) {
    content.push({ type: "text", text: stringifyReturnValue(outcome.returnValue) });
  }
  if (content.length === 0) {
    // Nothing displayed and nothing returned: emit a short summary instead.
    content.push({ type: "text", text: `Ran ${actionSteps.length} action(s) on tab ${JSON.stringify(name)}` });
  }

  // The text-only projection of the content becomes the recorded result.
  details.result = content
    .filter((part): part is { type: "text"; text: string } => part.type === "text")
    .map(part => part.text)
    .join("\n");
  return toolResult(details).content(content).done();
}
262
352
  }
263
353
 
264
354
  function describeBrowser(handle: BrowserHandle): string {
@@ -401,23 +401,39 @@ export function setPreferredImageProvider(provider: ImageProvider | "auto"): voi
401
401
 
402
402
  interface ParsedAntigravityCredentials {
403
403
  accessToken: string;
404
- projectId: string;
404
+ projectId?: string;
405
405
  }
406
406
 
407
407
  function parseAntigravityCredentials(raw: string): ParsedAntigravityCredentials | null {
408
408
  try {
409
- const parsed = JSON.parse(raw) as { token?: string; projectId?: string };
410
- if (parsed.token && parsed.projectId) {
411
- return { accessToken: parsed.token, projectId: parsed.projectId };
409
+ const parsed = JSON.parse(raw) as { token?: string; accessToken?: string; projectId?: string };
410
+ const token = parsed.token ?? parsed.accessToken;
411
+ if (typeof token === "string" && token.trim().length > 0) {
412
+ return { accessToken: token.trim(), projectId: parsed.projectId };
412
413
  }
414
+ // Parsed as JSON but no usable token field.
415
+ return null;
413
416
  } catch {
414
- // Invalid JSON
417
+ // Not JSON: treat the value as a raw bearer token.
415
418
  }
416
- return null;
419
+ const rawToken = raw.trim();
420
+ return rawToken.length > 0 ? { accessToken: rawToken } : null;
417
421
  }
418
422
 
419
- async function findAntigravityCredentials(modelRegistry: ModelRegistry): Promise<ImageApiKey | null> {
420
- const apiKey = await modelRegistry.getApiKeyForProvider("google-antigravity");
423
+ async function findAntigravityCredentials(
424
+ modelRegistry: ModelRegistry,
425
+ sessionId?: string,
426
+ ): Promise<ImageApiKey | null> {
427
+ const oauthAccess = await modelRegistry.authStorage.getOAuthAccess("google-antigravity", sessionId);
428
+ if (oauthAccess?.accessToken) {
429
+ return {
430
+ provider: "antigravity",
431
+ apiKey: oauthAccess.accessToken,
432
+ projectId: oauthAccess.projectId,
433
+ };
434
+ }
435
+
436
+ const apiKey = await modelRegistry.getApiKeyForProvider("google-antigravity", sessionId);
421
437
  if (!apiKey) return null;
422
438
 
423
439
  const parsed = parseAntigravityCredentials(apiKey);
@@ -457,7 +473,7 @@ async function findImageApiKey(
457
473
  if (openAI) return openAI;
458
474
  // Fall through to auto-detect if preferred provider key not found.
459
475
  } else if (preferredImageProvider === "antigravity" && modelRegistry) {
460
- const antigravity = await findAntigravityCredentials(modelRegistry);
476
+ const antigravity = await findAntigravityCredentials(modelRegistry, sessionId);
461
477
  if (antigravity) return antigravity;
462
478
  // Fall through to auto-detect if preferred provider key not found.
463
479
  } else if (preferredImageProvider === "gemini") {
@@ -477,7 +493,7 @@ async function findImageApiKey(
477
493
  if (openAI) return openAI;
478
494
 
479
495
  if (modelRegistry) {
480
- const antigravity = await findAntigravityCredentials(modelRegistry);
496
+ const antigravity = await findAntigravityCredentials(modelRegistry, sessionId);
481
497
  if (antigravity) return antigravity;
482
498
  }
483
499
 
@@ -589,12 +605,21 @@ function collectInlineImages(parts: GeminiPart[]): InlineImageData[] {
589
605
  return images;
590
606
  }
591
607
 
592
/**
 * Decide whether `model` can drive image generation through the hosted
 * image_generation tool.
 *
 * The model must use one of the Responses APIs. Beyond that it qualifies either
 * by declaring `"image"` among its outputs (any provider), or by being an
 * `openai` / `openai-codex` model whose id is `o3`, starts with `o3-`, or
 * starts with `gpt-` (case-insensitive).
 */
export function isOpenAIHostedImageModel(model: Model | undefined): model is Model {
  if (!model) return false;

  // The hosted tool is only reachable over the Responses API.
  const usesResponsesApi = model.api === "openai-responses" || model.api === "openai-codex-responses";
  if (!usesResponsesApi) return false;

  // Declared capability wins regardless of provider.
  if (model.output?.includes("image")) return true;

  // Otherwise only first-party providers qualify, by model-id heuristic.
  const isFirstParty = model.provider === "openai" || model.provider === "openai-codex";
  if (!isFirstParty) return false;

  const normalizedId = model.id.toLowerCase();
  return normalizedId.startsWith("gpt-") || normalizedId === "o3" || normalizedId.startsWith("o3-");
}
599
624
 
600
625
  function getOpenAIHostedImageProvider(model: Model): ImageProvider {
@@ -995,7 +1020,9 @@ export const imageGenTool: CustomTool<typeof imageGenSchema, ImageGenToolDetails
995
1020
 
996
1021
  if (provider === "antigravity") {
997
1022
  if (!apiKey.projectId) {
998
- throw new Error("Missing projectId in antigravity credentials");
1023
+ throw new Error(
1024
+ "Antigravity image generation requires a projectId, but the stored google-antigravity credential only contains an access token. Run the google-antigravity login flow again so the projectId is stored, then retry.",
1025
+ );
999
1026
  }
1000
1027
 
1001
1028
  const prompt = assemblePrompt(params);
@@ -13,7 +13,11 @@ import { LspTool } from "../lsp";
13
13
  import type { WorkflowGateEmitter } from "../modes/shared/agent-wire/unattended-session";
14
14
  import type { PlanModeState } from "../plan-mode/state";
15
15
  import type { AgentRegistry } from "../registry/agent-registry";
16
- import type { ForkContextSeed, ForkContextSeedOptions } from "../session/agent-session";
16
+ import type {
17
+ ForkContextSeed,
18
+ ForkContextSeedOptions,
19
+ PurgeQueuedCustomMessagesResult,
20
+ } from "../session/agent-session";
17
21
  import type { ArtifactManager } from "../session/artifacts";
18
22
  import type { ClientBridge } from "../session/client-bridge";
19
23
  import type { CustomMessage } from "../session/messages";
@@ -162,6 +166,8 @@ export interface ToolSession {
162
166
  /** Agent identity used for IRC routing. Returns the registry id (e.g. "0-Main", "0-AuthLoader"). */
163
167
  getAgentId?: () => string | null;
164
168
  /** Look up a registered tool by name (used by the eval js backend's tool bridge). */
169
+ /** Purge undelivered queued custom messages matching the predicate. Returns counts. */
170
+ purgeQueuedCustomMessages?: (predicate: (message: CustomMessage) => boolean) => PurgeQueuedCustomMessagesResult;
165
171
  getToolByName?: (name: string) => AgentTool | undefined;
166
172
  /** Agent registry for IRC routing across live sessions. */
167
173
  agentRegistry?: AgentRegistry;
@@ -78,19 +78,21 @@ export class InspectImageTool implements AgentTool<typeof inspectImageSchema, In
78
78
  };
79
79
 
80
80
  const activeModelPattern = this.session.getActiveModelString?.() ?? this.session.getModelString?.();
81
- const model =
82
- resolvePattern("pi/vision") ??
83
- resolvePattern("pi/default") ??
84
- resolvePattern(activeModelPattern) ??
85
- availableModels[0];
81
+ let model = resolvePattern("pi/default") ?? resolvePattern(activeModelPattern) ?? availableModels[0];
86
82
  if (!model) {
87
83
  throw new ToolError("Unable to resolve a model for inspect_image.");
88
84
  }
89
85
 
86
+ // inspect_image requires image input; if the resolved model is text-only,
87
+ // fall back to any available vision-capable model before failing.
90
88
  if (!model.input.includes("image")) {
91
- throw new ToolError(
92
- `Resolved model ${model.provider}/${model.id} does not support image input. Configure a vision-capable model for modelRoles.vision.`,
93
- );
89
+ const visionModel = availableModels.find(candidate => candidate.input.includes("image"));
90
+ if (!visionModel) {
91
+ throw new ToolError(
92
+ `Resolved model ${model.provider}/${model.id} does not support image input, and no vision-capable model is available. Configure a vision-capable model.`,
93
+ );
94
+ }
95
+ model = visionModel;
94
96
  }
95
97
 
96
98
  const apiKey = await modelRegistry.getApiKey(model);
package/src/tools/job.ts CHANGED
@@ -52,7 +52,7 @@ interface JobSnapshot {
52
52
  errorText?: string;
53
53
  }
54
54
 
55
- type CancelStatus = "cancelled" | "not_found" | "already_completed";
55
+ type CancelStatus = "cancelled" | "not_found" | "already_completed" | "already_cancelled";
56
56
 
57
57
  interface CancelOutcome {
58
58
  id: string;
@@ -115,10 +115,20 @@ export class JobTool implements AgentTool<typeof jobSchema, JobToolDetails> {
115
115
  for (const id of cancelIds) {
116
116
  const existing = manager.getJob(id);
117
117
  if (!existing || (ownerId && existing.ownerId !== ownerId)) {
118
- cancelOutcomes.push({ id, status: "not_found", message: `Background job not found: ${id}` });
118
+ const tombstone = manager.purgeMonitorTombstone(id, ownerFilter);
119
+ cancelOutcomes.push(
120
+ tombstone.found
121
+ ? {
122
+ id,
123
+ status: tombstone.status === "cancelled" ? "already_cancelled" : "already_completed",
124
+ message: `Monitor job ${id} already gone; purged queued notifications.`,
125
+ }
126
+ : { id, status: "not_found", message: `Background job not found: ${id}` },
127
+ );
119
128
  continue;
120
129
  }
121
130
  if (existing.status !== "running") {
131
+ if (existing.metadata?.monitor) manager.purgeMonitorTombstone(id, ownerFilter);
122
132
  cancelOutcomes.push({
123
133
  id,
124
134
  status: "already_completed",