@gajae-code/coding-agent 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/dist/types/async/job-manager.d.ts +25 -0
- package/dist/types/commands/ultragoal.d.ts +1 -0
- package/dist/types/commit/model-selection.d.ts +1 -1
- package/dist/types/config/model-registry.d.ts +3 -1
- package/dist/types/config/model-resolver.d.ts +1 -19
- package/dist/types/config/models-config-schema.d.ts +12 -0
- package/dist/types/config/settings-schema.d.ts +26 -4
- package/dist/types/gjc-runtime/goal-mode-request.d.ts +8 -1
- package/dist/types/gjc-runtime/launch-tmux.d.ts +1 -0
- package/dist/types/gjc-runtime/ultragoal-runtime.d.ts +29 -0
- package/dist/types/harness-control-plane/finalize.d.ts +8 -0
- package/dist/types/harness-control-plane/receipts.d.ts +16 -1
- package/dist/types/harness-control-plane/types.d.ts +16 -3
- package/dist/types/modes/acp/acp-event-mapper.d.ts +2 -0
- package/dist/types/modes/components/custom-editor.d.ts +7 -0
- package/dist/types/modes/shared/agent-wire/command-contract.d.ts +18 -0
- package/dist/types/modes/shared/agent-wire/event-contract.d.ts +84 -0
- package/dist/types/modes/shared/agent-wire/event-envelope.d.ts +14 -7
- package/dist/types/modes/shared/agent-wire/event-observation.d.ts +37 -0
- package/dist/types/modes/shared/agent-wire/protocol.d.ts +13 -34
- package/dist/types/reminders/star-reminder.d.ts +115 -0
- package/dist/types/session/agent-session.d.ts +30 -1
- package/dist/types/session/session-manager.d.ts +1 -1
- package/dist/types/tools/bash.d.ts +2 -0
- package/dist/types/tools/browser/actions.d.ts +54 -0
- package/dist/types/tools/browser.d.ts +80 -0
- package/dist/types/tools/image-gen.d.ts +1 -0
- package/dist/types/tools/index.d.ts +3 -1
- package/dist/types/tools/job.d.ts +1 -1
- package/examples/extensions/README.md +20 -41
- package/package.json +7 -7
- package/src/async/job-manager.ts +120 -1
- package/src/cli/grep-cli.ts +1 -1
- package/src/commands/harness.ts +42 -3
- package/src/commands/ultragoal.ts +8 -1
- package/src/commit/agentic/index.ts +2 -2
- package/src/commit/model-selection.ts +7 -22
- package/src/commit/pipeline.ts +2 -2
- package/src/config/model-registry.ts +17 -9
- package/src/config/model-resolver.ts +14 -84
- package/src/config/models-config-schema.ts +2 -0
- package/src/config/settings-schema.ts +27 -4
- package/src/defaults/gjc/skills/team/SKILL.md +10 -1
- package/src/defaults/gjc/skills/ultragoal/SKILL.md +3 -2
- package/src/gjc-runtime/goal-mode-request.ts +21 -1
- package/src/gjc-runtime/launch-tmux.ts +25 -2
- package/src/gjc-runtime/team-runtime.ts +78 -3
- package/src/gjc-runtime/ultragoal-guard.ts +18 -2
- package/src/gjc-runtime/ultragoal-runtime.ts +240 -30
- package/src/harness-control-plane/finalize.ts +84 -0
- package/src/harness-control-plane/owner.ts +16 -3
- package/src/harness-control-plane/receipts.ts +39 -1
- package/src/harness-control-plane/rpc-adapter.ts +7 -1
- package/src/harness-control-plane/types.ts +33 -12
- package/src/internal-urls/docs-index.generated.ts +3 -3
- package/src/memories/index.ts +1 -1
- package/src/modes/acp/acp-agent.ts +17 -9
- package/src/modes/acp/acp-event-mapper.ts +33 -1
- package/src/modes/components/custom-editor.ts +19 -3
- package/src/modes/controllers/input-controller.ts +27 -7
- package/src/modes/controllers/selector-controller.ts +7 -1
- package/src/modes/interactive-mode.ts +29 -1
- package/src/modes/rpc/rpc-client.ts +16 -3
- package/src/modes/rpc/rpc-mode.ts +5 -2
- package/src/modes/shared/agent-wire/command-contract.ts +18 -0
- package/src/modes/shared/agent-wire/event-contract.ts +147 -0
- package/src/modes/shared/agent-wire/event-envelope.ts +35 -16
- package/src/modes/shared/agent-wire/event-observation.ts +397 -0
- package/src/modes/shared/agent-wire/protocol.ts +24 -81
- package/src/modes/utils/context-usage.ts +2 -2
- package/src/prompts/agents/explore.md +1 -1
- package/src/prompts/agents/plan.md +1 -1
- package/src/prompts/agents/reviewer.md +1 -1
- package/src/prompts/tools/browser.md +3 -2
- package/src/reminders/star-reminder.ts +422 -0
- package/src/runtime-mcp/manager.ts +15 -2
- package/src/sdk.ts +3 -1
- package/src/session/agent-session.ts +139 -17
- package/src/session/session-manager.ts +1 -1
- package/src/task/agents.ts +1 -1
- package/src/tools/bash.ts +6 -1
- package/src/tools/browser/actions.ts +189 -0
- package/src/tools/browser.ts +91 -1
- package/src/tools/image-gen.ts +42 -15
- package/src/tools/index.ts +7 -1
- package/src/tools/inspect-image.ts +10 -8
- package/src/tools/job.ts +12 -2
- package/src/tools/monitor.ts +98 -17
- package/src/utils/commit-message-generator.ts +6 -13
- package/src/utils/title-generator.ts +1 -1
- package/dist/types/harness-control-plane/frame-mapper.d.ts +0 -29
- package/src/harness-control-plane/frame-mapper.ts +0 -286
- package/src/priority.json +0 -37
|
@@ -89,6 +89,15 @@ export interface ForkContextSeedMetadata {
|
|
|
89
89
|
skippedReasons: Record<string, number>;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
+
/** Per-queue removal counts reported by `AgentSession.purgeQueuedCustomMessages`. */
export interface PurgeQueuedCustomMessagesResult {
|
|
93
|
+
/** Matching messages removed from the agent's steering queue. */
agentSteering: number;
|
|
94
|
+
/** Matching messages removed from the agent's follow-up queue. */
agentFollowUp: number;
|
|
95
|
+
/** Matching messages removed from the session's pending next-turn list. */
pendingNextTurn: number;
|
|
96
|
+
/** Steering display entries dropped because their tag belonged to a removed message. */
displaySteering: number;
|
|
97
|
+
/** Follow-up display entries dropped because their tag belonged to a removed message. */
displayFollowUp: number;
|
|
98
|
+
/** Agent-queue removals plus pending next-turn removals; display entries are not counted. */
totalExecutable: number;
|
|
99
|
+
}
|
|
100
|
+
|
|
92
101
|
export interface ForkContextSeed {
|
|
93
102
|
messages: Message[];
|
|
94
103
|
agentMessages: AgentMessage[];
|
|
@@ -822,6 +831,23 @@ function extractPermissionLocations(
|
|
|
822
831
|
* rely on the existing text-equality match. */
|
|
823
832
|
type QueuedDisplayEntry = { text: string; tag?: string };
|
|
824
833
|
|
|
834
|
+
/** A custom message contributed at the before-agent-start point. */
|
|
835
|
+
export type BeforeAgentStartInternalMessage = Pick<
|
|
836
|
+
CustomMessage,
|
|
837
|
+
"customType" | "content" | "display" | "details" | "attribution"
|
|
838
|
+
>;
|
|
839
|
+
|
|
840
|
+
/**
|
|
841
|
+
* Internal (first-party, non-user-hook) contributor invoked at the active
|
|
842
|
+
* before-agent-start point alongside the extension runner. Returns an optional
|
|
843
|
+
* custom message to append to the prompt context. Errors are nonfatal.
|
|
844
|
+
*/
|
|
845
|
+
export type BeforeAgentStartContributor = (event: {
|
|
846
|
+
prompt: string;
|
|
847
|
+
images?: ImageContent[];
|
|
848
|
+
sessionId: string | undefined;
|
|
849
|
+
}) => Promise<BeforeAgentStartInternalMessage | undefined>;
|
|
850
|
+
|
|
825
851
|
export class AgentSession {
|
|
826
852
|
readonly agent: Agent;
|
|
827
853
|
readonly sessionManager: SessionManager;
|
|
@@ -922,6 +948,8 @@ export class AgentSession {
|
|
|
922
948
|
// Extension system
|
|
923
949
|
#extensionRunner: ExtensionRunner | undefined = undefined;
|
|
924
950
|
#turnIndex = 0;
|
|
951
|
+
// First-party internal before-agent-start contributors (not user hooks).
|
|
952
|
+
#beforeAgentStartContributors: BeforeAgentStartContributor[] = [];
|
|
925
953
|
|
|
926
954
|
#skills: Skill[];
|
|
927
955
|
#skillWarnings: SkillWarning[];
|
|
@@ -4344,7 +4372,10 @@ export class AgentSession {
|
|
|
4344
4372
|
|
|
4345
4373
|
async #activatePendingGjcGoalModeRequest(): Promise<boolean> {
|
|
4346
4374
|
if (!this.settings.get("goal.enabled")) return false;
|
|
4347
|
-
const pendingGoal = await consumePendingGoalModeRequest(
|
|
4375
|
+
const pendingGoal = await consumePendingGoalModeRequest(
|
|
4376
|
+
this.sessionManager.getCwd(),
|
|
4377
|
+
this.sessionManager.getSessionId(),
|
|
4378
|
+
);
|
|
4348
4379
|
if (!pendingGoal) return false;
|
|
4349
4380
|
const currentState = this.getGoalModeState();
|
|
4350
4381
|
if (currentState?.goal && currentState.goal.status !== "complete" && currentState.goal.status !== "dropped") {
|
|
@@ -4756,6 +4787,9 @@ export class AgentSession {
|
|
|
4756
4787
|
|
|
4757
4788
|
const beforeAgentStartSystemPrompt = await this.#buildSystemPromptForAgentStart(expandedText);
|
|
4758
4789
|
|
|
4790
|
+
const promptAttribution: "user" | "agent" | undefined =
|
|
4791
|
+
"attribution" in message ? message.attribution : undefined;
|
|
4792
|
+
|
|
4759
4793
|
// Emit before_agent_start extension event
|
|
4760
4794
|
if (this.#extensionRunner) {
|
|
4761
4795
|
const result = await this.#extensionRunner.emitBeforeAgentStart(
|
|
@@ -4764,19 +4798,7 @@ export class AgentSession {
|
|
|
4764
4798
|
beforeAgentStartSystemPrompt,
|
|
4765
4799
|
);
|
|
4766
4800
|
if (result?.messages) {
|
|
4767
|
-
|
|
4768
|
-
"attribution" in message ? message.attribution : undefined;
|
|
4769
|
-
for (const msg of result.messages) {
|
|
4770
|
-
messages.push({
|
|
4771
|
-
role: "custom",
|
|
4772
|
-
customType: msg.customType,
|
|
4773
|
-
content: msg.content,
|
|
4774
|
-
display: msg.display,
|
|
4775
|
-
details: msg.details,
|
|
4776
|
-
attribution: msg.attribution ?? promptAttribution ?? (message.role === "user" ? "user" : "agent"),
|
|
4777
|
-
timestamp: Date.now(),
|
|
4778
|
-
});
|
|
4779
|
-
}
|
|
4801
|
+
this.#appendBeforeAgentStartCustomMessages(messages, result.messages, promptAttribution, message.role);
|
|
4780
4802
|
}
|
|
4781
4803
|
|
|
4782
4804
|
if (result?.systemPrompt !== undefined) {
|
|
@@ -4788,6 +4810,26 @@ export class AgentSession {
|
|
|
4788
4810
|
this.agent.setSystemPrompt(beforeAgentStartSystemPrompt);
|
|
4789
4811
|
}
|
|
4790
4812
|
|
|
4813
|
+
// Invoke first-party internal before-agent-start contributors. These run
|
|
4814
|
+
// alongside the extension runner (not via user-loaded hooks) and append
|
|
4815
|
+
// through the same custom-message attribution path. Errors are nonfatal.
|
|
4816
|
+
if (this.#beforeAgentStartContributors.length > 0) {
|
|
4817
|
+
const contributed: BeforeAgentStartInternalMessage[] = [];
|
|
4818
|
+
for (const contributor of this.#beforeAgentStartContributors) {
|
|
4819
|
+
try {
|
|
4820
|
+
const msg = await contributor({
|
|
4821
|
+
prompt: expandedText,
|
|
4822
|
+
images: options?.images,
|
|
4823
|
+
sessionId: this.sessionId,
|
|
4824
|
+
});
|
|
4825
|
+
if (msg) contributed.push(msg);
|
|
4826
|
+
} catch (err) {
|
|
4827
|
+
logger.debug("before_agent_start contributor failed", { error: String(err) });
|
|
4828
|
+
}
|
|
4829
|
+
}
|
|
4830
|
+
this.#appendBeforeAgentStartCustomMessages(messages, contributed, promptAttribution, message.role);
|
|
4831
|
+
}
|
|
4832
|
+
|
|
4791
4833
|
// Bail out if a newer abort/prompt cycle has started since we began setup
|
|
4792
4834
|
if (this.#promptGeneration !== generation) {
|
|
4793
4835
|
return;
|
|
@@ -5028,6 +5070,10 @@ export class AgentSession {
|
|
|
5028
5070
|
this.#queueHiddenNextTurnMessage(message, true);
|
|
5029
5071
|
}
|
|
5030
5072
|
|
|
5073
|
+
/**
 * Public pass-through to `#queueHiddenNextTurnMessage`.
 * NOTE(review): the name suggests this exists for tests only — confirm no production caller relies on it.
 *
 * @param message - Custom message to append to the pending next-turn list.
 * @param triggerTurn - Forwarded unchanged; defaults to `true`.
 */
queueDeferredMessageForTests(message: CustomMessage, triggerTurn = true): void {
|
|
5074
|
+
this.#queueHiddenNextTurnMessage(message, triggerTurn);
|
|
5075
|
+
}
|
|
5076
|
+
|
|
5031
5077
|
#queueHiddenNextTurnMessage(message: CustomMessage, triggerTurn: boolean): void {
|
|
5032
5078
|
this.#pendingNextTurnMessages.push(message);
|
|
5033
5079
|
if (!triggerTurn) return;
|
|
@@ -5200,6 +5246,46 @@ export class AgentSession {
|
|
|
5200
5246
|
);
|
|
5201
5247
|
}
|
|
5202
5248
|
|
|
5249
|
+
/**
 * Remove undelivered queued custom messages matching `predicate` from executable queues and tagged display mirrors.
 *
 * Three executable sources are purged: the agent's steering queue, the agent's
 * follow-up queue, and this session's pending next-turn list. Every removed
 * message that carries a pending-display tag also has its mirror entry dropped
 * from the steering / follow-up display lists.
 *
 * `predicate` is evaluated exactly once per pending next-turn message, so the
 * reported count and the collected display tags always describe the same set.
 * For the agent queues it is evaluated once on the snapshot (to collect tags)
 * and again inside `removeQueuedMessages`, so it should be free of side effects.
 *
 * @param predicate - Returns `true` for custom messages that must be removed.
 * @returns Per-queue removal counts; `totalExecutable` excludes display entries.
 */
purgeQueuedCustomMessages(predicate: (message: CustomMessage) => boolean): PurgeQueuedCustomMessagesResult {
	const isMatch = (m: AgentMessage): boolean => m.role === "custom" && predicate(m as CustomMessage);
	const removedTags = new Set<string>();
	const rememberTag = (details: CustomMessage["details"]): void => {
		const tag = readPendingDisplayTag(details);
		if (tag) removedTags.add(tag);
	};

	// Collect display tags before the agent queues are mutated.
	for (const m of [...this.agent.snapshotSteering(), ...this.agent.snapshotFollowUp()]) {
		if (isMatch(m)) rememberTag((m as CustomMessage).details);
	}
	const agentRemoved = this.agent.removeQueuedMessages(isMatch);

	// Single partition pass: one predicate call decides both removal and tag collection.
	const keptNextTurn: CustomMessage[] = [];
	let pendingNextTurn = 0;
	for (const m of this.#pendingNextTurnMessages) {
		if (predicate(m)) {
			pendingNextTurn += 1;
			rememberTag(m.details);
		} else {
			keptNextTurn.push(m);
		}
	}
	this.#pendingNextTurnMessages = keptNextTurn;

	let displaySteering = 0;
	let displayFollowUp = 0;
	if (removedTags.size > 0) {
		const beforeS = this.#steeringMessages.length;
		this.#steeringMessages = this.#steeringMessages.filter(e => !(e.tag && removedTags.has(e.tag)));
		displaySteering = beforeS - this.#steeringMessages.length;
		const beforeF = this.#followUpMessages.length;
		this.#followUpMessages = this.#followUpMessages.filter(e => !(e.tag && removedTags.has(e.tag)));
		displayFollowUp = beforeF - this.#followUpMessages.length;
	}
	return {
		agentSteering: agentRemoved.steering,
		agentFollowUp: agentRemoved.followUp,
		pendingNextTurn,
		displaySteering,
		displayFollowUp,
		totalExecutable: agentRemoved.total + pendingNextTurn,
	};
}
|
|
5288
|
+
|
|
5203
5289
|
/**
|
|
5204
5290
|
* Send a user message to the agent.
|
|
5205
5291
|
* When deliverAs is set, queue the message instead of starting a new turn.
|
|
@@ -5674,7 +5760,7 @@ export class AgentSession {
|
|
|
5674
5760
|
/**
|
|
5675
5761
|
* Cycle through configured role models in a fixed order.
|
|
5676
5762
|
* Skips missing roles.
|
|
5677
|
-
* @param roleOrder - Order of roles to cycle through (e.g., ["
|
|
5763
|
+
* @param roleOrder - Order of roles to cycle through (e.g., ["default"])
|
|
5678
5764
|
* @param options - Optional settings: `temporary` to not persist to settings
|
|
5679
5765
|
*/
|
|
5680
5766
|
async cycleRoleModels(
|
|
@@ -6420,7 +6506,7 @@ export class AgentSession {
|
|
|
6420
6506
|
if (pruneResult) {
|
|
6421
6507
|
contextTokens = Math.max(0, contextTokens - pruneResult.tokensSaved);
|
|
6422
6508
|
}
|
|
6423
|
-
if (shouldCompact(contextTokens, contextWindow, compactionSettings)) {
|
|
6509
|
+
if (shouldCompact(contextTokens, contextWindow, compactionSettings, this.model?.maxTokens ?? 0)) {
|
|
6424
6510
|
// Try promotion first — if a larger model is available, switch instead of compacting
|
|
6425
6511
|
const promoted = await this.#tryContextPromotion(assistantMessage);
|
|
6426
6512
|
if (!promoted) {
|
|
@@ -7056,7 +7142,7 @@ export class AgentSession {
|
|
|
7056
7142
|
}
|
|
7057
7143
|
return new Error(
|
|
7058
7144
|
`Compaction requires usable credentials for ${currentModel.provider}/${currentModel.id}. ` +
|
|
7059
|
-
`Configure ${currentModel.provider} credentials or assign an authenticated fallback
|
|
7145
|
+
`Configure ${currentModel.provider} credentials or assign an authenticated fallback via modelRoles.default.`,
|
|
7060
7146
|
);
|
|
7061
7147
|
}
|
|
7062
7148
|
|
|
@@ -9622,6 +9708,42 @@ export class AgentSession {
|
|
|
9622
9708
|
return this.#extensionRunner?.hasHandlers(eventType) ?? false;
|
|
9623
9709
|
}
|
|
9624
9710
|
|
|
9711
|
+
/**
 * Add a first-party before-agent-start contributor to this session.
 *
 * This is an in-core seam that runs alongside the extension runner; it is not
 * part of user-facing hook discovery.
 *
 * @param contributor - Callback to invoke at the before-agent-start point.
 * @returns A disposer that removes `contributor` again; calling it when the
 *   contributor is no longer registered is a no-op.
 */
registerBeforeAgentStartContributor(contributor: BeforeAgentStartContributor): () => void {
	this.#beforeAgentStartContributors.push(contributor);
	const unregister = (): void => {
		const position = this.#beforeAgentStartContributors.indexOf(contributor);
		if (position === -1) return;
		this.#beforeAgentStartContributors.splice(position, 1);
	};
	return unregister;
}
|
|
9723
|
+
|
|
9724
|
+
/**
 * Push before-agent-start custom messages onto `target`, applying a single
 * attribution rule for both extension-runner and internal-contributor output.
 *
 * Attribution precedence: the message's own `attribution`, then
 * `promptAttribution`, then `"user"` when `messageRole` is `"user"`, else `"agent"`.
 * Each appended message gets its own `Date.now()` timestamp.
 *
 * @param target - Message list that receives the new `role: "custom"` entries.
 * @param returned - Messages produced at the before-agent-start point.
 * @param promptAttribution - Attribution taken from the triggering prompt, if any.
 * @param messageRole - Role of the triggering message, used as the last fallback.
 */
#appendBeforeAgentStartCustomMessages(
	target: AgentMessage[],
	returned: readonly BeforeAgentStartInternalMessage[],
	promptAttribution: "user" | "agent" | undefined,
	messageRole: string,
): void {
	// The fallback is independent of the individual message, so resolve it once.
	const roleDefault: "user" | "agent" = messageRole === "user" ? "user" : "agent";
	const fallbackAttribution = promptAttribution ?? roleDefault;
	for (const { customType, content, display, details, attribution } of returned) {
		target.push({
			role: "custom",
			customType,
			content,
			display,
			details,
			attribution: attribution ?? fallbackAttribution,
			timestamp: Date.now(),
		});
	}
}
|
|
9746
|
+
|
|
9625
9747
|
/**
|
|
9626
9748
|
* Get the extension runner (for setting UI context and error handlers).
|
|
9627
9749
|
*/
|
|
@@ -104,7 +104,7 @@ export interface ModelChangeEntry extends SessionEntryBase {
|
|
|
104
104
|
type: "model_change";
|
|
105
105
|
/** Model in "provider/modelId" format */
|
|
106
106
|
model: string;
|
|
107
|
-
/** Role: "default"
|
|
107
|
+
/** Role: "default" or an agent role. Undefined treated as "default" */
|
|
108
108
|
role?: string;
|
|
109
109
|
}
|
|
110
110
|
|
package/src/task/agents.ts
CHANGED
|
@@ -59,7 +59,7 @@ const EMBEDDED_AGENT_DEFS: EmbeddedAgentDef[] = [
|
|
|
59
59
|
name: "task",
|
|
60
60
|
description: "General-purpose subagent with full capabilities for delegated multi-step tasks",
|
|
61
61
|
spawns: "*",
|
|
62
|
-
model: "pi/
|
|
62
|
+
model: "pi/default",
|
|
63
63
|
thinkingLevel: Effort.Medium,
|
|
64
64
|
hide: true,
|
|
65
65
|
},
|
package/src/tools/bash.ts
CHANGED
|
@@ -609,6 +609,8 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
609
609
|
label?: string;
|
|
610
610
|
ctx?: AgentToolContext;
|
|
611
611
|
onRawLine?: (line: string, jobId: string) => void;
|
|
612
|
+
shouldAcceptRawLine?: (jobId: string) => boolean;
|
|
613
|
+
lifecycle?: import("../async").AsyncJobLifecycleCleanup;
|
|
612
614
|
} = {},
|
|
613
615
|
): Promise<{ jobId: string; label: string; commandCwd: string }> {
|
|
614
616
|
const manager = AsyncJobManager.instance();
|
|
@@ -624,12 +626,14 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
624
626
|
let cursorOffset = 0;
|
|
625
627
|
let lineBuffer = "";
|
|
626
628
|
const dispatchLines = (chunk: string) => {
|
|
629
|
+
if (opts.shouldAcceptRawLine?.(currentJobId) === false) return;
|
|
627
630
|
if (!onRawLine) return;
|
|
628
631
|
lineBuffer += chunk;
|
|
629
632
|
let newlineIndex = lineBuffer.indexOf("\n");
|
|
630
633
|
while (newlineIndex !== -1) {
|
|
631
634
|
const line = lineBuffer.slice(0, newlineIndex);
|
|
632
635
|
lineBuffer = lineBuffer.slice(newlineIndex + 1);
|
|
636
|
+
if (opts.shouldAcceptRawLine?.(currentJobId) === false) return;
|
|
633
637
|
try {
|
|
634
638
|
onRawLine(line, currentJobId);
|
|
635
639
|
} catch (error) {
|
|
@@ -642,6 +646,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
642
646
|
};
|
|
643
647
|
const flushTrailingLine = () => {
|
|
644
648
|
if (!onRawLine) return;
|
|
649
|
+
if (opts.shouldAcceptRawLine?.(currentJobId) === false) return;
|
|
645
650
|
if (lineBuffer.length === 0) return;
|
|
646
651
|
const remainder = lineBuffer;
|
|
647
652
|
lineBuffer = "";
|
|
@@ -693,7 +698,7 @@ export class BashTool implements AgentTool<BashToolSchema, BashToolDetails> {
|
|
|
693
698
|
throw error instanceof Error ? error : new Error(String(error));
|
|
694
699
|
}
|
|
695
700
|
},
|
|
696
|
-
{ ownerId, metadata: { monitor: true } },
|
|
701
|
+
{ ownerId, metadata: { monitor: true }, lifecycle: opts.lifecycle },
|
|
697
702
|
);
|
|
698
703
|
currentJobId = jobId;
|
|
699
704
|
return { jobId, label, commandCwd: prepared.commandCwd };
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
 * Structured browser action space.
 *
 * Adapts the SOTA computer-use / browser-use pattern: instead of authoring raw
 * JavaScript for every interaction, the model emits a list of structured verbs
 * (navigate / click / type / …) that reference elements by the numeric `id`
 * returned from {@link Observation}. Each verb is compiled onto the existing
 * in-tab `tab.*` helpers and executed through the same worker `run` path, so the
 * worker protocol is unchanged and the raw-JS `run` escape hatch still works.
 */

export type BrowserActionVerb =
	| "navigate"
	| "click"
	| "type"
	| "fill"
	| "select"
	| "press"
	| "scroll"
	| "back"
	| "wait"
	| "observe"
	| "extract"
	| "screenshot";

export interface BrowserActionStep {
	verb: BrowserActionVerb;
	/** Element id from a prior `observe` (preferred for click/type). */
	id?: number;
	/** CSS / puppeteer selector when not addressing by `id`. */
	selector?: string;
	/** Text to type. */
	text?: string;
	/** Value for `fill`. */
	value?: string;
	/** Option value(s) for `select`. */
	values?: string[];
	/** URL for `navigate`. */
	url?: string;
	/** Key for `press` (e.g. "Enter"). */
	key?: string;
	/** Horizontal scroll delta. */
	dx?: number;
	/** Vertical scroll delta. */
	dy?: number;
	/** Sleep duration for `wait` when no selector is given. */
	ms?: number;
	/** Extract format. */
	format?: "markdown" | "text" | "html";
	/** Navigation wait condition for `navigate`. */
	wait_until?: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
	/** Only return interactive/viewport elements for `observe`. */
	viewport_only?: boolean;
	include_all?: boolean;
}

const VERBS: ReadonlySet<BrowserActionVerb> = new Set([
	"navigate",
	"click",
	"type",
	"fill",
	"select",
	"press",
	"scroll",
	"back",
	"wait",
	"observe",
	"extract",
	"screenshot",
]);

/** True when the step carries a selector with at least one non-whitespace character. */
function hasSelector(step: BrowserActionStep): boolean {
	return typeof step.selector === "string" && step.selector.trim().length > 0;
}

/**
 * True when a numeric field is present but not finite. `JSON.stringify` turns
 * `NaN` / `Infinity` into `null`, so such a value would silently vanish on its
 * way into the compiled program.
 */
function isNonFinite(value: number | undefined): boolean {
	return value != null && !Number.isFinite(value);
}

/**
 * Validate a single step's required fields. Returns an error string, or
 * `undefined` when the step is well-formed.
 *
 * @param step - The step to check.
 * @param index - Position of the step in the list; used only in the error text.
 */
export function validateActionStep(step: BrowserActionStep, index: number): string | undefined {
	const where = `actions[${index}] (${step.verb})`;
	if (!VERBS.has(step.verb)) return `${where}: unknown verb`;
	switch (step.verb) {
		case "navigate":
			if (!step.url?.trim()) return `${where}: 'url' is required`;
			return undefined;
		case "click":
			if (step.id == null && !hasSelector(step)) return `${where}: 'id' or 'selector' is required`;
			if (isNonFinite(step.id)) return `${where}: 'id' must be a finite number`;
			return undefined;
		case "type":
			if (step.id == null && !hasSelector(step)) return `${where}: 'id' or 'selector' is required`;
			if (isNonFinite(step.id)) return `${where}: 'id' must be a finite number`;
			if (step.text === undefined) return `${where}: 'text' is required`;
			return undefined;
		case "fill":
			if (!hasSelector(step)) return `${where}: 'selector' is required`;
			if (step.value === undefined) return `${where}: 'value' is required`;
			return undefined;
		case "select":
			if (!hasSelector(step)) return `${where}: 'selector' is required`;
			if (!step.values?.length) return `${where}: 'values' is required`;
			return undefined;
		case "press":
			if (!step.key?.trim()) return `${where}: 'key' is required`;
			return undefined;
		case "scroll":
			if (step.dx == null && step.dy == null) return `${where}: 'dx' or 'dy' is required`;
			if (isNonFinite(step.dx) || isNonFinite(step.dy)) return `${where}: 'dx' and 'dy' must be finite numbers`;
			return undefined;
		case "wait":
			if (!hasSelector(step) && step.ms == null) return `${where}: 'selector' or 'ms' is required`;
			// `ms` is only consumed when there is no usable selector.
			if (!hasSelector(step) && isNonFinite(step.ms)) return `${where}: 'ms' must be a finite number`;
			return undefined;
		default:
			// back / observe / extract / screenshot take no required fields
			return undefined;
	}
}

/**
 * Validate the full step list. Throws on the first invalid step.
 *
 * @throws Error when the list is empty or any step fails {@link validateActionStep}.
 */
export function validateActionSteps(steps: readonly BrowserActionStep[]): void {
	if (steps.length === 0) throw new Error("browser 'act' requires a non-empty 'actions' list");
	steps.forEach((step, i) => {
		const error = validateActionStep(step, i);
		if (error) throw new Error(error);
	});
}

/**
 * Compile structured steps into a JS program for the in-tab `run` worker. Steps
 * are embedded as parsed JSON (no string interpolation, so values cannot inject
 * code) and dispatched by a fixed interpreter against the `tab` / `page` helpers.
 *
 * The generated program uses top-level `await` and `return`, so it must be
 * evaluated as the body of an async function with `tab` and `page` in scope.
 * The interpreter applies the same "blank selector means no selector" rule as
 * the validator, so both sides agree on which branch a step takes.
 *
 * @throws Error when validation fails (see {@link validateActionSteps}).
 */
export function compileActionSteps(steps: readonly BrowserActionStep[]): string {
	validateActionSteps(steps);
	// Double-encode: the inner JSON is the data, the outer JSON is a JS string literal.
	const stepsLiteral = JSON.stringify(JSON.stringify(steps));
	return `
const __steps = JSON.parse(${stepsLiteral});
const __hasSelector = (s) => typeof s.selector === "string" && s.selector.trim() !== "";
const __results = [];
for (const s of __steps) {
	switch (s.verb) {
		case "navigate":
			await tab.goto(s.url, s.wait_until ? { waitUntil: s.wait_until } : undefined);
			__results.push({ verb: "navigate", url: s.url });
			break;
		case "click":
			if (s.id !== undefined && s.id !== null) { await (await tab.id(s.id)).click(); }
			else { await tab.click(s.selector); }
			__results.push({ verb: "click", id: s.id ?? null, selector: s.selector ?? null });
			break;
		case "type":
			if (s.id !== undefined && s.id !== null) { await (await tab.id(s.id)).type(s.text); }
			else { await tab.type(s.selector, s.text); }
			__results.push({ verb: "type", id: s.id ?? null, selector: s.selector ?? null });
			break;
		case "fill":
			await tab.fill(s.selector, s.value);
			__results.push({ verb: "fill", selector: s.selector });
			break;
		case "select":
			__results.push({ verb: "select", selected: await tab.select(s.selector, ...(s.values || [])) });
			break;
		case "press":
			await tab.press(s.key, __hasSelector(s) ? { selector: s.selector } : undefined);
			__results.push({ verb: "press", key: s.key });
			break;
		case "scroll":
			await tab.scroll(s.dx || 0, s.dy || 0);
			__results.push({ verb: "scroll", dx: s.dx || 0, dy: s.dy || 0 });
			break;
		case "back":
			await page.goBack();
			__results.push({ verb: "back" });
			break;
		case "wait":
			if (__hasSelector(s)) { await tab.waitFor(s.selector); }
			else { await new Promise(r => setTimeout(r, s.ms)); }
			__results.push({ verb: "wait", selector: s.selector ?? null, ms: s.ms ?? null });
			break;
		case "observe":
			__results.push({ verb: "observe", observation: await tab.observe({ viewportOnly: s.viewport_only === true, includeAll: s.include_all === true }) });
			break;
		case "extract":
			__results.push({ verb: "extract", content: await tab.extract(s.format || "markdown") });
			break;
		case "screenshot":
			await tab.screenshot({});
			__results.push({ verb: "screenshot" });
			break;
		default:
			throw new Error("Unknown browser action verb: " + s.verb);
	}
}
return __results;
`;
}
|
package/src/tools/browser.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { prompt, untilAborted } from "@gajae-code/utils";
|
|
|
3
3
|
import * as z from "zod/v4";
|
|
4
4
|
import browserDescription from "../prompts/tools/browser.md" with { type: "text" };
|
|
5
5
|
import type { ToolSession } from "../sdk";
|
|
6
|
+
import { type BrowserActionStep, compileActionSteps } from "./browser/actions";
|
|
6
7
|
import { acquireBrowser, type BrowserHandle, type BrowserKind, type BrowserKindTag } from "./browser/registry";
|
|
7
8
|
import type { Observation, ScreenshotResult } from "./browser/tab-protocol";
|
|
8
9
|
import { acquireTab, dropHeadlessTabs, getTab, releaseAllTabs, releaseTab, runInTab } from "./browser/tab-supervisor";
|
|
@@ -24,8 +25,44 @@ const appSchema = z.object({
|
|
|
24
25
|
target: z.string().describe("substring to pick a window").optional(),
|
|
25
26
|
});
|
|
26
27
|
|
|
28
|
+
const actionStepSchema = z.object({
|
|
29
|
+
verb: z
|
|
30
|
+
.enum([
|
|
31
|
+
"navigate",
|
|
32
|
+
"click",
|
|
33
|
+
"type",
|
|
34
|
+
"fill",
|
|
35
|
+
"select",
|
|
36
|
+
"press",
|
|
37
|
+
"scroll",
|
|
38
|
+
"back",
|
|
39
|
+
"wait",
|
|
40
|
+
"observe",
|
|
41
|
+
"extract",
|
|
42
|
+
"screenshot",
|
|
43
|
+
])
|
|
44
|
+
.describe("structured action verb"),
|
|
45
|
+
id: z.number().describe("element id from a prior observe").optional(),
|
|
46
|
+
selector: z.string().describe("css/puppeteer selector").optional(),
|
|
47
|
+
text: z.string().describe("text to type").optional(),
|
|
48
|
+
value: z.string().describe("value for fill").optional(),
|
|
49
|
+
values: z.array(z.string()).describe("option value(s) for select").optional(),
|
|
50
|
+
url: z.string().describe("url for navigate").optional(),
|
|
51
|
+
key: z.string().describe("key for press, e.g. Enter").optional(),
|
|
52
|
+
dx: z.number().describe("horizontal scroll delta").optional(),
|
|
53
|
+
dy: z.number().describe("vertical scroll delta").optional(),
|
|
54
|
+
ms: z.number().describe("sleep ms for wait without selector").optional(),
|
|
55
|
+
format: z.enum(["markdown", "text", "html"]).describe("extract format").optional(),
|
|
56
|
+
wait_until: z
|
|
57
|
+
.enum(["load", "domcontentloaded", "networkidle0", "networkidle2"])
|
|
58
|
+
.describe("navigation wait condition for navigate")
|
|
59
|
+
.optional(),
|
|
60
|
+
viewport_only: z.boolean().describe("observe: only viewport elements").optional(),
|
|
61
|
+
include_all: z.boolean().describe("observe: include non-interactive elements").optional(),
|
|
62
|
+
});
|
|
63
|
+
|
|
27
64
|
const browserSchema = z.object({
|
|
28
|
-
action: z.enum(["open", "close", "run"] as const).describe("operation"),
|
|
65
|
+
action: z.enum(["open", "close", "run", "act"] as const).describe("operation"),
|
|
29
66
|
name: z.string().describe("tab id (default 'main')").optional(),
|
|
30
67
|
url: z.string().describe("url to open").optional(),
|
|
31
68
|
app: appSchema.optional(),
|
|
@@ -45,6 +82,7 @@ const browserSchema = z.object({
|
|
|
45
82
|
.describe("auto-handle dialogs")
|
|
46
83
|
.optional(),
|
|
47
84
|
code: z.string().describe("js body to run in tab").optional(),
|
|
85
|
+
actions: z.array(actionStepSchema).describe("structured action steps for action 'act'").optional(),
|
|
48
86
|
timeout: z.number().default(30).describe("timeout in seconds (default 30, max 300)").optional(),
|
|
49
87
|
all: z.boolean().describe("close every tab").optional(),
|
|
50
88
|
kill: z.boolean().describe("also kill spawned-app browsers").optional(),
|
|
@@ -126,6 +164,8 @@ export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolD
|
|
|
126
164
|
return await this.#close(name, params, details, signal);
|
|
127
165
|
case "run":
|
|
128
166
|
return await this.#run(name, params, details, timeoutMs, signal);
|
|
167
|
+
case "act":
|
|
168
|
+
return await this.#act(name, params, details, timeoutMs, signal);
|
|
129
169
|
default:
|
|
130
170
|
throw new ToolError(`Unsupported action: ${(params as BrowserParams).action}`);
|
|
131
171
|
}
|
|
@@ -259,6 +299,56 @@ export class BrowserTool implements AgentTool<typeof browserSchema, BrowserToolD
|
|
|
259
299
|
details.result = textOnly;
|
|
260
300
|
return toolResult(details).content(content).done();
|
|
261
301
|
}
|
|
302
|
+
|
|
303
|
+
/**
 * Handle the `act` operation: compile the structured `actions` list and run
 * the resulting program in the named tab through `runInTab`.
 *
 * @param name - Tab id to act on; the tab must already be open.
 * @param params - Tool parameters; `actions` must be a non-empty list.
 * @param details - Result details object, filled in with browser kind, url,
 *   screenshots and the joined text output.
 * @param timeoutMs - Timeout forwarded to `runInTab`.
 * @param signal - Optional abort signal forwarded to `runInTab`.
 * @throws ToolError when `actions` is missing/empty, the tab does not exist,
 *   or a step fails compilation/validation.
 */
async #act(
	name: string,
	params: BrowserParams,
	details: BrowserToolDetails,
	timeoutMs: number,
	signal?: AbortSignal,
): Promise<AgentToolResult<BrowserToolDetails>> {
	const actionSteps = (params.actions ?? []) as BrowserActionStep[];
	if (actionSteps.length === 0) {
		throw new ToolError("Missing required parameter 'actions' for action 'act'.");
	}

	const targetTab = getTab(name);
	if (!targetTab) {
		throw new ToolError(`No tab named ${JSON.stringify(name)}. Open it first with action 'open'.`);
	}
	details.browser = targetTab.browser.kind.kind;
	details.url = targetTab.info.url;

	// Compilation validates every step; surface its plain Error as a ToolError.
	let program: string;
	try {
		program = compileActionSteps(actionSteps);
	} catch (cause) {
		const reason = cause instanceof Error ? cause.message : String(cause);
		throw new ToolError(reason);
	}

	const outcome = await runInTab(name, {
		code: program,
		timeoutMs,
		signal,
		session: this.session,
	});
	const { displays, returnValue, screenshots } = outcome;

	if (screenshots.length) details.screenshots = screenshots;

	const content = [...displays];
	if (returnValue !== undefined) {
		content.push({ type: "text", text: stringifyReturnValue(returnValue) });
	}
	// Guarantee at least one content entry so the caller always gets feedback.
	if (!content.length) {
		content.push({ type: "text", text: `Ran ${actionSteps.length} action(s) on tab ${JSON.stringify(name)}` });
	}

	const textParts = content.filter((c): c is { type: "text"; text: string } => c.type === "text");
	const textLines = textParts.map(part => part.text);
	details.result = textLines.join("\n");
	return toolResult(details).content(content).done();
}
|
|
262
352
|
}
|
|
263
353
|
|
|
264
354
|
function describeBrowser(handle: BrowserHandle): string {
|