npm - @electric-ax/agents - Versions diffs - 0.4.17 → 0.4.19 - Mend

@electric-ax/agents 0.4.17 → 0.4.19

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.

Files changed (6)

package/dist/entrypoint.js (changed)

@@ -4,8 +4,8 @@ import { cacheStores, getGlobalDispatcher, interceptors, setGlobalDispatcher } f
 import fs from "node:fs";
 import pino from "pino";
 import { fileURLToPath } from "node:url";
-import { MOONSHOT_API_BASE_URL, MOONSHOT_PROVIDER, appendPathToUrl, buildSkillSlashCommands, completeWithLowCostModel, createContextSkillLoader, createEntityRegistry, createPullWakeRunner, createRuntimeHandler, createSkillsRegistry, db, detectAvailableProviders, getMoonshotApiKey, getMoonshotModel, getMoonshotModels, pgSync, readCodexAccessToken, registerToolProvider, unregisterToolProvider } from "@electric-ax/agents-runtime";
-import { braveSearchTool, createBashTool, createEditTool, createEventSourceTools, createFetchUrlTool, createReadFileTool, createScheduleTools, createSendTool, createWriteTool } from "@electric-ax/agents-runtime/tools";
+import { GOAL_SLASH_COMMAND, MOONSHOT_API_BASE_URL, MOONSHOT_PROVIDER, appendPathToUrl, buildSkillSlashCommands, commentsCollection, completeWithLowCostModel, createContextSkillLoader, createEntityRegistry, createPullWakeRunner, createRuntimeHandler, createSkillsRegistry, db, detectAvailableProviders, dispatchGoalCommand, formatTokenCount, getMoonshotApiKey, getMoonshotModel, getMoonshotModels, isGoalCommandText, parseGoalCommand, pgSync, readCodexAccessToken, registerToolProvider, unregisterToolProvider } from "@electric-ax/agents-runtime";
+import { braveSearchTool, createBashTool, createEditTool, createFetchUrlTool, createMarkGoalCompleteTool, createReadFileTool, createScheduleTools, createSendTool, createWebhookSourceTools, createWriteTool } from "@electric-ax/agents-runtime/tools";
 import { chooseDefaultSandbox, isE2BAvailable, lazySandbox, remoteSandbox } from "@electric-ax/agents-runtime/sandbox";
 import { z } from "zod";
 import { createHash } from "node:crypto";
@@ -814,7 +814,7 @@ function createSpawnWorkerTool(ctx, modelConfig) {
 //#endregion
 //#region src/tools/observe-pg-sync.ts
-function asToolResult(value) {
+function asToolResult$1(value) {
 	return {
 		content: [{
 			type: `text`,
@@ -832,9 +832,9 @@ function createObservePgSyncTool(ctx) {
 	return {
 		name: `observe_pg_sync`,
 		label: `Observe Postgres Sync`,
-		description: `Observe an Electric Postgres shape stream and wake this agent when matching row changes arrive.`,
+		description: `Observe an Electric Postgres shape stream and wake this agent when matching row changes arrive. Requires the HTTP(S) URL of an Electric shape endpoint — ask the user for it if you don't know it. Registration validates the endpoint up front and fails with Electric's error if the shape can't be fetched.`,
 		parameters: Type.Object({
-			url: Type.Optional(Type.String({ description: `Optional Electric shape endpoint URL. Defaults to the server-configured pg-sync URL.` })),
+			url: Type.String({ description: `HTTP(S) URL of the Electric shape endpoint, e.g. http://localhost:3000/v1/shape. Not a postgres:// connection string. Never guess this — ask the user if it hasn't been provided.` }),
 			table: Type.String({
 				minLength: 1,
 				pattern: `\\S`,
@@ -851,6 +851,7 @@ function createObservePgSyncTool(ctx) {
 		}),
 		execute: async (_toolCallId, params) => {
 			const args = params;
+			if (typeof args.url !== `string` || args.url.trim().length === 0) throw new Error(`url is required`);
 			if (typeof args.table !== `string` || args.table.trim().length === 0) throw new Error(`table is required`);
 			const source = pgSync({
 				url: args.url,
@@ -865,16 +866,79 @@ function createObservePgSyncTool(ctx) {
 				...args.wake?.ops ? { ops: args.wake.ops } : {},
 				...args.wake?.debounceMs !== void 0 ? { debounceMs: args.wake.debounceMs } : {}
 			};
-			await ctx.observe(source, { wake });
-			return asToolResult({
-				sourceRef: source.sourceRef,
-				streamUrl: source.streamUrl,
+			const handle = await ctx.observe(source, { wake });
+			if (!handle.streamUrl) throw new Error(`pg-sync observation did not return a stream URL for ${handle.sourceRef}`);
+			return asToolResult$1({
+				sourceRef: handle.sourceRef,
+				streamUrl: handle.streamUrl,
 				wake
 			});
 		}
 	};
 }
+//#endregion
+//#region src/tools/unobserve-pg-sync.ts
+function asToolResult(value) {
+	return {
+		content: [{
+			type: `text`,
+			text: typeof value === `string` ? value : JSON.stringify(value, null, 2)
+		}],
+		details: {}
+	};
+}
+function isRecord$1(value) {
+	return typeof value === `object` && value !== null && !Array.isArray(value);
+}
+function listPgSyncObservations(ctx) {
+	const manifests = ctx.db.collections.manifests?.toArray;
+	if (!Array.isArray(manifests)) return [];
+	const observations = [];
+	for (const entry of manifests) {
+		if (!isRecord$1(entry) || entry.kind !== `source` || entry.sourceType !== `pgSync` || typeof entry.sourceRef !== `string`) continue;
+		const config = isRecord$1(entry.config) ? entry.config : {};
+		observations.push({
+			sourceRef: entry.sourceRef,
+			...typeof config.table === `string` ? { table: config.table } : {},
+			...typeof config.url === `string` ? { url: config.url } : {},
+			...typeof entry.streamUrl === `string` ? { streamUrl: entry.streamUrl } : {}
+		});
+	}
+	return observations.sort((left, right) => left.sourceRef.localeCompare(right.sourceRef));
+}
+function createUnobservePgSyncTool(ctx) {
+	return {
+		name: `unobserve_pg_sync`,
+		label: `Stop Observing Postgres Sync`,
+		description: `Stop being woken by a Postgres shape stream you previously observed with observe_pg_sync. Identify the observation by its sourceRef (preferred) or table. Call with no arguments to list your active pg-sync observations. This only removes your own subscription; any other agents observing the same shape keep their stream.`,
+		parameters: Type.Object({
+			sourceRef: Type.Optional(Type.String({ description: `The sourceRef returned by observe_pg_sync. Preferred — unambiguous.` })),
+			table: Type.Optional(Type.String({ description: `The observed table name. Used only when sourceRef is not given; fails if more than one observation matches.` }))
+		}),
+		execute: async (_toolCallId, params) => {
+			const args = params;
+			const observations = listPgSyncObservations(ctx);
+			if (!args.sourceRef && !args.table) return asToolResult(observations.length > 0 ? { observations } : `You have no active pg-sync observations.`);
+			let sourceRef = args.sourceRef;
+			if (!sourceRef) {
+				const matches = observations.filter((o) => o.table === args.table);
+				if (matches.length === 0) return asToolResult(`No active pg-sync observation found for table "${args.table}".`);
+				if (matches.length > 1) return asToolResult({
+					error: `Multiple pg-sync observations match table "${args.table}"; pass a sourceRef instead.`,
+					matches
+				});
+				sourceRef = matches[0].sourceRef;
+			} else if (!observations.some((o) => o.sourceRef === sourceRef)) return asToolResult(`No active pg-sync observation found for sourceRef "${sourceRef}".`);
+			await ctx.unobserve(sourceRef);
+			return asToolResult({
+				unobserved: true,
+				sourceRef
+			});
+		}
+	};
+}
 //#endregion
 //#region src/tools/fork.ts
 function createForkTool(ctx) {
@@ -1087,25 +1151,66 @@ function filterChoicesByEnabledModels(choices, values) {
 	const filtered = choices.filter((choice) => enabled.has(choice.value));
 	return filtered.length > 0 ? filtered : choices;
 }
+/**
+* Anthropic-specific budget mapping for `reasoningEffort`.
+*
+* Anthropic's `thinking.budget_tokens` is a hard cap on tokens spent
+* inside the thinking block before the model must commit to its
+* answer. Docs require ≥ 1024; we scale from there. Numbers tuned so
+* `medium` is the spot most "show your work" requests land, and
+* `high` covers tougher reasoning without uncapped spend.
+*
+* Keep in sync with provider doc updates — Anthropic has shifted the
+* minimum once already (older models capped lower).
+*/
+const ANTHROPIC_THINKING_BUDGET_BY_EFFORT = {
+	minimal: 1024,
+	low: 2048,
+	medium: 8192,
+	high: 24576
+};
 function withProviderPayloadDefaults(config, choice, reasoningEffort) {
-	if (choice.provider !== `openai` && choice.provider !== `openai-codex` || !choice.reasoning) return config;
-	const defaultEffort = choice.provider === `openai-codex` ? `low` : `minimal`;
-	const effort = reasoningEffort === `minimal` && choice.provider === `openai-codex` ? `low` : reasoningEffort ?? defaultEffort;
-	return {
-		...config,
-		onPayload: (payload) => {
-			if (typeof payload !== `object` || payload === null) return void 0;
-			const body = payload;
-			const existingReasoning = typeof body.reasoning === `object` && body.reasoning !== null ? body.reasoning : {};
-			return {
-				...body,
-				reasoning: {
-					...existingReasoning,
-					effort
-				}
-			};
-		}
-	};
+	if (!choice.reasoning) return config;
+	if (choice.provider === `openai` || choice.provider === `openai-codex`) {
+		const defaultEffort = choice.provider === `openai-codex` ? `low` : `minimal`;
+		const effort = reasoningEffort === `minimal` && choice.provider === `openai-codex` ? `low` : reasoningEffort ?? defaultEffort;
+		return {
+			...config,
+			onPayload: (payload) => {
+				if (typeof payload !== `object` || payload === null) return void 0;
+				const body = payload;
+				const existingReasoning = typeof body.reasoning === `object` && body.reasoning !== null ? body.reasoning : {};
+				return {
+					...body,
+					reasoning: {
+						...existingReasoning,
+						effort
+					}
+				};
+			}
+		};
+	}
+	if (choice.provider === `anthropic`) {
+		const effectiveEffort = reasoningEffort ?? `minimal`;
+		const budgetTokens = ANTHROPIC_THINKING_BUDGET_BY_EFFORT[effectiveEffort];
+		return {
+			...config,
+			onPayload: (payload) => {
+				if (typeof payload !== `object` || payload === null) return void 0;
+				const body = payload;
+				const existingThinking = typeof body.thinking === `object` && body.thinking !== null ? body.thinking : {};
+				return {
+					...body,
+					thinking: {
+						...existingThinking,
+						type: `enabled`,
+						budget_tokens: budgetTokens
+					}
+				};
+			}
+		};
+	}
+	return config;
 }
 function parseReasoningEffort(value) {
 	return value === `minimal` || value === `low` || value === `medium` || value === `high` ? value : null;
@@ -1265,7 +1370,7 @@ async function generateTitle(userMessage, llmCall, onFallback) {
 }
 function buildHortonSystemPrompt(workingDirectory, opts = {}) {
 	const docsTools = opts.hasDocsSupport ? `\n- search_electric_agents_docs: hybrid search over the built-in Electric Agents docs index` : ``;
-	const eventSourceTools = opts.hasEventSourceTools ? `\n- list_event_sources: list external webhook/event feeds you can subscribe to, including available buckets and parameters\n- subscribe_event_source: subscribe yourself to one of those feeds or buckets so matching future events wake you\n- list_event_source_subscriptions: list your active event source subscriptions\n- unsubscribe_event_source: remove one of your event source subscriptions by id` : ``;
+	const webhookSourceTools = opts.hasWebhookSourceTools ? `\n- list_webhook_sources: list external webhook feeds you can subscribe to, including available buckets and parameters\n- subscribe_webhook_source: subscribe yourself to one of those feeds or buckets so matching future webhooks wake you\n- list_webhook_source_subscriptions: list your active webhook source subscriptions\n- unsubscribe_webhook_source: remove one of your webhook source subscriptions by id` : ``;
 	const titleTool = `\n- set_title: set or rename this chat session's UI title`;
 	const scheduleTools = opts.hasScheduleTools ? `\n- upsert_cron_schedule: create or update a recurring cron wake for yourself. Always include payload with the concrete instruction/message you should receive when the cron fires.\n- delete_schedule: delete one of your cron or future-send schedules by stable id\n- list_schedules: list your manifest-backed cron and future-send schedules` : ``;
 	const skillsTools = opts.hasSkills ? `\n- use_skill: load a skill (knowledge, instructions, or a tutorial) into your context to help with the user's request\n- remove_skill: unload a skill from context when you're done with it` : ``;
@@ -1322,9 +1427,10 @@ When a user opens with a greeting ("hi", "hello", "hey", etc.) or a broad statem
 - fetch_url: fetch and convert a URL to markdown
 - spawn_worker: dispatch a subagent for an isolated task
 - fork: spawn a child session that inherits this conversation's history up to the latest completed response. Same parent-ownership model as spawn_worker — when the fork's next run finishes, you'll wake with its response.
-- observe_pg_sync: observe an Electric Postgres sync stream and wake on matching changes
+- observe_pg_sync: observe an Electric Postgres sync stream and wake on matching changes (see "Observing Postgres tables")
+- unobserve_pg_sync: stop being woken by a pg-sync stream you previously observed (see "Observing Postgres tables")
 - send: send a message to an Electric Agent/entity. To schedule future work for yourself, call send with self: true and afterMs.
-${eventSourceTools}${titleTool}${scheduleTools}${docsTools}${skillsTools}
+${webhookSourceTools}${titleTool}${scheduleTools}${docsTools}${skillsTools}
 # Working with files
 - Prefer edit over write when modifying existing files.
@@ -1332,6 +1438,14 @@ ${eventSourceTools}${titleTool}${scheduleTools}${docsTools}${skillsTools}
 - Use absolute paths or paths relative to the current working directory.
 ${modelGuidance}${docsGuidance}${skillsGuidance}${onboardingGuidance}${docsUrlGuidance}
+# Observing Postgres tables
+observe_pg_sync subscribes you to row changes in a Postgres table via an Electric shape stream:
+- The \`url\` parameter is the HTTP(S) URL of an Electric shape endpoint (e.g. \`http://localhost:3000/v1/shape\`). It is NOT a \`postgres://\` connection string and there is no default — if the user hasn't given you the endpoint URL, ask for it. Never guess or invent one.
+- Registration validates the endpoint by fetching the shape log first. If it fails, the error includes Electric's response or the failure reason — use it to correct the table name, where clause, or URL, or relay it to the user.
+- Use \`where\` and \`columns\` to narrow the shape so you only wake on changes you care about; use \`wake.ops\` to filter by operation and \`wake.debounceMs\` to batch bursts.
+- The observation persists across wakes — register it once, don't re-register on every wake.
+- To stop, call unobserve_pg_sync with the sourceRef from observe_pg_sync (or the table name). Call it with no arguments to list your active observations. This only ends your own subscription.
 # Risky actions
 Pause and confirm with the user before:
 - Destructive operations (deleting files, rm -rf, dropping data, force-pushing)
@@ -1369,7 +1483,18 @@ Workflow when forking yourself for parallel exploration:
 Report outcomes faithfully. If a command failed, say so with the relevant output. If you didn't run a verification step, say that rather than implying you did. Don't hedge confirmed results with unnecessary disclaimers.
 Working directory: ${workingDirectory}
-The current year is ${new Date().getFullYear()}.`;
+The current year is ${new Date().getFullYear()}.${buildGoalGuidance(opts.activeGoal)}`;
+}
+function buildGoalGuidance(goal) {
+	if (!goal) return ``;
+	const budgetLine = goal.tokenBudget === null ? `unlimited` : `${goal.tokensUsed} / ${goal.tokenBudget} tokens used`;
+	return `
+# Active goal
+- Objective: ${goal.objective}
+- Token budget: ${budgetLine}
+The user set this goal with /goal set. Work autonomously toward it: do NOT ask the user clarifying questions or pause for confirmation — make reasonable assumptions and proceed. When you believe the goal is met, call the \`mark_goal_complete\` tool. If you hit a blocker that genuinely requires the user (e.g. credentials, a destructive action), call \`mark_goal_complete\` with a summary explaining what's needed. The runtime will abort this run automatically if you exceed the token budget.`;
 }
 function getToolName(tool) {
 	if (typeof tool !== `object` || tool === null) return null;
@@ -1392,8 +1517,10 @@ function createHortonTools(sandbox, ctx, readSet, opts = {}) {
 		createSpawnWorkerTool(ctx, opts.modelConfig),
 		createForkTool(ctx),
 		createObservePgSyncTool(ctx),
+		createUnobservePgSyncTool(ctx),
 		createSetTitleTool(ctx),
 		createSendTool(ctx.send, { selfEntityUrl: ctx.entityUrl }),
+		...ctx.getGoal()?.status === `active` ? [createMarkGoalCompleteTool(ctx)] : [],
 		...opts.docsSearchTool ? [opts.docsSearchTool] : []
 	];
 }
@@ -1462,11 +1589,58 @@ async function readAgentsMd(sandbox) {
 		return null;
 	}
 }
+function extractWakeText(wake) {
+	if (wake.type !== `inbox`) return null;
+	const payload = wake.payload;
+	if (typeof payload === `string`) return payload;
+	if (payload && typeof payload === `object`) {
+		const record = payload;
+		if (typeof record.text === `string`) return record.text;
+		if (typeof record.source === `string`) return record.source;
+	}
+	return null;
+}
+async function tryHandleSlashCommand(ctx, wake) {
+	const text = extractWakeText(wake);
+	if (text === null) return false;
+	if (isGoalCommandText(text)) {
+		const command = parseGoalCommand(text);
+		const result = dispatchGoalCommand(ctx, command);
+		if (result.message) {
+			serverLog.info(`[horton ${ctx.entityUrl}] ${result.message}`);
+			writeSlashCommandReply(ctx, result.message);
+		}
+		if (command.kind === `set`) await kickoffGoalRun(ctx);
+		return result.handled;
+	}
+	return false;
+}
+const GOAL_KICKOFF_TEXT = `Start working toward the active goal now. Call \`mark_goal_complete\` when you believe it is done.`;
+async function kickoffGoalRun(ctx) {
+	const goal = ctx.getGoal();
+	if (!goal || goal.status !== `active`) return;
+	try {
+		await ctx.send(ctx.entityUrl, {
+			kind: `goal_kickoff`,
+			text: GOAL_KICKOFF_TEXT
+		}, { type: `inbox` });
+	} catch (err) {
+		serverLog.warn(`[horton ${ctx.entityUrl}] failed to enqueue goal kickoff: ${err instanceof Error ? err.message : String(err)}`);
+	}
+}
+function writeSlashCommandReply(ctx, text) {
+	try {
+		ctx.replyText(text);
+	} catch (err) {
+		serverLog.warn(`[horton ${ctx.entityUrl}] failed to render slash command reply: ${err instanceof Error ? err.message : String(err)}`);
+	}
+}
 function createAssistantHandler(options) {
 	const { streamFn, docsSupport, docsSearchTool, skillsRegistry, modelCatalog, docsUrl } = options;
 	const skillLoader = createContextSkillLoader(skillsRegistry, { slashCommandOwner: HORTON_SKILLS_SLASH_COMMAND_OWNER });
 	const hasSkills = skillLoader.hasSkills;
 	return async function assistantHandler(ctx, wake) {
+		if (await tryHandleSlashCommand(ctx, wake)) return;
 		const loadedSkills = await skillLoader.load(ctx);
 		const readSet = new Set();
 		const modelConfig = resolveBuiltinModelConfig(modelCatalog, ctx.args);
@@ -1484,7 +1658,7 @@ function createAssistantHandler(options) {
 			...loadedSkills.tools,
 			...mcp.tools()
 		];
-		const hasEventSourceTools = tools.some((tool) => getToolName(tool) === `list_event_sources`);
+		const hasWebhookSourceTools = tools.some((tool) => getToolName(tool) === `list_webhook_sources`);
 		const hasScheduleTools = tools.some((tool) => getToolName(tool) === `upsert_cron_schedule`);
 		const titlePromise = !ctx.tags.title ? (async () => {
 			const firstUserMessage = await extractFirstUserMessage(ctx);
@@ -1559,6 +1733,26 @@ function createAssistantHandler(options) {
 				}
 			}
 		});
+		const goal = ctx.getGoal();
+		const enforcedGoal = goal && goal.status === `active` ? goal : void 0;
+		const activeGoalPromptInfo = enforcedGoal ? {
+			objective: enforcedGoal.objective,
+			tokenBudget: enforcedGoal.tokenBudget,
+			tokensUsed: enforcedGoal.tokensUsed
+		} : void 0;
+		const budgetAbort = new AbortController();
+		let runTokensUsed = enforcedGoal?.tokensUsed ?? 0;
+		let budgetTripped = false;
+		const onStepEnd = enforcedGoal ? (stats) => {
+			if (budgetTripped) return;
+			runTokensUsed += stats.uncachedInput + stats.output;
+			ctx.updateGoalUsage(runTokensUsed);
+			if (enforcedGoal.tokenBudget !== null && runTokensUsed >= enforcedGoal.tokenBudget) {
+				budgetTripped = true;
+				serverLog.info(`[horton ${ctx.entityUrl}] goal budget exhausted (${runTokensUsed} tokens) — aborting run`);
+				budgetAbort.abort();
+			}
+		} : void 0;
 		ctx.useAgent({
 			systemPrompt: buildHortonSystemPrompt(sandboxCwd, {
 				hasDocsSupport: Boolean(docsSupport),
@@ -1566,14 +1760,27 @@ function createAssistantHandler(options) {
 				docsUrl,
 				modelProvider: modelConfig.provider,
 				modelId: String(modelConfig.model),
-				hasEventSourceTools,
-				hasScheduleTools
+				hasWebhookSourceTools,
+				hasScheduleTools,
+				...activeGoalPromptInfo && { activeGoal: activeGoalPromptInfo }
 			}),
 			...modelConfig,
 			tools,
-			...streamFn && { streamFn }
+			...streamFn && { streamFn },
+			...onStepEnd && { onStepEnd }
 		});
-		await ctx.agent.run();
+		try {
+			await ctx.agent.run(void 0, budgetAbort.signal);
+		} catch (err) {
+			if (!budgetTripped) throw err;
+			serverLog.info(`[horton ${ctx.entityUrl}] agent.run aborted by budget enforcement`);
+		}
+		if (enforcedGoal) ctx.updateGoalUsage(runTokensUsed, budgetTripped ? { status: `budget_limited` } : void 0);
+		if (budgetTripped && enforcedGoal && enforcedGoal.tokenBudget !== null) {
+			const budget = enforcedGoal.tokenBudget;
+			const suggestedNext = Math.max(budget * 2, budget + 1e4);
+			writeSlashCommandReply(ctx, `⚠️ Stopped — goal hit the token budget (${formatTokenCount(runTokensUsed)} / ${formatTokenCount(budget)} tokens used). Raise the budget with \`/goal set "..." --tokens ${formatTokenCount(suggestedNext)}\`, or call \`/goal complete\` to finalize.`);
+		}
 		await titlePromise;
 	};
 }
@@ -1613,7 +1820,8 @@ function registerHorton(registry, options) {
 			subject_value: `user`,
 			permission: `manage`
 		}],
-		slashCommands: buildSkillSlashCommands(skillsRegistry),
+		state: { comments: commentsCollection },
+		slashCommands: [GOAL_SLASH_COMMAND, ...buildSkillSlashCommands(skillsRegistry)],
 		handler: assistantHandler
 	});
 	return [`horton`];
@@ -1797,6 +2005,7 @@ function registerWorker(registry, options) {
 			subject_value: `user`,
 			permission: `manage`
 		}],
+		state: { comments: commentsCollection },
 		async handler(ctx) {
 			const args = parseWorkerArgs(ctx.args);
 			const readSet = new Set();
@@ -1839,7 +2048,7 @@ function dedupeToolsByName(tools) {
 }
 function createBuiltinElectricTools(custom) {
 	return async (context) => {
-		const builtinTools = [...createEventSourceTools(context), ...createScheduleTools({
+		const builtinTools = [...createWebhookSourceTools(context), ...createScheduleTools({
 			...context,
 			db: context.db
 		})];
@@ -1848,7 +2057,7 @@ function createBuiltinElectricTools(custom) {
 	};
 }
 async function createBuiltinAgentHandler(options) {
-	const { agentServerUrl, serveEndpoint, workingDirectory, streamFn, enabledModelValues, createElectricTools, publicUrl, runtimeName, baseSkillsDir: baseSkillsDirOverride, serverHeaders, defaultDispatchPolicyForType } = options;
+	const { agentServerUrl, serveEndpoint, workingDirectory, streamFn, enabledModelValues, createElectricTools, publicUrl, runtimeName, baseSkillsDir: baseSkillsDirOverride, serverHeaders, defaultDispatchPolicyForType, dockerSandbox: dockerSandboxOpts } = options;
 	const modelCatalog = await createBuiltinModelCatalog({
 		allowMockFallback: Boolean(streamFn),
 		enabledModelValues
@@ -1884,7 +2093,7 @@ async function createBuiltinAgentHandler(options) {
 		modelCatalog
 	});
 	typeNames.push(`worker`);
-	const { profiles: sandboxProfiles, shutdownSandboxes } = await buildBuiltinSandboxProfiles(cwd);
+	const { profiles: sandboxProfiles, shutdownSandboxes } = await buildBuiltinSandboxProfiles(cwd, dockerSandboxOpts);
 	const runtime = createRuntimeHandler({
 		baseUrl: agentServerUrl,
 		serveEndpoint,
@@ -1904,7 +2113,8 @@ async function createBuiltinAgentHandler(options) {
 		registry,
 		typeNames,
 		skillsRegistry,
-		shutdownSandboxes
+		shutdownSandboxes,
+		modelCatalog
 	};
 }
 async function registerBuiltinAgentTypes(bootstrap) {
@@ -1923,6 +2133,21 @@ function sweepOrphanedDockerSandboxesOnce(sweep) {
 	return dockerBootSweep;
 }
 /**
+* Merge the profile's working-directory mount with embedder docker options
+* into the option fragment spread into `dockerSandbox()`. An internal helper:
+* exported from this module so the unit test can import it, but intentionally
+* not re-exported from `index.ts` (not part of the package's public API).
+*/
+function resolveDockerSandboxOpts(cwdMount, custom) {
+	const extraMounts = [...cwdMount ? [cwdMount] : [], ...custom?.extraMounts ?? []];
+	return {
+		...custom?.image !== void 0 && { image: custom.image },
+		...custom?.allowFloatingTag !== void 0 && { allowFloatingTag: custom.allowFloatingTag },
+		...custom?.env !== void 0 && { env: custom.env },
+		...extraMounts.length > 0 && { extraMounts }
+	};
+}
+/**
 * Built-in sandbox profiles. `local` is always available. `docker` is
 * gated on Docker being reachable so a user without Docker installed
 * sees only what works — the UI never offers a non-functional choice.
@@ -1932,7 +2157,7 @@ function sweepOrphanedDockerSandboxesOnce(sweep) {
 * server must run on shutdown (the providers' debounced idle teardowns die
 * with the process).
 */
-async function buildBuiltinSandboxProfiles(workingDirectory) {
+async function buildBuiltinSandboxProfiles(workingDirectory, dockerOpts) {
 	const profiles = [{
 		name: `local`,
 		label: `Local`,
@@ -1957,11 +2182,11 @@ async function buildBuiltinSandboxProfiles(workingDirectory) {
 						workingDirectory: `/work`,
 						factory: () => dockerSandbox({
 							initialNetworkPolicy: { mode: `allow-all` },
-							extraMounts: cwd ? [{
+							...resolveDockerSandboxOpts(cwd ? {
 								hostPath: cwd,
 								containerPath: `/work`,
 								readOnly: false
-							}] : void 0,
+							} : void 0, dockerOpts),
 							sandboxKey,
 							persistent,
 							owner,