npm - @electric-ax/agents - Versions diffs - 0.4.17 → 0.4.18 - Mend

@electric-ax/agents 0.4.17 → 0.4.18

This diff shows the changes between two publicly released versions of this package, exactly as they appear in the public registry they were published to. It is provided for informational purposes only.

Files changed (6)

package/dist/entrypoint.js CHANGED Viewed

@@ -4,8 +4,8 @@ import { cacheStores, getGlobalDispatcher, interceptors, setGlobalDispatcher } f
 import fs from "node:fs";
 import pino from "pino";
 import { fileURLToPath } from "node:url";
-import { MOONSHOT_API_BASE_URL, MOONSHOT_PROVIDER, appendPathToUrl, buildSkillSlashCommands, completeWithLowCostModel, createContextSkillLoader, createEntityRegistry, createPullWakeRunner, createRuntimeHandler, createSkillsRegistry, db, detectAvailableProviders, getMoonshotApiKey, getMoonshotModel, getMoonshotModels, pgSync, readCodexAccessToken, registerToolProvider, unregisterToolProvider } from "@electric-ax/agents-runtime";
-import { braveSearchTool, createBashTool, createEditTool, createEventSourceTools, createFetchUrlTool, createReadFileTool, createScheduleTools, createSendTool, createWriteTool } from "@electric-ax/agents-runtime/tools";
+import { GOAL_SLASH_COMMAND, MOONSHOT_API_BASE_URL, MOONSHOT_PROVIDER, appendPathToUrl, buildSkillSlashCommands, commentsCollection, completeWithLowCostModel, createContextSkillLoader, createEntityRegistry, createPullWakeRunner, createRuntimeHandler, createSkillsRegistry, db, detectAvailableProviders, dispatchGoalCommand, formatTokenCount, getMoonshotApiKey, getMoonshotModel, getMoonshotModels, isGoalCommandText, parseGoalCommand, pgSync, readCodexAccessToken, registerToolProvider, unregisterToolProvider } from "@electric-ax/agents-runtime";
+import { braveSearchTool, createBashTool, createEditTool, createEventSourceTools, createFetchUrlTool, createMarkGoalCompleteTool, createReadFileTool, createScheduleTools, createSendTool, createWriteTool } from "@electric-ax/agents-runtime/tools";
 import { chooseDefaultSandbox, isE2BAvailable, lazySandbox, remoteSandbox } from "@electric-ax/agents-runtime/sandbox";
 import { z } from "zod";
 import { createHash } from "node:crypto";
@@ -1087,25 +1087,66 @@ function filterChoicesByEnabledModels(choices, values) {
 	const filtered = choices.filter((choice) => enabled.has(choice.value));
 	return filtered.length > 0 ? filtered : choices;
 }
+/**
+* Anthropic-specific budget mapping for `reasoningEffort`.
+*
+* Anthropic's `thinking.budget_tokens` is a hard cap on tokens spent
+* inside the thinking block before the model must commit to its
+* answer. Docs require ≥ 1024; we scale from there. Numbers tuned so
+* `medium` is the spot most "show your work" requests land, and
+* `high` covers tougher reasoning without uncapped spend.
+*
+* Keep in sync with provider doc updates — Anthropic has shifted the
+* minimum once already (older models capped lower).
+*/
+const ANTHROPIC_THINKING_BUDGET_BY_EFFORT = {
+	minimal: 1024,
+	low: 2048,
+	medium: 8192,
+	high: 24576
+};
 function withProviderPayloadDefaults(config, choice, reasoningEffort) {
-	if (choice.provider !== `openai` && choice.provider !== `openai-codex` || !choice.reasoning) return config;
-	const defaultEffort = choice.provider === `openai-codex` ? `low` : `minimal`;
-	const effort = reasoningEffort === `minimal` && choice.provider === `openai-codex` ? `low` : reasoningEffort ?? defaultEffort;
-	return {
-		...config,
-		onPayload: (payload) => {
-			if (typeof payload !== `object` || payload === null) return void 0;
-			const body = payload;
-			const existingReasoning = typeof body.reasoning === `object` && body.reasoning !== null ? body.reasoning : {};
-			return {
-				...body,
-				reasoning: {
-					...existingReasoning,
-					effort
-				}
-			};
-		}
-	};
+	if (!choice.reasoning) return config;
+	if (choice.provider === `openai` || choice.provider === `openai-codex`) {
+		const defaultEffort = choice.provider === `openai-codex` ? `low` : `minimal`;
+		const effort = reasoningEffort === `minimal` && choice.provider === `openai-codex` ? `low` : reasoningEffort ?? defaultEffort;
+		return {
+			...config,
+			onPayload: (payload) => {
+				if (typeof payload !== `object` || payload === null) return void 0;
+				const body = payload;
+				const existingReasoning = typeof body.reasoning === `object` && body.reasoning !== null ? body.reasoning : {};
+				return {
+					...body,
+					reasoning: {
+						...existingReasoning,
+						effort
+					}
+				};
+			}
+		};
+	}
+	if (choice.provider === `anthropic`) {
+		const effectiveEffort = reasoningEffort ?? `minimal`;
+		const budgetTokens = ANTHROPIC_THINKING_BUDGET_BY_EFFORT[effectiveEffort];
+		return {
+			...config,
+			onPayload: (payload) => {
+				if (typeof payload !== `object` || payload === null) return void 0;
+				const body = payload;
+				const existingThinking = typeof body.thinking === `object` && body.thinking !== null ? body.thinking : {};
+				return {
+					...body,
+					thinking: {
+						...existingThinking,
+						type: `enabled`,
+						budget_tokens: budgetTokens
+					}
+				};
+			}
+		};
+	}
+	return config;
 }
 function parseReasoningEffort(value) {
 	return value === `minimal` || value === `low` || value === `medium` || value === `high` ? value : null;
@@ -1369,7 +1410,18 @@ Workflow when forking yourself for parallel exploration:
 Report outcomes faithfully. If a command failed, say so with the relevant output. If you didn't run a verification step, say that rather than implying you did. Don't hedge confirmed results with unnecessary disclaimers.
 Working directory: ${workingDirectory}
-The current year is ${new Date().getFullYear()}.`;
+The current year is ${new Date().getFullYear()}.${buildGoalGuidance(opts.activeGoal)}`;
+}
+function buildGoalGuidance(goal) {
+	if (!goal) return ``;
+	const budgetLine = goal.tokenBudget === null ? `unlimited` : `${goal.tokensUsed} / ${goal.tokenBudget} tokens used`;
+	return `
+# Active goal
+- Objective: ${goal.objective}
+- Token budget: ${budgetLine}
+The user set this goal with /goal set. Work autonomously toward it: do NOT ask the user clarifying questions or pause for confirmation — make reasonable assumptions and proceed. When you believe the goal is met, call the \`mark_goal_complete\` tool. If you hit a blocker that genuinely requires the user (e.g. credentials, a destructive action), call \`mark_goal_complete\` with a summary explaining what's needed. The runtime will abort this run automatically if you exceed the token budget.`;
 }
 function getToolName(tool) {
 	if (typeof tool !== `object` || tool === null) return null;
@@ -1394,6 +1446,7 @@ function createHortonTools(sandbox, ctx, readSet, opts = {}) {
 		createObservePgSyncTool(ctx),
 		createSetTitleTool(ctx),
 		createSendTool(ctx.send, { selfEntityUrl: ctx.entityUrl }),
+		...ctx.getGoal()?.status === `active` ? [createMarkGoalCompleteTool(ctx)] : [],
 		...opts.docsSearchTool ? [opts.docsSearchTool] : []
 	];
 }
@@ -1462,11 +1515,58 @@ async function readAgentsMd(sandbox) {
 		return null;
 	}
 }
+function extractWakeText(wake) {
+	if (wake.type !== `inbox`) return null;
+	const payload = wake.payload;
+	if (typeof payload === `string`) return payload;
+	if (payload && typeof payload === `object`) {
+		const record = payload;
+		if (typeof record.text === `string`) return record.text;
+		if (typeof record.source === `string`) return record.source;
+	}
+	return null;
+}
+async function tryHandleSlashCommand(ctx, wake) {
+	const text = extractWakeText(wake);
+	if (text === null) return false;
+	if (isGoalCommandText(text)) {
+		const command = parseGoalCommand(text);
+		const result = dispatchGoalCommand(ctx, command);
+		if (result.message) {
+			serverLog.info(`[horton ${ctx.entityUrl}] ${result.message}`);
+			writeSlashCommandReply(ctx, result.message);
+		}
+		if (command.kind === `set`) await kickoffGoalRun(ctx);
+		return result.handled;
+	}
+	return false;
+}
+const GOAL_KICKOFF_TEXT = `Start working toward the active goal now. Call \`mark_goal_complete\` when you believe it is done.`;
+async function kickoffGoalRun(ctx) {
+	const goal = ctx.getGoal();
+	if (!goal || goal.status !== `active`) return;
+	try {
+		await ctx.send(ctx.entityUrl, {
+			kind: `goal_kickoff`,
+			text: GOAL_KICKOFF_TEXT
+		}, { type: `inbox` });
+	} catch (err) {
+		serverLog.warn(`[horton ${ctx.entityUrl}] failed to enqueue goal kickoff: ${err instanceof Error ? err.message : String(err)}`);
+	}
+}
+function writeSlashCommandReply(ctx, text) {
+	try {
+		ctx.replyText(text);
+	} catch (err) {
+		serverLog.warn(`[horton ${ctx.entityUrl}] failed to render slash command reply: ${err instanceof Error ? err.message : String(err)}`);
+	}
+}
 function createAssistantHandler(options) {
 	const { streamFn, docsSupport, docsSearchTool, skillsRegistry, modelCatalog, docsUrl } = options;
 	const skillLoader = createContextSkillLoader(skillsRegistry, { slashCommandOwner: HORTON_SKILLS_SLASH_COMMAND_OWNER });
 	const hasSkills = skillLoader.hasSkills;
 	return async function assistantHandler(ctx, wake) {
+		if (await tryHandleSlashCommand(ctx, wake)) return;
 		const loadedSkills = await skillLoader.load(ctx);
 		const readSet = new Set();
 		const modelConfig = resolveBuiltinModelConfig(modelCatalog, ctx.args);
@@ -1559,6 +1659,26 @@ function createAssistantHandler(options) {
 				}
 			}
 		});
+		const goal = ctx.getGoal();
+		const enforcedGoal = goal && goal.status === `active` ? goal : void 0;
+		const activeGoalPromptInfo = enforcedGoal ? {
+			objective: enforcedGoal.objective,
+			tokenBudget: enforcedGoal.tokenBudget,
+			tokensUsed: enforcedGoal.tokensUsed
+		} : void 0;
+		const budgetAbort = new AbortController();
+		let runTokensUsed = enforcedGoal?.tokensUsed ?? 0;
+		let budgetTripped = false;
+		const onStepEnd = enforcedGoal ? (stats) => {
+			if (budgetTripped) return;
+			runTokensUsed += stats.uncachedInput + stats.output;
+			ctx.updateGoalUsage(runTokensUsed);
+			if (enforcedGoal.tokenBudget !== null && runTokensUsed >= enforcedGoal.tokenBudget) {
+				budgetTripped = true;
+				serverLog.info(`[horton ${ctx.entityUrl}] goal budget exhausted (${runTokensUsed} tokens) — aborting run`);
+				budgetAbort.abort();
+			}
+		} : void 0;
 		ctx.useAgent({
 			systemPrompt: buildHortonSystemPrompt(sandboxCwd, {
 				hasDocsSupport: Boolean(docsSupport),
@@ -1567,13 +1687,26 @@ function createAssistantHandler(options) {
 				modelProvider: modelConfig.provider,
 				modelId: String(modelConfig.model),
 				hasEventSourceTools,
-				hasScheduleTools
+				hasScheduleTools,
+				...activeGoalPromptInfo && { activeGoal: activeGoalPromptInfo }
 			}),
 			...modelConfig,
 			tools,
-			...streamFn && { streamFn }
+			...streamFn && { streamFn },
+			...onStepEnd && { onStepEnd }
 		});
-		await ctx.agent.run();
+		try {
+			await ctx.agent.run(void 0, budgetAbort.signal);
+		} catch (err) {
+			if (!budgetTripped) throw err;
+			serverLog.info(`[horton ${ctx.entityUrl}] agent.run aborted by budget enforcement`);
+		}
+		if (enforcedGoal) ctx.updateGoalUsage(runTokensUsed, budgetTripped ? { status: `budget_limited` } : void 0);
+		if (budgetTripped && enforcedGoal && enforcedGoal.tokenBudget !== null) {
+			const budget = enforcedGoal.tokenBudget;
+			const suggestedNext = Math.max(budget * 2, budget + 1e4);
+			writeSlashCommandReply(ctx, `⚠️ Stopped — goal hit the token budget (${formatTokenCount(runTokensUsed)} / ${formatTokenCount(budget)} tokens used). Raise the budget with \`/goal set "..." --tokens ${formatTokenCount(suggestedNext)}\`, or call \`/goal complete\` to finalize.`);
+		}
 		await titlePromise;
 	};
 }
@@ -1613,7 +1746,8 @@ function registerHorton(registry, options) {
 			subject_value: `user`,
 			permission: `manage`
 		}],
-		slashCommands: buildSkillSlashCommands(skillsRegistry),
+		state: { comments: commentsCollection },
+		slashCommands: [GOAL_SLASH_COMMAND, ...buildSkillSlashCommands(skillsRegistry)],
 		handler: assistantHandler
 	});
 	return [`horton`];
@@ -1797,6 +1931,7 @@ function registerWorker(registry, options) {
 			subject_value: `user`,
 			permission: `manage`
 		}],
+		state: { comments: commentsCollection },
 		async handler(ctx) {
 			const args = parseWorkerArgs(ctx.args);
 			const readSet = new Set();
@@ -1848,7 +1983,7 @@ function createBuiltinElectricTools(custom) {
 	};
 }
 async function createBuiltinAgentHandler(options) {
-	const { agentServerUrl, serveEndpoint, workingDirectory, streamFn, enabledModelValues, createElectricTools, publicUrl, runtimeName, baseSkillsDir: baseSkillsDirOverride, serverHeaders, defaultDispatchPolicyForType } = options;
+	const { agentServerUrl, serveEndpoint, workingDirectory, streamFn, enabledModelValues, createElectricTools, publicUrl, runtimeName, baseSkillsDir: baseSkillsDirOverride, serverHeaders, defaultDispatchPolicyForType, dockerSandbox: dockerSandboxOpts } = options;
 	const modelCatalog = await createBuiltinModelCatalog({
 		allowMockFallback: Boolean(streamFn),
 		enabledModelValues
@@ -1884,7 +2019,7 @@ async function createBuiltinAgentHandler(options) {
 		modelCatalog
 	});
 	typeNames.push(`worker`);
-	const { profiles: sandboxProfiles, shutdownSandboxes } = await buildBuiltinSandboxProfiles(cwd);
+	const { profiles: sandboxProfiles, shutdownSandboxes } = await buildBuiltinSandboxProfiles(cwd, dockerSandboxOpts);
 	const runtime = createRuntimeHandler({
 		baseUrl: agentServerUrl,
 		serveEndpoint,
@@ -1904,7 +2039,8 @@ async function createBuiltinAgentHandler(options) {
 		registry,
 		typeNames,
 		skillsRegistry,
-		shutdownSandboxes
+		shutdownSandboxes,
+		modelCatalog
 	};
 }
 async function registerBuiltinAgentTypes(bootstrap) {
@@ -1923,6 +2059,21 @@ function sweepOrphanedDockerSandboxesOnce(sweep) {
 	return dockerBootSweep;
 }
 /**
+* Merge the profile's working-directory mount with embedder docker options
+* into the option fragment spread into `dockerSandbox()`. An internal helper:
+* exported from this module so the unit test can import it, but intentionally
+* not re-exported from `index.ts` (not part of the package's public API).
+*/
+function resolveDockerSandboxOpts(cwdMount, custom) {
+	const extraMounts = [...cwdMount ? [cwdMount] : [], ...custom?.extraMounts ?? []];
+	return {
+		...custom?.image !== void 0 && { image: custom.image },
+		...custom?.allowFloatingTag !== void 0 && { allowFloatingTag: custom.allowFloatingTag },
+		...custom?.env !== void 0 && { env: custom.env },
+		...extraMounts.length > 0 && { extraMounts }
+	};
+}
+/**
 * Built-in sandbox profiles. `local` is always available. `docker` is
 * gated on Docker being reachable so a user without Docker installed
 * sees only what works — the UI never offers a non-functional choice.
@@ -1932,7 +2083,7 @@ function sweepOrphanedDockerSandboxesOnce(sweep) {
 * server must run on shutdown (the providers' debounced idle teardowns die
 * with the process).
 */
-async function buildBuiltinSandboxProfiles(workingDirectory) {
+async function buildBuiltinSandboxProfiles(workingDirectory, dockerOpts) {
 	const profiles = [{
 		name: `local`,
 		label: `Local`,
@@ -1957,11 +2108,11 @@ async function buildBuiltinSandboxProfiles(workingDirectory) {
 						workingDirectory: `/work`,
 						factory: () => dockerSandbox({
 							initialNetworkPolicy: { mode: `allow-all` },
-							extraMounts: cwd ? [{
+							...resolveDockerSandboxOpts(cwd ? {
 								hostPath: cwd,
 								containerPath: `/work`,
 								readOnly: false
-							}] : void 0,
+							} : void 0, dockerOpts),
 							sandboxKey,
 							persistent,
 							owner,

package/dist/index.cjs CHANGED Viewed

@@ -1093,25 +1093,66 @@ function filterChoicesByEnabledModels(choices, values) {
 	const filtered = choices.filter((choice) => enabled.has(choice.value));
 	return filtered.length > 0 ? filtered : choices;
 }
+/**
+* Anthropic-specific budget mapping for `reasoningEffort`.
+*
+* Anthropic's `thinking.budget_tokens` is a hard cap on tokens spent
+* inside the thinking block before the model must commit to its
+* answer. Docs require ≥ 1024; we scale from there. Numbers tuned so
+* `medium` is the spot most "show your work" requests land, and
+* `high` covers tougher reasoning without uncapped spend.
+*
+* Keep in sync with provider doc updates — Anthropic has shifted the
+* minimum once already (older models capped lower).
+*/
+const ANTHROPIC_THINKING_BUDGET_BY_EFFORT = {
+	minimal: 1024,
+	low: 2048,
+	medium: 8192,
+	high: 24576
+};
 function withProviderPayloadDefaults(config, choice, reasoningEffort) {
-	if (choice.provider !== `openai` && choice.provider !== `openai-codex` || !choice.reasoning) return config;
-	const defaultEffort = choice.provider === `openai-codex` ? `low` : `minimal`;
-	const effort = reasoningEffort === `minimal` && choice.provider === `openai-codex` ? `low` : reasoningEffort ?? defaultEffort;
-	return {
-		...config,
-		onPayload: (payload) => {
-			if (typeof payload !== `object` || payload === null) return void 0;
-			const body = payload;
-			const existingReasoning = typeof body.reasoning === `object` && body.reasoning !== null ? body.reasoning : {};
-			return {
-				...body,
-				reasoning: {
-					...existingReasoning,
-					effort
-				}
-			};
-		}
-	};
+	if (!choice.reasoning) return config;
+	if (choice.provider === `openai` || choice.provider === `openai-codex`) {
+		const defaultEffort = choice.provider === `openai-codex` ? `low` : `minimal`;
+		const effort = reasoningEffort === `minimal` && choice.provider === `openai-codex` ? `low` : reasoningEffort ?? defaultEffort;
+		return {
+			...config,
+			onPayload: (payload) => {
+				if (typeof payload !== `object` || payload === null) return void 0;
+				const body = payload;
+				const existingReasoning = typeof body.reasoning === `object` && body.reasoning !== null ? body.reasoning : {};
+				return {
+					...body,
+					reasoning: {
+						...existingReasoning,
+						effort
+					}
+				};
+			}
+		};
+	}
+	if (choice.provider === `anthropic`) {
+		const effectiveEffort = reasoningEffort ?? `minimal`;
+		const budgetTokens = ANTHROPIC_THINKING_BUDGET_BY_EFFORT[effectiveEffort];
+		return {
+			...config,
+			onPayload: (payload) => {
+				if (typeof payload !== `object` || payload === null) return void 0;
+				const body = payload;
+				const existingThinking = typeof body.thinking === `object` && body.thinking !== null ? body.thinking : {};
+				return {
+					...body,
+					thinking: {
+						...existingThinking,
+						type: `enabled`,
+						budget_tokens: budgetTokens
+					}
+				};
+			}
+		};
+	}
+	return config;
 }
 function parseReasoningEffort(value) {
 	return value === `minimal` || value === `low` || value === `medium` || value === `high` ? value : null;
@@ -1376,7 +1417,18 @@ Workflow when forking yourself for parallel exploration:
 Report outcomes faithfully. If a command failed, say so with the relevant output. If you didn't run a verification step, say that rather than implying you did. Don't hedge confirmed results with unnecessary disclaimers.
 Working directory: ${workingDirectory}
-The current year is ${new Date().getFullYear()}.`;
+The current year is ${new Date().getFullYear()}.${buildGoalGuidance(opts.activeGoal)}`;
+}
+function buildGoalGuidance(goal) {
+	if (!goal) return ``;
+	const budgetLine = goal.tokenBudget === null ? `unlimited` : `${goal.tokensUsed} / ${goal.tokenBudget} tokens used`;
+	return `
+# Active goal
+- Objective: ${goal.objective}
+- Token budget: ${budgetLine}
+The user set this goal with /goal set. Work autonomously toward it: do NOT ask the user clarifying questions or pause for confirmation — make reasonable assumptions and proceed. When you believe the goal is met, call the \`mark_goal_complete\` tool. If you hit a blocker that genuinely requires the user (e.g. credentials, a destructive action), call \`mark_goal_complete\` with a summary explaining what's needed. The runtime will abort this run automatically if you exceed the token budget.`;
 }
 function getToolName(tool) {
 	if (typeof tool !== `object` || tool === null) return null;
@@ -1401,6 +1453,7 @@ function createHortonTools(sandbox, ctx, readSet, opts = {}) {
 		createObservePgSyncTool(ctx),
 		createSetTitleTool(ctx),
 		(0, __electric_ax_agents_runtime_tools.createSendTool)(ctx.send, { selfEntityUrl: ctx.entityUrl }),
+		...ctx.getGoal()?.status === `active` ? [(0, __electric_ax_agents_runtime_tools.createMarkGoalCompleteTool)(ctx)] : [],
 		...opts.docsSearchTool ? [opts.docsSearchTool] : []
 	];
 }
@@ -1469,11 +1522,58 @@ async function readAgentsMd(sandbox) {
 		return null;
 	}
 }
+function extractWakeText(wake) {
+	if (wake.type !== `inbox`) return null;
+	const payload = wake.payload;
+	if (typeof payload === `string`) return payload;
+	if (payload && typeof payload === `object`) {
+		const record = payload;
+		if (typeof record.text === `string`) return record.text;
+		if (typeof record.source === `string`) return record.source;
+	}
+	return null;
+}
+async function tryHandleSlashCommand(ctx, wake) {
+	const text = extractWakeText(wake);
+	if (text === null) return false;
+	if ((0, __electric_ax_agents_runtime.isGoalCommandText)(text)) {
+		const command = (0, __electric_ax_agents_runtime.parseGoalCommand)(text);
+		const result = (0, __electric_ax_agents_runtime.dispatchGoalCommand)(ctx, command);
+		if (result.message) {
+			serverLog.info(`[horton ${ctx.entityUrl}] ${result.message}`);
+			writeSlashCommandReply(ctx, result.message);
+		}
+		if (command.kind === `set`) await kickoffGoalRun(ctx);
+		return result.handled;
+	}
+	return false;
+}
+const GOAL_KICKOFF_TEXT = `Start working toward the active goal now. Call \`mark_goal_complete\` when you believe it is done.`;
+async function kickoffGoalRun(ctx) {
+	const goal = ctx.getGoal();
+	if (!goal || goal.status !== `active`) return;
+	try {
+		await ctx.send(ctx.entityUrl, {
+			kind: `goal_kickoff`,
+			text: GOAL_KICKOFF_TEXT
+		}, { type: `inbox` });
+	} catch (err) {
+		serverLog.warn(`[horton ${ctx.entityUrl}] failed to enqueue goal kickoff: ${err instanceof Error ? err.message : String(err)}`);
+	}
+}
+function writeSlashCommandReply(ctx, text) {
+	try {
+		ctx.replyText(text);
+	} catch (err) {
+		serverLog.warn(`[horton ${ctx.entityUrl}] failed to render slash command reply: ${err instanceof Error ? err.message : String(err)}`);
+	}
+}
 function createAssistantHandler(options) {
 	const { streamFn, docsSupport, docsSearchTool, skillsRegistry, modelCatalog, docsUrl } = options;
 	const skillLoader = (0, __electric_ax_agents_runtime.createContextSkillLoader)(skillsRegistry, { slashCommandOwner: HORTON_SKILLS_SLASH_COMMAND_OWNER });
 	const hasSkills = skillLoader.hasSkills;
 	return async function assistantHandler(ctx, wake) {
+		if (await tryHandleSlashCommand(ctx, wake)) return;
 		const loadedSkills = await skillLoader.load(ctx);
 		const readSet = new Set();
 		const modelConfig = resolveBuiltinModelConfig(modelCatalog, ctx.args);
@@ -1566,6 +1666,26 @@ function createAssistantHandler(options) {
 				}
 			}
 		});
+		const goal = ctx.getGoal();
+		const enforcedGoal = goal && goal.status === `active` ? goal : void 0;
+		const activeGoalPromptInfo = enforcedGoal ? {
+			objective: enforcedGoal.objective,
+			tokenBudget: enforcedGoal.tokenBudget,
+			tokensUsed: enforcedGoal.tokensUsed
+		} : void 0;
+		const budgetAbort = new AbortController();
+		let runTokensUsed = enforcedGoal?.tokensUsed ?? 0;
+		let budgetTripped = false;
+		const onStepEnd = enforcedGoal ? (stats) => {
+			if (budgetTripped) return;
+			runTokensUsed += stats.uncachedInput + stats.output;
+			ctx.updateGoalUsage(runTokensUsed);
+			if (enforcedGoal.tokenBudget !== null && runTokensUsed >= enforcedGoal.tokenBudget) {
+				budgetTripped = true;
+				serverLog.info(`[horton ${ctx.entityUrl}] goal budget exhausted (${runTokensUsed} tokens) — aborting run`);
+				budgetAbort.abort();
+			}
+		} : void 0;
 		ctx.useAgent({
 			systemPrompt: buildHortonSystemPrompt(sandboxCwd, {
 				hasDocsSupport: Boolean(docsSupport),
@@ -1574,13 +1694,26 @@ function createAssistantHandler(options) {
 				modelProvider: modelConfig.provider,
 				modelId: String(modelConfig.model),
 				hasEventSourceTools,
-				hasScheduleTools
+				hasScheduleTools,
+				...activeGoalPromptInfo && { activeGoal: activeGoalPromptInfo }
 			}),
 			...modelConfig,
 			tools,
-			...streamFn && { streamFn }
+			...streamFn && { streamFn },
+			...onStepEnd && { onStepEnd }
 		});
-		await ctx.agent.run();
+		try {
+			await ctx.agent.run(void 0, budgetAbort.signal);
+		} catch (err) {
+			if (!budgetTripped) throw err;
+			serverLog.info(`[horton ${ctx.entityUrl}] agent.run aborted by budget enforcement`);
+		}
+		if (enforcedGoal) ctx.updateGoalUsage(runTokensUsed, budgetTripped ? { status: `budget_limited` } : void 0);
+		if (budgetTripped && enforcedGoal && enforcedGoal.tokenBudget !== null) {
+			const budget = enforcedGoal.tokenBudget;
+			const suggestedNext = Math.max(budget * 2, budget + 1e4);
+			writeSlashCommandReply(ctx, `⚠️ Stopped — goal hit the token budget (${(0, __electric_ax_agents_runtime.formatTokenCount)(runTokensUsed)} / ${(0, __electric_ax_agents_runtime.formatTokenCount)(budget)} tokens used). Raise the budget with \`/goal set "..." --tokens ${(0, __electric_ax_agents_runtime.formatTokenCount)(suggestedNext)}\`, or call \`/goal complete\` to finalize.`);
+		}
 		await titlePromise;
 	};
 }
@@ -1620,7 +1753,8 @@ function registerHorton(registry, options) {
 			subject_value: `user`,
 			permission: `manage`
 		}],
-		slashCommands: (0, __electric_ax_agents_runtime.buildSkillSlashCommands)(skillsRegistry),
+		state: { comments: __electric_ax_agents_runtime.commentsCollection },
+		slashCommands: [__electric_ax_agents_runtime.GOAL_SLASH_COMMAND, ...(0, __electric_ax_agents_runtime.buildSkillSlashCommands)(skillsRegistry)],
 		handler: assistantHandler
 	});
 	return [`horton`];
@@ -1804,6 +1938,7 @@ function registerWorker(registry, options) {
 			subject_value: `user`,
 			permission: `manage`
 		}],
+		state: { comments: __electric_ax_agents_runtime.commentsCollection },
 		async handler(ctx) {
 			const args = parseWorkerArgs(ctx.args);
 			const readSet = new Set();
@@ -1856,7 +1991,7 @@ function createBuiltinElectricTools(custom) {
 	};
 }
 async function createBuiltinAgentHandler(options) {
-	const { agentServerUrl, serveEndpoint, workingDirectory, streamFn, enabledModelValues, createElectricTools, publicUrl, runtimeName, baseSkillsDir: baseSkillsDirOverride, serverHeaders, defaultDispatchPolicyForType } = options;
+	const { agentServerUrl, serveEndpoint, workingDirectory, streamFn, enabledModelValues, createElectricTools, publicUrl, runtimeName, baseSkillsDir: baseSkillsDirOverride, serverHeaders, defaultDispatchPolicyForType, dockerSandbox: dockerSandboxOpts } = options;
 	const modelCatalog = await createBuiltinModelCatalog({
 		allowMockFallback: Boolean(streamFn),
 		enabledModelValues
@@ -1892,7 +2027,7 @@ async function createBuiltinAgentHandler(options) {
 		modelCatalog
 	});
 	typeNames.push(`worker`);
-	const { profiles: sandboxProfiles, shutdownSandboxes } = await buildBuiltinSandboxProfiles(cwd);
+	const { profiles: sandboxProfiles, shutdownSandboxes } = await buildBuiltinSandboxProfiles(cwd, dockerSandboxOpts);
 	const runtime = (0, __electric_ax_agents_runtime.createRuntimeHandler)({
 		baseUrl: agentServerUrl,
 		serveEndpoint,
@@ -1912,7 +2047,8 @@ async function createBuiltinAgentHandler(options) {
 		registry,
 		typeNames,
 		skillsRegistry,
-		shutdownSandboxes
+		shutdownSandboxes,
+		modelCatalog
 	};
 }
 async function createAgentHandler(agentServerUrl, workingDirectory, streamFn, createElectricTools, serveEndpoint) {
@@ -1941,6 +2077,21 @@ function sweepOrphanedDockerSandboxesOnce(sweep) {
 	return dockerBootSweep;
 }
 /**
+* Merge the profile's working-directory mount with embedder docker options
+* into the option fragment spread into `dockerSandbox()`. An internal helper:
+* exported from this module so the unit test can import it, but intentionally
+* not re-exported from `index.ts` (not part of the package's public API).
+*/
+function resolveDockerSandboxOpts(cwdMount, custom) {
+	const extraMounts = [...cwdMount ? [cwdMount] : [], ...custom?.extraMounts ?? []];
+	return {
+		...custom?.image !== void 0 && { image: custom.image },
+		...custom?.allowFloatingTag !== void 0 && { allowFloatingTag: custom.allowFloatingTag },
+		...custom?.env !== void 0 && { env: custom.env },
+		...extraMounts.length > 0 && { extraMounts }
+	};
+}
+/**
 * Built-in sandbox profiles. `local` is always available. `docker` is
 * gated on Docker being reachable so a user without Docker installed
 * sees only what works — the UI never offers a non-functional choice.
@@ -1950,7 +2101,7 @@ function sweepOrphanedDockerSandboxesOnce(sweep) {
 * server must run on shutdown (the providers' debounced idle teardowns die
 * with the process).
 */
-async function buildBuiltinSandboxProfiles(workingDirectory) {
+async function buildBuiltinSandboxProfiles(workingDirectory, dockerOpts) {
 	const profiles = [{
 		name: `local`,
 		label: `Local`,
@@ -1975,11 +2126,11 @@ async function buildBuiltinSandboxProfiles(workingDirectory) {
 						workingDirectory: `/work`,
 						factory: () => dockerSandbox({
 							initialNetworkPolicy: { mode: `allow-all` },
-							extraMounts: cwd ? [{
+							...resolveDockerSandboxOpts(cwd ? {
 								hostPath: cwd,
 								containerPath: `/work`,
 								readOnly: false
-							}] : void 0,
+							} : void 0, dockerOpts),
 							sandboxKey,
 							persistent,
 							owner,
@@ -2391,4 +2542,5 @@ exports.registerBuiltinAgentTypes = registerBuiltinAgentTypes
 exports.registerHorton = registerHorton
 exports.registerWorker = registerWorker
 exports.resolveBuiltinAgentsEntrypointOptions = resolveBuiltinAgentsEntrypointOptions
+exports.resolveBuiltinModelConfig = resolveBuiltinModelConfig
 exports.runBuiltinAgentsEntrypoint = runBuiltinAgentsEntrypoint

package/dist/index.d.cts CHANGED Viewed

@@ -6,6 +6,36 @@ import { Sandbox } from "@electric-ax/agents-runtime/sandbox";
 import { ChangeEvent } from "@durable-streams/state";
 import { braveSearchTool } from "@electric-ax/agents-runtime/tools";
+//#region src/model-catalog.d.ts
+type BuiltinModelProvider = AvailableProvider;
+type BuiltinModelInput = `text` | `image`;
+interface BuiltinModelChoice {
+  provider: BuiltinModelProvider;
+  id: string;
+  label: string;
+  value: string;
+  reasoning: boolean;
+  input: Array<BuiltinModelInput>;
+}
+interface BuiltinModelCatalog {
+  choices: Array<BuiltinModelChoice>;
+  defaultChoice: BuiltinModelChoice;
+}
+interface BuiltinModelCatalogOptions {
+  allowMockFallback?: boolean;
+  enabledModelValues?: ReadonlyArray<string> | null;
+}
+declare const REASONING_EFFORT_VALUES: readonly ["auto", "minimal", "low", "medium", "high"];
+type BuiltinReasoningEffort = (typeof REASONING_EFFORT_VALUES)[number];
+type ExplicitReasoningEffort = Exclude<BuiltinReasoningEffort, `auto`>;
+type BuiltinAgentModelConfig = Pick<AgentConfig, `model` | `provider` | `onPayload` | `getApiKey`> & {
+  reasoningEffort?: ExplicitReasoningEffort;
+};
+declare function builtinModelProviderLabel(provider: BuiltinModelProvider): string;
+declare function listBuiltinModelChoices(providers: ReadonlyArray<BuiltinModelProvider>): Array<BuiltinModelChoice>;
+declare function resolveBuiltinModelConfig(catalog: BuiltinModelCatalog, args: Readonly<Record<string, unknown>>): BuiltinAgentModelConfig;
+//#endregion
 //#region src/bootstrap.d.ts
 declare const DEFAULT_BUILTIN_AGENT_HANDLER_PATH = "/_electric/builtin-agent-handler";
 interface AgentHandlerResult {
@@ -21,8 +51,38 @@ interface AgentHandlerResult {
    * die with the process, which would leave containers running.
    */
   shutdownSandboxes: (() => Promise<void>) | null;
+  /**
+   * Model catalog the built-in agents resolve `model` args against — lets
+   * embedders register sibling agent types with the same model resolution.
+   */
+  modelCatalog: BuiltinModelCatalog;
 }
 type BuiltinElectricToolsFactory = NonNullable<ProcessWakeConfig[`createElectricTools`]>;
+/** Mount spec mirroring `DockerSandboxOpts['extraMounts']` items. */
+interface BuiltinDockerSandboxMount {
+  hostPath: string;
+  containerPath: string;
+  readOnly?: boolean;
+}
+/**
+ * Embedder customization for the built-in `docker` sandbox profile.
+ * Threads straight into `dockerSandbox()` (which already supports these);
+ * custom `extraMounts` are appended after the working-directory mount.
+ * These are embedder/operator-trust inputs: `extraMounts` is subject to the
+ * runtime's docker-socket guard, and `env` is passed verbatim into the
+ * container.
+ *
+ * Note: custom `extraMounts` must not target the working-directory container
+ * path (`/work`) — it collides with the cwd mount and fails at container-create
+ * time with an opaque docker error.
+ */
+interface BuiltinDockerSandboxOptions {
+  /** Digest-pinned image unless `allowFloatingTag` is set. */
+  image?: string;
+  allowFloatingTag?: boolean;
+  env?: Record<string, string>;
+  extraMounts?: Array<BuiltinDockerSandboxMount>;
+}
 interface BuiltinAgentHandlerOptions {
   agentServerUrl: string;
   serveEndpoint?: string;
@@ -36,6 +96,8 @@ interface BuiltinAgentHandlerOptions {
   serverHeaders?: HeadersProvider;
   defaultDispatchPolicyForType?: (typeName: string) => DispatchPolicy | undefined;
   createElectricTools?: BuiltinElectricToolsFactory;
+  /** Customize the built-in `docker` sandbox profile (image, env, mounts). */
+  dockerSandbox?: BuiltinDockerSandboxOptions;
 }
 declare function createBuiltinElectricTools(custom?: BuiltinElectricToolsFactory): BuiltinElectricToolsFactory;
 declare function createBuiltinAgentHandler(options: BuiltinAgentHandlerOptions): Promise<AgentHandlerResult | null>;
@@ -45,6 +107,12 @@ declare const registerAgentTypes: typeof registerBuiltinAgentTypes;
 //#endregion
 //#region src/durable-streams-cache.d.ts
+/**
+ * Merge the profile's working-directory mount with embedder docker options
+ * into the option fragment spread into `dockerSandbox()`. An internal helper:
+ * exported from this module so the unit test can import it, but intentionally
+ * not re-exported from `index.ts` (not part of the package's public API).
+ */
 type DurableStreamsFetchCacheOptions = false | {
   store?: `memory` | `sqlite`;
   sqliteLocation?: string;
@@ -160,40 +228,15 @@ declare function runBuiltinAgentsEntrypoint({
   url: string;
 }>;
-//#endregion
-//#region src/model-catalog.d.ts
-type BuiltinModelProvider = AvailableProvider;
-type BuiltinModelInput = `text` | `image`;
-interface BuiltinModelChoice {
-  provider: BuiltinModelProvider;
-  id: string;
-  label: string;
-  value: string;
-  reasoning: boolean;
-  input: Array<BuiltinModelInput>;
-}
-interface BuiltinModelCatalog {
-  choices: Array<BuiltinModelChoice>;
-  defaultChoice: BuiltinModelChoice;
-}
-interface BuiltinModelCatalogOptions {
-  allowMockFallback?: boolean;
-  enabledModelValues?: ReadonlyArray<string> | null;
-}
-declare const REASONING_EFFORT_VALUES: readonly ["auto", "minimal", "low", "medium", "high"];
-type BuiltinReasoningEffort = (typeof REASONING_EFFORT_VALUES)[number];
-type ExplicitReasoningEffort = Exclude<BuiltinReasoningEffort, `auto`>;
-type BuiltinAgentModelConfig = Pick<AgentConfig, `model` | `provider` | `onPayload` | `getApiKey`> & {
-  reasoningEffort?: ExplicitReasoningEffort;
-};
-declare function builtinModelProviderLabel(provider: BuiltinModelProvider): string;
-declare function listBuiltinModelChoices(providers: ReadonlyArray<BuiltinModelProvider>): Array<BuiltinModelChoice>;
-declare function resolveBuiltinModelConfig(catalog: BuiltinModelCatalog, args: Readonly<Record<string, unknown>>): BuiltinAgentModelConfig;
 //#endregion
 //#region src/agents/horton.d.ts
 declare const HORTON_MODEL = "claude-sonnet-4-6";
 declare function generateTitle(userMessage: string, llmCall: (prompt: string) => Promise<string>, onFallback?: (reason: string) => void): Promise<string>;
+interface ActiveGoalPromptInfo {
+  objective: string;
+  tokenBudget: number | null;
+  tokensUsed: number;
+}
 declare function buildHortonSystemPrompt(workingDirectory: string, opts?: {
   hasDocsSupport?: boolean;
   hasEventSourceTools?: boolean;
@@ -202,6 +245,7 @@ declare function buildHortonSystemPrompt(workingDirectory: string, opts?: {
   docsUrl?: string;
   modelProvider?: string;
   modelId?: string;
+  activeGoal?: ActiveGoalPromptInfo;
 }): string;
 declare function createHortonTools(sandbox: Sandbox, ctx: HandlerContext, readSet: Set<string>, opts?: {
   docsSearchTool?: AgentTool$1;
@@ -254,4 +298,4 @@ declare function createHortonDocsSupport(workingDirectory: string, opts?: {
 }): HortonDocsSupport | null;
 //#endregion
-export { AgentHandlerResult, BuiltinAgentHandlerOptions, BuiltinAgentsEntrypointOptions, BuiltinAgentsEntrypointServer, BuiltinAgentsServer, BuiltinAgentsServerOptions, BuiltinElectricToolsFactory, BuiltinModelCatalogOptions, BuiltinModelChoice, BuiltinModelProvider, DEFAULT_BUILTIN_AGENT_HANDLER_PATH, HORTON_MODEL, McpConfig, McpListedEntry, McpRegistry, McpServerConfig, RegistrySnapshot, RegistrySubscriber, RunBuiltinAgentsEntrypointOptions, WORKER_TOOL_NAMES, WorkerToolName, braveSearchTool, buildHortonSystemPrompt, builtinModelProviderLabel, createAgentHandler, createBuiltinAgentHandler, createBuiltinElectricTools, createForkTool, createHortonDocsSupport, createHortonTools, createSpawnWorkerTool, generateTitle, listBuiltinModelChoices, registerAgentTypes, registerBuiltinAgentTypes, registerHorton, registerWorker, resolveBuiltinAgentsEntrypointOptions, runBuiltinAgentsEntrypoint };
+export { AgentHandlerResult, BuiltinAgentHandlerOptions, BuiltinAgentModelConfig, BuiltinAgentsEntrypointOptions, BuiltinAgentsEntrypointServer, BuiltinAgentsServer, BuiltinAgentsServerOptions, BuiltinDockerSandboxMount, BuiltinDockerSandboxOptions, BuiltinElectricToolsFactory, BuiltinModelCatalog, BuiltinModelCatalogOptions, BuiltinModelChoice, BuiltinModelProvider, DEFAULT_BUILTIN_AGENT_HANDLER_PATH, HORTON_MODEL, McpConfig, McpListedEntry, McpRegistry, McpServerConfig, RegistrySnapshot, RegistrySubscriber, RunBuiltinAgentsEntrypointOptions, WORKER_TOOL_NAMES, WorkerToolName, braveSearchTool, buildHortonSystemPrompt, builtinModelProviderLabel, createAgentHandler, createBuiltinAgentHandler, createBuiltinElectricTools, createForkTool, createHortonDocsSupport, createHortonTools, createSpawnWorkerTool, generateTitle, listBuiltinModelChoices, registerAgentTypes, registerBuiltinAgentTypes, registerHorton, registerWorker, resolveBuiltinAgentsEntrypointOptions, resolveBuiltinModelConfig, runBuiltinAgentsEntrypoint };

package/dist/index.d.ts CHANGED Viewed

@@ -6,6 +6,36 @@ import { AgentTool as AgentTool$1, StreamFn } from "@mariozechner/pi-agent-core"
 import { IncomingMessage, ServerResponse } from "node:http";
 import { ChangeEvent } from "@durable-streams/state";
+//#region src/model-catalog.d.ts
+type BuiltinModelProvider = AvailableProvider;
+type BuiltinModelInput = `text` | `image`;
+interface BuiltinModelChoice {
+  provider: BuiltinModelProvider;
+  id: string;
+  label: string;
+  value: string;
+  reasoning: boolean;
+  input: Array<BuiltinModelInput>;
+}
+interface BuiltinModelCatalog {
+  choices: Array<BuiltinModelChoice>;
+  defaultChoice: BuiltinModelChoice;
+}
+interface BuiltinModelCatalogOptions {
+  allowMockFallback?: boolean;
+  enabledModelValues?: ReadonlyArray<string> | null;
+}
+declare const REASONING_EFFORT_VALUES: readonly ["auto", "minimal", "low", "medium", "high"];
+type BuiltinReasoningEffort = (typeof REASONING_EFFORT_VALUES)[number];
+type ExplicitReasoningEffort = Exclude<BuiltinReasoningEffort, `auto`>;
+type BuiltinAgentModelConfig = Pick<AgentConfig, `model` | `provider` | `onPayload` | `getApiKey`> & {
+  reasoningEffort?: ExplicitReasoningEffort;
+};
+declare function builtinModelProviderLabel(provider: BuiltinModelProvider): string;
+declare function listBuiltinModelChoices(providers: ReadonlyArray<BuiltinModelProvider>): Array<BuiltinModelChoice>;
+declare function resolveBuiltinModelConfig(catalog: BuiltinModelCatalog, args: Readonly<Record<string, unknown>>): BuiltinAgentModelConfig;
+//#endregion
 //#region src/bootstrap.d.ts
 declare const DEFAULT_BUILTIN_AGENT_HANDLER_PATH = "/_electric/builtin-agent-handler";
 interface AgentHandlerResult {
@@ -21,8 +51,38 @@ interface AgentHandlerResult {
    * die with the process, which would leave containers running.
    */
   shutdownSandboxes: (() => Promise<void>) | null;
+  /**
+   * Model catalog the built-in agents resolve `model` args against — lets
+   * embedders register sibling agent types with the same model resolution.
+   */
+  modelCatalog: BuiltinModelCatalog;
 }
 type BuiltinElectricToolsFactory = NonNullable<ProcessWakeConfig[`createElectricTools`]>;
+/** Mount spec mirroring `DockerSandboxOpts['extraMounts']` items. */
+interface BuiltinDockerSandboxMount {
+  hostPath: string;
+  containerPath: string;
+  readOnly?: boolean;
+}
+/**
+ * Embedder customization for the built-in `docker` sandbox profile.
+ * Threads straight into `dockerSandbox()` (which already supports these);
+ * custom `extraMounts` are appended after the working-directory mount.
+ * These are embedder/operator-trust inputs: `extraMounts` is subject to the
+ * runtime's docker-socket guard, and `env` is passed verbatim into the
+ * container.
+ *
+ * Note: custom `extraMounts` must not target the working-directory container
+ * path (`/work`) — it collides with the cwd mount and fails at container-create
+ * time with an opaque docker error.
+ */
+interface BuiltinDockerSandboxOptions {
+  /** Digest-pinned image unless `allowFloatingTag` is set. */
+  image?: string;
+  allowFloatingTag?: boolean;
+  env?: Record<string, string>;
+  extraMounts?: Array<BuiltinDockerSandboxMount>;
+}
 interface BuiltinAgentHandlerOptions {
   agentServerUrl: string;
   serveEndpoint?: string;
@@ -36,6 +96,8 @@ interface BuiltinAgentHandlerOptions {
   serverHeaders?: HeadersProvider;
   defaultDispatchPolicyForType?: (typeName: string) => DispatchPolicy | undefined;
   createElectricTools?: BuiltinElectricToolsFactory;
+  /** Customize the built-in `docker` sandbox profile (image, env, mounts). */
+  dockerSandbox?: BuiltinDockerSandboxOptions;
 }
 declare function createBuiltinElectricTools(custom?: BuiltinElectricToolsFactory): BuiltinElectricToolsFactory;
 declare function createBuiltinAgentHandler(options: BuiltinAgentHandlerOptions): Promise<AgentHandlerResult | null>;
@@ -45,6 +107,12 @@ declare const registerAgentTypes: typeof registerBuiltinAgentTypes;
 //#endregion
 //#region src/durable-streams-cache.d.ts
+/**
+ * Merge the profile's working-directory mount with embedder docker options
+ * into the option fragment spread into `dockerSandbox()`. An internal helper:
+ * exported from this module so the unit test can import it, but intentionally
+ * not re-exported from `index.ts` (not part of the package's public API).
+ */
 type DurableStreamsFetchCacheOptions = false | {
   store?: `memory` | `sqlite`;
   sqliteLocation?: string;
@@ -160,40 +228,15 @@ declare function runBuiltinAgentsEntrypoint({
   url: string;
 }>;
-//#endregion
-//#region src/model-catalog.d.ts
-type BuiltinModelProvider = AvailableProvider;
-type BuiltinModelInput = `text` | `image`;
-interface BuiltinModelChoice {
-  provider: BuiltinModelProvider;
-  id: string;
-  label: string;
-  value: string;
-  reasoning: boolean;
-  input: Array<BuiltinModelInput>;
-}
-interface BuiltinModelCatalog {
-  choices: Array<BuiltinModelChoice>;
-  defaultChoice: BuiltinModelChoice;
-}
-interface BuiltinModelCatalogOptions {
-  allowMockFallback?: boolean;
-  enabledModelValues?: ReadonlyArray<string> | null;
-}
-declare const REASONING_EFFORT_VALUES: readonly ["auto", "minimal", "low", "medium", "high"];
-type BuiltinReasoningEffort = (typeof REASONING_EFFORT_VALUES)[number];
-type ExplicitReasoningEffort = Exclude<BuiltinReasoningEffort, `auto`>;
-type BuiltinAgentModelConfig = Pick<AgentConfig, `model` | `provider` | `onPayload` | `getApiKey`> & {
-  reasoningEffort?: ExplicitReasoningEffort;
-};
-declare function builtinModelProviderLabel(provider: BuiltinModelProvider): string;
-declare function listBuiltinModelChoices(providers: ReadonlyArray<BuiltinModelProvider>): Array<BuiltinModelChoice>;
-declare function resolveBuiltinModelConfig(catalog: BuiltinModelCatalog, args: Readonly<Record<string, unknown>>): BuiltinAgentModelConfig;
 //#endregion
 //#region src/agents/horton.d.ts
 declare const HORTON_MODEL = "claude-sonnet-4-6";
 declare function generateTitle(userMessage: string, llmCall: (prompt: string) => Promise<string>, onFallback?: (reason: string) => void): Promise<string>;
+interface ActiveGoalPromptInfo {
+  objective: string;
+  tokenBudget: number | null;
+  tokensUsed: number;
+}
 declare function buildHortonSystemPrompt(workingDirectory: string, opts?: {
   hasDocsSupport?: boolean;
   hasEventSourceTools?: boolean;
@@ -202,6 +245,7 @@ declare function buildHortonSystemPrompt(workingDirectory: string, opts?: {
   docsUrl?: string;
   modelProvider?: string;
   modelId?: string;
+  activeGoal?: ActiveGoalPromptInfo;
 }): string;
 declare function createHortonTools(sandbox: Sandbox, ctx: HandlerContext, readSet: Set<string>, opts?: {
   docsSearchTool?: AgentTool$1;
@@ -254,4 +298,4 @@ declare function createHortonDocsSupport(workingDirectory: string, opts?: {
 }): HortonDocsSupport | null;
 //#endregion
-export { AgentHandlerResult, BuiltinAgentHandlerOptions, BuiltinAgentsEntrypointOptions, BuiltinAgentsEntrypointServer, BuiltinAgentsServer, BuiltinAgentsServerOptions, BuiltinElectricToolsFactory, BuiltinModelCatalogOptions, BuiltinModelChoice, BuiltinModelProvider, DEFAULT_BUILTIN_AGENT_HANDLER_PATH, HORTON_MODEL, McpConfig, McpListedEntry, McpRegistry, McpServerConfig, RegistrySnapshot, RegistrySubscriber, RunBuiltinAgentsEntrypointOptions, WORKER_TOOL_NAMES, WorkerToolName, braveSearchTool, buildHortonSystemPrompt, builtinModelProviderLabel, createAgentHandler, createBuiltinAgentHandler, createBuiltinElectricTools, createForkTool, createHortonDocsSupport, createHortonTools, createSpawnWorkerTool, generateTitle, listBuiltinModelChoices, registerAgentTypes, registerBuiltinAgentTypes, registerHorton, registerWorker, resolveBuiltinAgentsEntrypointOptions, runBuiltinAgentsEntrypoint };
+export { AgentHandlerResult, BuiltinAgentHandlerOptions, BuiltinAgentModelConfig, BuiltinAgentsEntrypointOptions, BuiltinAgentsEntrypointServer, BuiltinAgentsServer, BuiltinAgentsServerOptions, BuiltinDockerSandboxMount, BuiltinDockerSandboxOptions, BuiltinElectricToolsFactory, BuiltinModelCatalog, BuiltinModelCatalogOptions, BuiltinModelChoice, BuiltinModelProvider, DEFAULT_BUILTIN_AGENT_HANDLER_PATH, HORTON_MODEL, McpConfig, McpListedEntry, McpRegistry, McpServerConfig, RegistrySnapshot, RegistrySubscriber, RunBuiltinAgentsEntrypointOptions, WORKER_TOOL_NAMES, WorkerToolName, braveSearchTool, buildHortonSystemPrompt, builtinModelProviderLabel, createAgentHandler, createBuiltinAgentHandler, createBuiltinElectricTools, createForkTool, createHortonDocsSupport, createHortonTools, createSpawnWorkerTool, generateTitle, listBuiltinModelChoices, registerAgentTypes, registerBuiltinAgentTypes, registerHorton, registerWorker, resolveBuiltinAgentsEntrypointOptions, resolveBuiltinModelConfig, runBuiltinAgentsEntrypoint };

package/dist/index.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { mergeElectricPrincipalHeader } from "./server-headers-KD5yHFYT.js";
 import path from "node:path";
 import { fileURLToPath } from "node:url";
-import { MOONSHOT_API_BASE_URL, MOONSHOT_PROVIDER, appendPathToUrl, buildSkillSlashCommands, completeWithLowCostModel, createContextSkillLoader, createEntityRegistry, createPullWakeRunner, createRuntimeHandler, createSkillsRegistry, db, detectAvailableProviders, getMoonshotApiKey, getMoonshotModel, getMoonshotModels, pgSync, readCodexAccessToken, registerToolProvider, unregisterToolProvider } from "@electric-ax/agents-runtime";
-import { braveSearchTool, braveSearchTool as braveSearchTool$1, createBashTool, createEditTool, createEventSourceTools, createFetchUrlTool, createReadFileTool, createScheduleTools, createSendTool, createWriteTool } from "@electric-ax/agents-runtime/tools";
+import { GOAL_SLASH_COMMAND, MOONSHOT_API_BASE_URL, MOONSHOT_PROVIDER, appendPathToUrl, buildSkillSlashCommands, commentsCollection, completeWithLowCostModel, createContextSkillLoader, createEntityRegistry, createPullWakeRunner, createRuntimeHandler, createSkillsRegistry, db, detectAvailableProviders, dispatchGoalCommand, formatTokenCount, getMoonshotApiKey, getMoonshotModel, getMoonshotModels, isGoalCommandText, parseGoalCommand, pgSync, readCodexAccessToken, registerToolProvider, unregisterToolProvider } from "@electric-ax/agents-runtime";
+import { braveSearchTool, braveSearchTool as braveSearchTool$1, createBashTool, createEditTool, createEventSourceTools, createFetchUrlTool, createMarkGoalCompleteTool, createReadFileTool, createScheduleTools, createSendTool, createWriteTool } from "@electric-ax/agents-runtime/tools";
 import { chooseDefaultSandbox, isE2BAvailable, lazySandbox, remoteSandbox } from "@electric-ax/agents-runtime/sandbox";
 import fsSync from "node:fs";
 import pino from "pino";
@@ -1069,25 +1069,66 @@ function filterChoicesByEnabledModels(choices, values) {
 	const filtered = choices.filter((choice) => enabled.has(choice.value));
 	return filtered.length > 0 ? filtered : choices;
 }
+/**
+* Anthropic-specific budget mapping for `reasoningEffort`.
+*
+* Anthropic's `thinking.budget_tokens` is a hard cap on tokens spent
+* inside the thinking block before the model must commit to its
+* answer. Docs require ≥ 1024; we scale from there. Numbers tuned so
+* `medium` is the spot most "show your work" requests land, and
+* `high` covers tougher reasoning without uncapped spend.
+*
+* Keep in sync with provider doc updates — Anthropic has shifted the
+* minimum once already (older models capped lower).
+*/
+const ANTHROPIC_THINKING_BUDGET_BY_EFFORT = {
+	minimal: 1024,
+	low: 2048,
+	medium: 8192,
+	high: 24576
+};
 function withProviderPayloadDefaults(config, choice, reasoningEffort) {
-	if (choice.provider !== `openai` && choice.provider !== `openai-codex` || !choice.reasoning) return config;
-	const defaultEffort = choice.provider === `openai-codex` ? `low` : `minimal`;
-	const effort = reasoningEffort === `minimal` && choice.provider === `openai-codex` ? `low` : reasoningEffort ?? defaultEffort;
-	return {
-		...config,
-		onPayload: (payload) => {
-			if (typeof payload !== `object` || payload === null) return void 0;
-			const body = payload;
-			const existingReasoning = typeof body.reasoning === `object` && body.reasoning !== null ? body.reasoning : {};
-			return {
-				...body,
-				reasoning: {
-					...existingReasoning,
-					effort
-				}
-			};
-		}
-	};
+	if (!choice.reasoning) return config;
+	if (choice.provider === `openai` || choice.provider === `openai-codex`) {
+		const defaultEffort = choice.provider === `openai-codex` ? `low` : `minimal`;
+		const effort = reasoningEffort === `minimal` && choice.provider === `openai-codex` ? `low` : reasoningEffort ?? defaultEffort;
+		return {
+			...config,
+			onPayload: (payload) => {
+				if (typeof payload !== `object` || payload === null) return void 0;
+				const body = payload;
+				const existingReasoning = typeof body.reasoning === `object` && body.reasoning !== null ? body.reasoning : {};
+				return {
+					...body,
+					reasoning: {
+						...existingReasoning,
+						effort
+					}
+				};
+			}
+		};
+	}
+	if (choice.provider === `anthropic`) {
+		const effectiveEffort = reasoningEffort ?? `minimal`;
+		const budgetTokens = ANTHROPIC_THINKING_BUDGET_BY_EFFORT[effectiveEffort];
+		return {
+			...config,
+			onPayload: (payload) => {
+				if (typeof payload !== `object` || payload === null) return void 0;
+				const body = payload;
+				const existingThinking = typeof body.thinking === `object` && body.thinking !== null ? body.thinking : {};
+				return {
+					...body,
+					thinking: {
+						...existingThinking,
+						type: `enabled`,
+						budget_tokens: budgetTokens
+					}
+				};
+			}
+		};
+	}
+	return config;
 }
 function parseReasoningEffort(value) {
 	return value === `minimal` || value === `low` || value === `medium` || value === `high` ? value : null;
@@ -1352,7 +1393,18 @@ Workflow when forking yourself for parallel exploration:
 Report outcomes faithfully. If a command failed, say so with the relevant output. If you didn't run a verification step, say that rather than implying you did. Don't hedge confirmed results with unnecessary disclaimers.
 Working directory: ${workingDirectory}
-The current year is ${new Date().getFullYear()}.`;
+The current year is ${new Date().getFullYear()}.${buildGoalGuidance(opts.activeGoal)}`;
+}
+function buildGoalGuidance(goal) {
+	if (!goal) return ``;
+	const budgetLine = goal.tokenBudget === null ? `unlimited` : `${goal.tokensUsed} / ${goal.tokenBudget} tokens used`;
+	return `
+# Active goal
+- Objective: ${goal.objective}
+- Token budget: ${budgetLine}
+The user set this goal with /goal set. Work autonomously toward it: do NOT ask the user clarifying questions or pause for confirmation — make reasonable assumptions and proceed. When you believe the goal is met, call the \`mark_goal_complete\` tool. If you hit a blocker that genuinely requires the user (e.g. credentials, a destructive action), call \`mark_goal_complete\` with a summary explaining what's needed. The runtime will abort this run automatically if you exceed the token budget.`;
 }
 function getToolName(tool) {
 	if (typeof tool !== `object` || tool === null) return null;
@@ -1377,6 +1429,7 @@ function createHortonTools(sandbox, ctx, readSet, opts = {}) {
 		createObservePgSyncTool(ctx),
 		createSetTitleTool(ctx),
 		createSendTool(ctx.send, { selfEntityUrl: ctx.entityUrl }),
+		...ctx.getGoal()?.status === `active` ? [createMarkGoalCompleteTool(ctx)] : [],
 		...opts.docsSearchTool ? [opts.docsSearchTool] : []
 	];
 }
@@ -1445,11 +1498,58 @@ async function readAgentsMd(sandbox) {
 		return null;
 	}
 }
+function extractWakeText(wake) {
+	if (wake.type !== `inbox`) return null;
+	const payload = wake.payload;
+	if (typeof payload === `string`) return payload;
+	if (payload && typeof payload === `object`) {
+		const record = payload;
+		if (typeof record.text === `string`) return record.text;
+		if (typeof record.source === `string`) return record.source;
+	}
+	return null;
+}
+async function tryHandleSlashCommand(ctx, wake) {
+	const text = extractWakeText(wake);
+	if (text === null) return false;
+	if (isGoalCommandText(text)) {
+		const command = parseGoalCommand(text);
+		const result = dispatchGoalCommand(ctx, command);
+		if (result.message) {
+			serverLog.info(`[horton ${ctx.entityUrl}] ${result.message}`);
+			writeSlashCommandReply(ctx, result.message);
+		}
+		if (command.kind === `set`) await kickoffGoalRun(ctx);
+		return result.handled;
+	}
+	return false;
+}
+const GOAL_KICKOFF_TEXT = `Start working toward the active goal now. Call \`mark_goal_complete\` when you believe it is done.`;
+async function kickoffGoalRun(ctx) {
+	const goal = ctx.getGoal();
+	if (!goal || goal.status !== `active`) return;
+	try {
+		await ctx.send(ctx.entityUrl, {
+			kind: `goal_kickoff`,
+			text: GOAL_KICKOFF_TEXT
+		}, { type: `inbox` });
+	} catch (err) {
+		serverLog.warn(`[horton ${ctx.entityUrl}] failed to enqueue goal kickoff: ${err instanceof Error ? err.message : String(err)}`);
+	}
+}
+function writeSlashCommandReply(ctx, text) {
+	try {
+		ctx.replyText(text);
+	} catch (err) {
+		serverLog.warn(`[horton ${ctx.entityUrl}] failed to render slash command reply: ${err instanceof Error ? err.message : String(err)}`);
+	}
+}
 function createAssistantHandler(options) {
 	const { streamFn, docsSupport, docsSearchTool, skillsRegistry, modelCatalog, docsUrl } = options;
 	const skillLoader = createContextSkillLoader(skillsRegistry, { slashCommandOwner: HORTON_SKILLS_SLASH_COMMAND_OWNER });
 	const hasSkills = skillLoader.hasSkills;
 	return async function assistantHandler(ctx, wake) {
+		if (await tryHandleSlashCommand(ctx, wake)) return;
 		const loadedSkills = await skillLoader.load(ctx);
 		const readSet = new Set();
 		const modelConfig = resolveBuiltinModelConfig(modelCatalog, ctx.args);
@@ -1542,6 +1642,26 @@ function createAssistantHandler(options) {
 				}
 			}
 		});
+		const goal = ctx.getGoal();
+		const enforcedGoal = goal && goal.status === `active` ? goal : void 0;
+		const activeGoalPromptInfo = enforcedGoal ? {
+			objective: enforcedGoal.objective,
+			tokenBudget: enforcedGoal.tokenBudget,
+			tokensUsed: enforcedGoal.tokensUsed
+		} : void 0;
+		const budgetAbort = new AbortController();
+		let runTokensUsed = enforcedGoal?.tokensUsed ?? 0;
+		let budgetTripped = false;
+		const onStepEnd = enforcedGoal ? (stats) => {
+			if (budgetTripped) return;
+			runTokensUsed += stats.uncachedInput + stats.output;
+			ctx.updateGoalUsage(runTokensUsed);
+			if (enforcedGoal.tokenBudget !== null && runTokensUsed >= enforcedGoal.tokenBudget) {
+				budgetTripped = true;
+				serverLog.info(`[horton ${ctx.entityUrl}] goal budget exhausted (${runTokensUsed} tokens) — aborting run`);
+				budgetAbort.abort();
+			}
+		} : void 0;
 		ctx.useAgent({
 			systemPrompt: buildHortonSystemPrompt(sandboxCwd, {
 				hasDocsSupport: Boolean(docsSupport),
@@ -1550,13 +1670,26 @@ function createAssistantHandler(options) {
 				modelProvider: modelConfig.provider,
 				modelId: String(modelConfig.model),
 				hasEventSourceTools,
-				hasScheduleTools
+				hasScheduleTools,
+				...activeGoalPromptInfo && { activeGoal: activeGoalPromptInfo }
 			}),
 			...modelConfig,
 			tools,
-			...streamFn && { streamFn }
+			...streamFn && { streamFn },
+			...onStepEnd && { onStepEnd }
 		});
-		await ctx.agent.run();
+		try {
+			await ctx.agent.run(void 0, budgetAbort.signal);
+		} catch (err) {
+			if (!budgetTripped) throw err;
+			serverLog.info(`[horton ${ctx.entityUrl}] agent.run aborted by budget enforcement`);
+		}
+		if (enforcedGoal) ctx.updateGoalUsage(runTokensUsed, budgetTripped ? { status: `budget_limited` } : void 0);
+		if (budgetTripped && enforcedGoal && enforcedGoal.tokenBudget !== null) {
+			const budget = enforcedGoal.tokenBudget;
+			const suggestedNext = Math.max(budget * 2, budget + 1e4);
+			writeSlashCommandReply(ctx, `⚠️ Stopped — goal hit the token budget (${formatTokenCount(runTokensUsed)} / ${formatTokenCount(budget)} tokens used). Raise the budget with \`/goal set "..." --tokens ${formatTokenCount(suggestedNext)}\`, or call \`/goal complete\` to finalize.`);
+		}
 		await titlePromise;
 	};
 }
@@ -1596,7 +1729,8 @@ function registerHorton(registry, options) {
 			subject_value: `user`,
 			permission: `manage`
 		}],
-		slashCommands: buildSkillSlashCommands(skillsRegistry),
+		state: { comments: commentsCollection },
+		slashCommands: [GOAL_SLASH_COMMAND, ...buildSkillSlashCommands(skillsRegistry)],
 		handler: assistantHandler
 	});
 	return [`horton`];
@@ -1780,6 +1914,7 @@ function registerWorker(registry, options) {
 			subject_value: `user`,
 			permission: `manage`
 		}],
+		state: { comments: commentsCollection },
 		async handler(ctx) {
 			const args = parseWorkerArgs(ctx.args);
 			const readSet = new Set();
@@ -1832,7 +1967,7 @@ function createBuiltinElectricTools(custom) {
 	};
 }
 async function createBuiltinAgentHandler(options) {
-	const { agentServerUrl, serveEndpoint, workingDirectory, streamFn, enabledModelValues, createElectricTools, publicUrl, runtimeName, baseSkillsDir: baseSkillsDirOverride, serverHeaders, defaultDispatchPolicyForType } = options;
+	const { agentServerUrl, serveEndpoint, workingDirectory, streamFn, enabledModelValues, createElectricTools, publicUrl, runtimeName, baseSkillsDir: baseSkillsDirOverride, serverHeaders, defaultDispatchPolicyForType, dockerSandbox: dockerSandboxOpts } = options;
 	const modelCatalog = await createBuiltinModelCatalog({
 		allowMockFallback: Boolean(streamFn),
 		enabledModelValues
@@ -1868,7 +2003,7 @@ async function createBuiltinAgentHandler(options) {
 		modelCatalog
 	});
 	typeNames.push(`worker`);
-	const { profiles: sandboxProfiles, shutdownSandboxes } = await buildBuiltinSandboxProfiles(cwd);
+	const { profiles: sandboxProfiles, shutdownSandboxes } = await buildBuiltinSandboxProfiles(cwd, dockerSandboxOpts);
 	const runtime = createRuntimeHandler({
 		baseUrl: agentServerUrl,
 		serveEndpoint,
@@ -1888,7 +2023,8 @@ async function createBuiltinAgentHandler(options) {
 		registry,
 		typeNames,
 		skillsRegistry,
-		shutdownSandboxes
+		shutdownSandboxes,
+		modelCatalog
 	};
 }
 async function createAgentHandler(agentServerUrl, workingDirectory, streamFn, createElectricTools, serveEndpoint) {
@@ -1917,6 +2053,21 @@ function sweepOrphanedDockerSandboxesOnce(sweep) {
 	return dockerBootSweep;
 }
 /**
+* Merge the profile's working-directory mount with embedder docker options
+* into the option fragment spread into `dockerSandbox()`. An internal helper:
+* exported from this module so the unit test can import it, but intentionally
+* not re-exported from `index.ts` (not part of the package's public API).
+*/
+function resolveDockerSandboxOpts(cwdMount, custom) {
+	const extraMounts = [...cwdMount ? [cwdMount] : [], ...custom?.extraMounts ?? []];
+	return {
+		...custom?.image !== void 0 && { image: custom.image },
+		...custom?.allowFloatingTag !== void 0 && { allowFloatingTag: custom.allowFloatingTag },
+		...custom?.env !== void 0 && { env: custom.env },
+		...extraMounts.length > 0 && { extraMounts }
+	};
+}
+/**
 * Built-in sandbox profiles. `local` is always available. `docker` is
 * gated on Docker being reachable so a user without Docker installed
 * sees only what works — the UI never offers a non-functional choice.
@@ -1926,7 +2077,7 @@ function sweepOrphanedDockerSandboxesOnce(sweep) {
 * server must run on shutdown (the providers' debounced idle teardowns die
 * with the process).
 */
-async function buildBuiltinSandboxProfiles(workingDirectory) {
+async function buildBuiltinSandboxProfiles(workingDirectory, dockerOpts) {
 	const profiles = [{
 		name: `local`,
 		label: `Local`,
@@ -1951,11 +2102,11 @@ async function buildBuiltinSandboxProfiles(workingDirectory) {
 						workingDirectory: `/work`,
 						factory: () => dockerSandbox({
 							initialNetworkPolicy: { mode: `allow-all` },
-							extraMounts: cwd ? [{
+							...resolveDockerSandboxOpts(cwd ? {
 								hostPath: cwd,
 								containerPath: `/work`,
 								readOnly: false
-							}] : void 0,
+							} : void 0, dockerOpts),
 							sandboxKey,
 							persistent,
 							owner,
@@ -2341,4 +2492,4 @@ async function runBuiltinAgentsEntrypoint({ env = process.env, cwd = process.cwd
 }
 //#endregion
-export { BuiltinAgentsServer, DEFAULT_BUILTIN_AGENT_HANDLER_PATH, HORTON_MODEL, WORKER_TOOL_NAMES, braveSearchTool, buildHortonSystemPrompt, builtinModelProviderLabel, createAgentHandler, createBuiltinAgentHandler, createBuiltinElectricTools, createForkTool, createHortonDocsSupport, createHortonTools, createSpawnWorkerTool, generateTitle, listBuiltinModelChoices, registerAgentTypes, registerBuiltinAgentTypes, registerHorton, registerWorker, resolveBuiltinAgentsEntrypointOptions, runBuiltinAgentsEntrypoint };
+export { BuiltinAgentsServer, DEFAULT_BUILTIN_AGENT_HANDLER_PATH, HORTON_MODEL, WORKER_TOOL_NAMES, braveSearchTool, buildHortonSystemPrompt, builtinModelProviderLabel, createAgentHandler, createBuiltinAgentHandler, createBuiltinElectricTools, createForkTool, createHortonDocsSupport, createHortonTools, createSpawnWorkerTool, generateTitle, listBuiltinModelChoices, registerAgentTypes, registerBuiltinAgentTypes, registerHorton, registerWorker, resolveBuiltinAgentsEntrypointOptions, resolveBuiltinModelConfig, runBuiltinAgentsEntrypoint };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@electric-ax/agents",
-  "version": "0.4.17",
+  "version": "0.4.18",
   "description": "Built-in Electric Agents runtimes such as Horton and worker",
   "repository": {
     "type": "git",
@@ -50,7 +50,7 @@
     "undici": "^7.24.7",
     "zod": "^4.3.6",
     "@electric-ax/agents-mcp": "0.2.3",
-    "@electric-ax/agents-runtime": "0.3.13"
+    "@electric-ax/agents-runtime": "0.4.0"
   },
   "devDependencies": {
     "@types/better-sqlite3": "^7.6.13",