npm - @flue/sdk - Versions diffs - 0.3.11 → 0.4.1 - Mend

@flue/sdk 0.3.11 → 0.4.1

This diff shows the changes between two publicly available versions of this package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (32)

package/README.md +14 -23
package/dist/abort-Bg3qsAkU.mjs +43 -0
package/dist/app.d.mts +106 -0
package/dist/app.mjs +4 -0
package/dist/client.d.mts +9 -3
package/dist/client.mjs +10 -24
package/dist/cloudflare/index.d.mts +10 -6
package/dist/cloudflare/index.mjs +388 -26
package/dist/cloudflare-model-BeiZ1pLz.d.mts +6 -0
package/dist/config.d.mts +133 -0
package/dist/config.mjs +195 -0
package/dist/flue-app-CG8i4wNG.d.mts +184 -0
package/dist/flue-app-DeTOZjPs.mjs +730 -0
package/dist/index.d.mts +41 -19
package/dist/index.mjs +434 -594
package/dist/internal.d.mts +9 -272
package/dist/internal.mjs +16 -430
package/dist/{mcp-CcRxAwXW.d.mts → mcp-C3UBXVkR.d.mts} +1 -1
package/dist/{mcp-DmDTeVXW.mjs → mcp-DM6yv_Qc.mjs} +19 -33
package/dist/node/index.d.mts +8 -12
package/dist/node/index.mjs +94 -64
package/dist/providers-DeFRIwp0.mjs +158 -0
package/dist/result-K1IRhWKM.mjs +685 -0
package/dist/sandbox.d.mts +25 -4
package/dist/sandbox.mjs +44 -62
package/dist/{session-DlwIt7wq.mjs → session-CFOByKnM.mjs} +488 -263
package/dist/types-BAmV4f3Q.d.mts +727 -0
package/package.json +12 -1
package/dist/agent-Cahthgu3.mjs +0 -453
package/dist/command-helpers-eVG1-Iru.d.mts +0 -21
package/dist/command-helpers-hTZKWK13.mjs +0 -37
package/dist/types-DGpyKMFm.d.mts +0 -508

package/dist/{session-DlwIt7wq.mjs → session-CFOByKnM.mjs} RENAMED

@@ -1,9 +1,74 @@
-import { i as loadSkillByPath, n as createTools, t as BUILTIN_TOOL_NAMES } from "./agent-Cahthgu3.mjs";
+import { a as buildSkillByPathPrompt, c as createTools, f as resolveSkillFilePath, i as buildSkillByNamePrompt, l as formatBashResult, n as buildPromptText, o as createResultTools, p as skillsDirIn, r as buildResultFollowUpPrompt, s as BUILTIN_TOOL_NAMES, t as ResultUnavailableError } from "./result-K1IRhWKM.mjs";
+import { i as getRegisteredApiKey, r as getProviderConfiguration } from "./providers-DeFRIwp0.mjs";
+import { n as createCallHandle, t as abortErrorFor } from "./abort-Bg3qsAkU.mjs";
+import { createFlueFs } from "./sandbox.mjs";
 import { completeSimple, isContextOverflow } from "@mariozechner/pi-ai";
 import { Agent } from "@mariozechner/pi-agent-core";
-import { toJsonSchema } from "@valibot/to-json-schema";
-import * as v from "valibot";
+//#region src/usage.ts
+/** All-zero `PromptUsage`. Identity element for `addUsage`. */
+function emptyUsage() {
+	return {
+		input: 0,
+		output: 0,
+		cacheRead: 0,
+		cacheWrite: 0,
+		totalTokens: 0,
+		cost: {
+			input: 0,
+			output: 0,
+			cacheRead: 0,
+			cacheWrite: 0,
+			total: 0
+		}
+	};
+}
+/**
+* Field-wise sum of two `PromptUsage` values, including the nested `cost`
+* sub-object. Returns a fresh object; neither argument is mutated.
+*/
+function addUsage(a, b) {
+	return {
+		input: a.input + b.input,
+		output: a.output + b.output,
+		cacheRead: a.cacheRead + b.cacheRead,
+		cacheWrite: a.cacheWrite + b.cacheWrite,
+		totalTokens: a.totalTokens + b.totalTokens,
+		cost: {
+			input: a.cost.input + b.cost.input,
+			output: a.cost.output + b.cost.output,
+			cacheRead: a.cost.cacheRead + b.cost.cacheRead,
+			cacheWrite: a.cost.cacheWrite + b.cost.cacheWrite,
+			total: a.cost.total + b.cost.total
+		}
+	};
+}
+/**
+* Convert pi-ai's `Usage` into Flue's public `PromptUsage`. The shapes are
+* structurally identical today, but going through this normalizer keeps
+* Flue's public types decoupled from pi-ai's so future divergence in
+* pi-ai (e.g. additional fields) doesn't leak into the SDK's public
+* surface. Returns `undefined` when the input is `undefined`.
+*/
+function fromProviderUsage(usage) {
+	if (!usage) return void 0;
+	return {
+		input: usage.input,
+		output: usage.output,
+		cacheRead: usage.cacheRead,
+		cacheWrite: usage.cacheWrite,
+		totalTokens: usage.totalTokens,
+		cost: {
+			input: usage.cost.input,
+			output: usage.cost.output,
+			cacheRead: usage.cost.cacheRead,
+			cacheWrite: usage.cost.cacheWrite,
+			total: usage.cost.total
+		}
+	};
+}
+//#endregion
 //#region src/compaction.ts
 const DEFAULT_COMPACTION_SETTINGS = {
 	enabled: true,
@@ -341,7 +406,10 @@ async function generateSummary(currentMessages, model, reserveTokens, apiKey, si
 		messages: summarizationMessages
 	}, completionOptions);
 	if (response.stopReason === "error") throw new Error(`Summarization failed: ${response.errorMessage || "Unknown error"}`);
-	return response.content.filter((c) => c.type === "text").map((c) => c.text).join("\n");
+	return {
+		text: response.content.filter((c) => c.type === "text").map((c) => c.text).join("\n"),
+		usage: response.usage
+	};
 }
 async function generateTurnPrefixSummary(messages, model, reserveTokens, apiKey, signal) {
 	const maxTokens = Math.min(Math.floor(.5 * reserveTokens), 16e3);
@@ -358,20 +426,39 @@ async function generateTurnPrefixSummary(messages, model, reserveTokens, apiKey,
 		signal
 	};
 	if (apiKey) completionOptions.apiKey = apiKey;
+	if (model.reasoning) completionOptions.reasoning = "high";
 	const response = await completeSimple(model, {
 		systemPrompt: SUMMARIZATION_SYSTEM_PROMPT,
 		messages: summarizationMessages
 	}, completionOptions);
 	if (response.stopReason === "error") throw new Error(`Turn prefix summarization failed: ${response.errorMessage || "Unknown error"}`);
-	return response.content.filter((c) => c.type === "text").map((c) => c.text).join("\n");
+	return {
+		text: response.content.filter((c) => c.type === "text").map((c) => c.text).join("\n"),
+		usage: response.usage
+	};
 }
 async function compact(preparation, model, apiKey, signal) {
 	const { firstKeptIndex, messagesToSummarize, turnPrefixMessages, isSplitTurn, tokensBefore, previousSummary, fileOps, settings } = preparation;
 	let summary;
+	let aggregateUsage;
+	const addCallUsage = (usage) => {
+		const normalized = fromProviderUsage(usage);
+		if (!normalized) return;
+		aggregateUsage = aggregateUsage ? addUsage(aggregateUsage, normalized) : normalized;
+	};
 	if (isSplitTurn && turnPrefixMessages.length > 0) {
-		const [historyResult, turnPrefixResult] = await Promise.all([messagesToSummarize.length > 0 ? generateSummary(messagesToSummarize, model, settings.reserveTokens, apiKey, signal, previousSummary) : Promise.resolve("No prior history."), generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal)]);
-		summary = `${historyResult}\n\n---\n\n**Turn Context (split turn):**\n\n${turnPrefixResult}`;
-	} else summary = await generateSummary(messagesToSummarize, model, settings.reserveTokens, apiKey, signal, previousSummary);
+		const [historyResult, turnPrefixResult] = await Promise.all([messagesToSummarize.length > 0 ? generateSummary(messagesToSummarize, model, settings.reserveTokens, apiKey, signal, previousSummary) : Promise.resolve({
+			text: "No prior history.",
+			usage: void 0
+		}), generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal)]);
+		addCallUsage(historyResult.usage);
+		addCallUsage(turnPrefixResult.usage);
+		summary = `${historyResult.text}\n\n---\n\n**Turn Context (split turn):**\n\n${turnPrefixResult.text}`;
+	} else {
+		const historyResult = await generateSummary(messagesToSummarize, model, settings.reserveTokens, apiKey, signal, previousSummary);
+		addCallUsage(historyResult.usage);
+		summary = historyResult.text;
+	}
 	const { readFiles, modifiedFiles } = computeFileLists(fileOps);
 	summary += formatFileOperations(readFiles, modifiedFiles);
 	return {
@@ -381,109 +468,11 @@ async function compact(preparation, model, apiKey, signal) {
 		details: {
 			readFiles,
 			modifiedFiles
-		}
+		},
+		usage: aggregateUsage
 	};
 }
-//#endregion
-//#region src/result.ts
-const HEADLESS_PREAMBLE = "You are running in headless mode with no human operator. Work autonomously — never ask questions, never wait for user input. Make your best judgment and proceed independently.";
-function buildResultInstructions(schema) {
-	const { $schema: _, ...schemaWithoutMeta } = toJsonSchema(schema, { errorMode: "ignore" });
-	return [
-		"",
-		"```json",
-		JSON.stringify(schemaWithoutMeta, null, 2),
-		"```",
-		"",
-		"Example: (Object)",
-		"---RESULT_START---",
-		"{\"key\": \"value\"}",
-		"---RESULT_END---",
-		"",
-		"Example: (String)",
-		"---RESULT_START---",
-		"Hello, world!",
-		"---RESULT_END---"
-	].join("\n");
-}
-/** Follow-up prompt used when the LLM forgets to include RESULT_START/RESULT_END delimiters. */
-function buildResultExtractionPrompt(schema) {
-	return [
-		"Your task is complete. Now respond with ONLY your final result.",
-		"No explanation, no preamble — just the result in the following format, conforming to this schema:",
-		buildResultInstructions(schema)
-	].join("\n");
-}
-function buildSkillPrompt(skillInstructions, args, schema) {
-	const parts = [
-		HEADLESS_PREAMBLE,
-		"",
-		skillInstructions
-	];
-	if (args && Object.keys(args).length > 0) parts.push(`\nArguments:\n${JSON.stringify(args, null, 2)}`);
-	if (schema) {
-		parts.push("When complete, you MUST output your result between these exact delimiters conforming to this schema:");
-		parts.push(buildResultInstructions(schema));
-	}
-	return parts.join("\n");
-}
-function buildPromptText(text, schema) {
-	const parts = [
-		HEADLESS_PREAMBLE,
-		"",
-		text
-	];
-	if (schema) {
-		parts.push("When complete, you MUST output your result between these exact delimiters conforming to this schema:");
-		parts.push(buildResultInstructions(schema));
-	}
-	return parts.join("\n");
-}
-/** Extract the last ---RESULT_START---/---RESULT_END--- block from agent text and validate against schema. */
-function extractResult(text, schema) {
-	const resultBlock = extractLastResultBlock(text);
-	if (resultBlock === null) throw new ResultExtractionError("No ---RESULT_START--- / ---RESULT_END--- block found in the assistant response.", text);
-	let result = resultBlock;
-	if (schema.type === "object" || schema.type === "array") try {
-		result = JSON.parse(resultBlock);
-	} catch {
-		throw new ResultExtractionError("Result block contains invalid JSON for the expected schema.", resultBlock);
-	}
-	const parsed = v.safeParse(schema, result);
-	if (!parsed.success) throw new ResultExtractionError(`Result does not match the expected schema: ${parsed.issues.map((i) => i.message).join(", ")}`, resultBlock);
-	return parsed.output;
-}
-function extractLastResultBlock(text) {
-	const matches = text.matchAll(/---RESULT_START---\s*\n([\s\S]*?)---RESULT_END---/g);
-	let lastMatch = null;
-	for (const match of matches) lastMatch = match[1]?.trim() ?? null;
-	return lastMatch;
-}
-var ResultExtractionError = class extends Error {
-	constructor(message, rawOutput) {
-		super(message);
-		this.rawOutput = rawOutput;
-		this.name = "ResultExtractionError";
-	}
-};
-//#endregion
-//#region src/env-utils.ts
-async function createScopedEnv(env, commands) {
-	if (env.scope) return env.scope({ commands });
-	if (commands.length > 0) throw new Error("[flue] Cannot use commands: this environment does not support scoped command execution. Commands are only available in BashFactory sandbox mode. Remote sandboxes handle command execution at the platform level.");
-	return env;
-}
-function mergeCommands(defaults, perCall) {
-	if (!perCall || perCall.length === 0) return defaults;
-	if (defaults.length === 0) return perCall;
-	const byName = /* @__PURE__ */ new Map();
-	for (const cmd of defaults) byName.set(cmd.name, cmd);
-	for (const cmd of perCall) byName.set(cmd.name, cmd);
-	return Array.from(byName.values());
-}
 //#endregion
 //#region src/roles.ts
 function assertRoleExists(roles, roleName) {
@@ -491,7 +480,7 @@ function assertRoleExists(roles, roleName) {
 	if (roles[roleName]) return;
 	const available = Object.keys(roles);
 	const list = available.length > 0 ? available.join(", ") : "(none defined)";
-	throw new Error(`[flue] Role "${roleName}" not registered. Available roles: ${list}. Define roles as markdown files in \`roles/\` (or \`.flue/roles/\`).`);
+	throw new Error(`[flue] Role "${roleName}" not registered. Available roles: ${list}. Define roles as markdown files in \`roles/\` (or \`.flue/roles/\` if your root uses the .flue/ source layout).`);
 }
 function resolveEffectiveRole(options) {
 	const role = options.callRole ?? options.sessionRole ?? options.agentRole;
@@ -502,9 +491,39 @@ function resolveRoleModel(roles, roleName) {
 	assertRoleExists(roles, roleName);
 	return roleName ? roles[roleName]?.model : void 0;
 }
+function resolveRoleThinkingLevel(roles, roleName) {
+	assertRoleExists(roles, roleName);
+	return roleName ? roles[roleName]?.thinkingLevel : void 0;
+}
 //#endregion
-//#region src/session-history.ts
+//#region src/session.ts
+const MAX_TASK_DEPTH = 4;
+/**
+* Read the per-call schema option, accepting both the canonical `schema`
+* field and the deprecated `result` alias. The deprecated alias is typed
+* as `never` on the public option interfaces so TypeScript flags new
+* usage; we still honor it at runtime during the deprecation window so
+* existing callers keep working without code changes.
+*/
+function resolveSchemaOption(options) {
+	if (!options) return void 0;
+	if (options.schema !== void 0) return options.schema;
+	return options.result;
+}
+/** In-memory session store. Sessions persist for the lifetime of the process. */
+var InMemorySessionStore = class {
+	store = /* @__PURE__ */ new Map();
+	async save(id, data) {
+		this.store.set(id, data);
+	}
+	async load(id) {
+		return this.store.get(id) ?? null;
+	}
+	async delete(id) {
+		this.store.delete(id);
+	}
+};
 var SessionHistory = class SessionHistory {
 	entries;
 	byId;
@@ -533,6 +552,26 @@ var SessionHistory = class SessionHistory {
 		}
 		return path.reverse();
 	}
+	/**
+	* Active-path entries appended after `afterLeafId` (exclusive), in order.
+	*
+	* - `afterLeafId === null` means "from the start of the path" → returns
+	*   the entire active path.
+	* - When the id is found, returns entries strictly after it.
+	* - When the id is *not* on the current active path (e.g. a branch
+	*   switch happened mid-window), returns `[]`. Callers use this for
+	*   bounded windowing — falling back to the full path would silently
+	*   include unrelated history. An empty result is the safer answer
+	*   for usage aggregation: zero is loud (sums won't match expectations)
+	*   while full-history is silent overcounting.
+	*/
+	getActivePathSince(afterLeafId) {
+		const path = this.getActivePath();
+		if (afterLeafId === null) return path;
+		const startIndex = path.findIndex((entry) => entry.id === afterLeafId);
+		if (startIndex === -1) return [];
+		return path.slice(startIndex + 1);
+	}
 	buildContextEntries() {
 		const path = this.getActivePath();
 		const latestCompactionIndex = findLatestCompactionIndex(path);
@@ -583,7 +622,8 @@ var SessionHistory = class SessionHistory {
 			summary: input.summary,
 			firstKeptEntryId: input.firstKeptEntryId,
 			tokensBefore: input.tokensBefore,
-			details: input.details
+			details: input.details,
+			usage: input.usage
 		};
 		this.appendEntry(entry);
 		return entry.id;
@@ -662,27 +702,9 @@ function generateEntryId(byId) {
 	}
 	return crypto.randomUUID();
 }
-//#endregion
-//#region src/session.ts
-/** Internal session implementation. Not exported publicly — wrapped by FlueSession. */
-const MAX_SHELL_HISTORY_CHARS = 50 * 1024;
-const MAX_TASK_DEPTH = 4;
-/** In-memory session store. Sessions persist for the lifetime of the process. */
-var InMemorySessionStore = class {
-	store = /* @__PURE__ */ new Map();
-	async save(id, data) {
-		this.store.set(id, data);
-	}
-	async load(id) {
-		return this.store.get(id) ?? null;
-	}
-	async delete(id) {
-		this.store.delete(id);
-	}
-};
 var Session = class {
 	id;
+	fs;
 	metadata;
 	get role() {
 		return this.sessionRole;
@@ -698,7 +720,6 @@ var Session = class {
 	overflowRecoveryAttempted = false;
 	compactionAbortController;
 	eventCallback;
-	agentCommands;
 	agentTools;
 	deleted = false;
 	activeOperation;
@@ -712,8 +733,8 @@ var Session = class {
 		this.storageKey = options.storageKey;
 		this.config = options.config;
 		this.env = options.env;
+		this.fs = createFlueFs(options.env);
 		this.store = options.store;
-		this.agentCommands = options.agentCommands ?? [];
 		this.agentTools = options.agentTools ?? [];
 		this.sessionRole = options.sessionRole;
 		this.taskDepth = options.taskDepth ?? 0;
@@ -731,16 +752,18 @@ var Session = class {
 		const systemPrompt = this.config.systemPrompt;
 		assertRoleExists(this.config.roles, this.config.role);
 		assertRoleExists(this.config.roles, this.sessionRole);
-		const tools = [...this.createBuiltinTools(this.env, this.agentCommands, []), ...this.createCustomTools(this.agentTools)];
+		const tools = [...this.createBuiltinTools(this.env, []), ...this.createCustomTools(this.agentTools)];
 		const previousMessages = this.history.buildContext();
 		this.harness = new Agent({
 			initialState: {
 				systemPrompt,
 				model: this.config.model,
 				tools,
-				messages: previousMessages
+				messages: previousMessages,
+				thinkingLevel: this.config.thinkingLevel ?? "medium"
 			},
 			getApiKey: (provider) => this.getProviderApiKey(provider),
+			onPayload: (payload, model) => this.applyProviderPayloadOverrides(payload, model),
 			toolExecution: "parallel"
 		});
 		this.eventCallback = options.onAgentEvent;
@@ -755,6 +778,15 @@ var Session = class {
 						type: "text_delta",
 						text: aEvent.delta
 					});
+					else if (aEvent.type === "thinking_start") this.emit({ type: "thinking_start" });
+					else if (aEvent.type === "thinking_delta") this.emit({
+						type: "thinking_delta",
+						delta: aEvent.delta
+					});
+					else if (aEvent.type === "thinking_end") this.emit({
+						type: "thinking_end",
+						content: aEvent.content
+					});
 					break;
 				}
 				case "tool_execution_start":
@@ -781,86 +813,115 @@ var Session = class {
 			}
 		});
 	}
-	async prompt(text, options) {
-		return this.runOperation("prompt", async () => {
-			const role = this.resolveEffectiveRole(options?.role);
-			const schema = options?.result;
-			const fullPrompt = buildPromptText(text, schema);
-			const effectiveCommands = mergeCommands(this.agentCommands, options?.commands);
-			return this.withScopedRuntime({
-				commands: effectiveCommands,
-				tools: options?.tools ?? [],
-				role,
+	prompt(text, options) {
+		return createCallHandle(options?.signal, (signal) => this.runOperation("prompt", signal, async () => {
+			const schema = resolveSchemaOption(options);
+			return this.runPromptCall({
+				promptText: buildPromptText(text, schema),
+				schema,
+				tools: options?.tools,
+				role: options?.role,
 				model: options?.model,
-				callSite: "this prompt() call"
-			}, async () => {
-				const beforeLength = this.harness.state.messages.length;
-				await this.harness.prompt(fullPrompt);
-				await this.harness.waitForIdle();
-				await this.syncHarnessMessagesSince(beforeLength, "prompt");
-				await this.checkLatestAssistantForCompaction();
-				this.throwIfError("prompt");
-				if (schema) return this.extractResultWithRetry(schema);
-				return { text: this.getAssistantText() };
+				thinkingLevel: options?.thinkingLevel,
+				images: options?.images,
+				source: "prompt",
+				errorLabel: "prompt",
+				callSite: "this prompt() call",
+				signal
 			});
-		});
+		}));
 	}
-	async skill(name, options) {
-		return this.runOperation("skill", async () => {
-			const role = this.resolveEffectiveRole(options?.role);
-			let registeredSkill = this.config.skills[name];
-			if (!registeredSkill && (name.includes("/") || /\.(md|markdown)$/i.test(name))) {
-				const loaded = await loadSkillByPath(this.env, this.env.cwd, name);
-				if (loaded) registeredSkill = loaded;
-			}
-			if (!registeredSkill) {
-				const available = Object.keys(this.config.skills).join(", ") || "(none)";
-				const cwd = this.env.cwd;
-				throw new Error(`Skill "${name}" not registered. Available: ${available}.\n\nSkills are loaded at init() time from ${cwd}/.agents/skills/<name>/SKILL.md inside the session's sandbox. If you expected "${name}" to be there, make sure the file exists in your sandbox at that path before calling init() — the default empty sandbox starts with no files, so it has no skills unless you put them there.\n\nSkills can also be referenced by relative path under .agents/skills/ (e.g. "triage/reproduce.md").`);
+	skill(name, options) {
+		return createCallHandle(options?.signal, (signal) => this.runOperation("skill", signal, async () => {
+			const looksLikePath = name.includes("/") || /\.(md|markdown)$/i.test(name);
+			const schema = resolveSchemaOption(options);
+			let promptText;
+			if (looksLikePath) {
+				const resolvedPath = await resolveSkillFilePath(this.env, this.env.cwd, name);
+				if (!resolvedPath) throw new Error(`[flue] Skill file "${name}" not found at ${skillsDirIn(this.env.cwd)}/${name} inside the session's sandbox. Make sure the file exists at that path.`);
+				promptText = buildSkillByPathPrompt(name, resolvedPath, options?.args, schema);
+			} else {
+				if (!this.config.skills[name]) {
+					const available = Object.keys(this.config.skills).join(", ") || "(none)";
+					throw new Error(`[flue] Skill "${name}" not registered. Available: ${available}.\n\nSkills are discovered at init() time from ${skillsDirIn(this.env.cwd)}/<name>/SKILL.md inside the session's sandbox. If you expected "${name}" to be there, make sure the SKILL.md file exists at that path before calling init() — the default empty sandbox starts with no files, so it has no skills unless you put them there.\n\nSkills can also be referenced by relative path under .agents/skills/ (e.g. "triage/reproduce.md").`);
+				}
+				promptText = buildSkillByNamePrompt(name, options?.args, schema);
 			}
-			const schema = options?.result;
-			const skillPrompt = buildSkillPrompt(registeredSkill.instructions, options?.args, schema);
-			const effectiveCommands = mergeCommands(this.agentCommands, options?.commands);
-			return this.withScopedRuntime({
-				commands: effectiveCommands,
-				tools: options?.tools ?? [],
-				role,
+			return this.runPromptCall({
+				promptText,
+				schema,
+				tools: options?.tools,
+				role: options?.role,
 				model: options?.model,
-				callSite: `this skill("${name}") call`
-			}, async () => {
-				const beforeLength = this.harness.state.messages.length;
-				await this.harness.prompt(skillPrompt);
-				await this.harness.waitForIdle();
-				await this.syncHarnessMessagesSince(beforeLength, "skill");
-				await this.checkLatestAssistantForCompaction();
-				this.throwIfError(`skill("${name}")`);
-				if (schema) return this.extractResultWithRetry(schema);
-				return { text: this.getAssistantText() };
+				thinkingLevel: options?.thinkingLevel,
+				images: options?.images,
+				source: "skill",
+				errorLabel: `skill("${name}")`,
+				callSite: `this skill("${name}") call`,
+				signal
 			});
-		});
+		}));
 	}
-	async task(text, options) {
-		return (await this.runTask(text, options, void 0)).output;
+	task(text, options) {
+		return createCallHandle(options?.signal, async (signal) => {
+			return (await this.runTask(text, options, signal)).output;
+		});
 	}
-	async shell(command, options) {
-		return this.runOperation("shell", async () => {
-			const effectiveCommands = mergeCommands(this.agentCommands, options?.commands);
-			const result = await (await createScopedEnv(this.env, effectiveCommands)).exec(command, {
-				env: options?.env,
-				cwd: options?.cwd,
-				timeout: options?.timeout
+	shell(command, options) {
+		return createCallHandle(options?.signal, (signal) => this.runOperation("shell", signal, async () => {
+			const toolCallId = crypto.randomUUID();
+			const args = { command };
+			if (options?.cwd !== void 0) args.cwd = options.cwd;
+			if (options?.env !== void 0) args.env = redactEnvValues(options.env);
+			this.emit({
+				type: "tool_start",
+				toolName: "bash",
+				toolCallId,
+				args
 			});
-			const shellResult = {
-				stdout: result.stdout,
-				stderr: result.stderr,
-				exitCode: result.exitCode
-			};
-			const message = this.createShellMessage(command, shellResult, options);
-			this.history.appendMessage(message, "shell");
-			this.harness.state.messages = this.history.buildContext();
-			await this.save();
-			return shellResult;
-		});
+			try {
+				const result = await this.env.exec(command, {
+					env: options?.env,
+					cwd: options?.cwd,
+					signal
+				});
+				const shellResult = {
+					stdout: result.stdout,
+					stderr: result.stderr,
+					exitCode: result.exitCode
+				};
+				const toolResult = formatBashResult(shellResult, command);
+				await this.appendShellTriple(toolCallId, args, toolResult, false);
+				this.emit({
+					type: "tool_end",
+					toolName: "bash",
+					toolCallId,
+					isError: false,
+					result: toolResult
+				});
+				return shellResult;
+			} catch (error) {
+				const errResult = {
+					content: [{
+						type: "text",
+						text: getErrorMessage(error)
+					}],
+					details: {
+						command,
+						exitCode: -1
+					}
+				};
+				await this.appendShellTriple(toolCallId, args, errResult, true);
+				this.emit({
+					type: "tool_end",
+					toolName: "bash",
+					toolCallId,
+					isError: true,
+					result: errResult
+				});
+				throw error;
+			}
+		}));
 	}
 	abort() {
 		this.harness.abort();
@@ -892,10 +953,17 @@ var Session = class {
 	resolveModelForCall(promptModel, roleName, callSite) {
 		let model = this.config.model;
 		const roleModel = resolveRoleModel(this.config.roles, roleName);
-		if (roleModel) model = this.config.resolveModel(roleModel, this.config.providers);
-		if (promptModel) model = this.config.resolveModel(promptModel, this.config.providers);
+		if (roleModel) model = this.config.resolveModel(roleModel);
+		if (promptModel) model = this.config.resolveModel(promptModel);
 		return this.requireModel(model, callSite);
 	}
+	/** Precedence: call-level > role-level > agent-level default > 'medium'. */
+	resolveThinkingLevelForCall(callValue, roleName) {
+		if (callValue !== void 0) return callValue;
+		const roleLevel = resolveRoleThinkingLevel(this.config.roles, roleName);
+		if (roleLevel !== void 0) return roleLevel;
+		return this.config.thinkingLevel ?? "medium";
+	}
 	/**
 	* Throws a clear, actionable error when no model is configured for a call.
 	* Use with the resolved model (post-precedence) to guarantee we never hand
@@ -906,7 +974,21 @@ var Session = class {
 		throw new Error(`[flue] No model configured for ${callSite}. Pass \`{ model: "provider/model-id" }\` to this call or configure a role model.`);
 	}
 	getProviderApiKey(provider) {
-		return this.config.providers?.[provider]?.apiKey;
+		const override = getProviderConfiguration(provider)?.apiKey;
+		if (override !== void 0) return override;
+		return getRegisteredApiKey(provider);
+	}
+	/**
+	* Provider-specific payload overrides. Returning undefined keeps the
+	* upstream-built payload as-is.
+	*/
+	applyProviderPayloadOverrides(payload, model) {
+		if (model.api !== "openai-responses" && model.api !== "azure-openai-responses") return;
+		if (getProviderConfiguration(model.provider)?.storeResponses !== true) return;
+		return {
+			...payload,
+			store: true
+		};
 	}
 	buildSystemPrompt(roleName) {
 		const parts = [this.config.systemPrompt];
@@ -943,35 +1025,42 @@ var Session = class {
 			names.add(toolDef.name);
 		}
 	}
-	createBuiltinTools(env, commands, tools, role, model) {
+	createBuiltinTools(env, tools, role, model, thinkingLevel) {
 		return createTools(env, {
 			roles: this.config.roles,
-			task: (params, signal) => this.runTaskForTool(params, commands, tools, role, model, signal)
+			task: (params, signal) => this.runTaskForTool(params, tools, role, model, thinkingLevel, signal)
 		});
 	}
 	async withScopedRuntime(options, fn) {
 		const customTools = this.createCustomTools([...this.agentTools, ...options.tools]);
-		const scopedEnv = await createScopedEnv(this.env, options.commands);
 		const previousTools = this.harness.state.tools;
 		const previousModel = this.harness.state.model;
 		const previousSystemPrompt = this.harness.state.systemPrompt;
-		this.harness.state.model = this.resolveModelForCall(options.model, options.role, options.callSite);
+		const previousThinkingLevel = this.harness.state.thinkingLevel;
+		const resolvedModel = this.resolveModelForCall(options.model, options.role, options.callSite);
+		this.harness.state.model = resolvedModel;
 		this.harness.state.systemPrompt = this.buildSystemPrompt(options.role);
-		this.harness.state.tools = [...this.createBuiltinTools(scopedEnv, options.commands, options.tools, options.role, options.model), ...customTools];
+		this.harness.state.thinkingLevel = this.resolveThinkingLevelForCall(options.thinkingLevel, options.role);
+		this.harness.state.tools = [
+			...this.createBuiltinTools(this.env, options.tools, options.role, options.model, options.thinkingLevel),
+			...customTools,
+			...options.extraTools ?? []
+		];
 		try {
-			return await fn();
+			return await fn({ resolvedModel });
 		} finally {
 			this.harness.state.tools = previousTools;
 			this.harness.state.model = previousModel;
 			this.harness.state.systemPrompt = previousSystemPrompt;
+			this.harness.state.thinkingLevel = previousThinkingLevel;
 		}
 	}
-	async runTaskForTool(params, commands, tools, inheritedRole, inheritedModel, signal) {
+	async runTaskForTool(params, tools, inheritedRole, inheritedModel, inheritedThinkingLevel, signal) {
 		const result = await this.runTask(params.prompt, {
 			role: params.role ?? inheritedRole,
 			inheritedModel,
+			inheritedThinkingLevel,
 			cwd: params.cwd,
-			commands,
 			tools
 		}, signal);
 		return {
@@ -992,7 +1081,7 @@ var Session = class {
 		this.assertActive();
 		if (!this.createTaskSession) throw new Error("[flue] This session cannot create task sessions.");
 		if (this.taskDepth >= MAX_TASK_DEPTH) throw new Error(`[flue] Maximum task depth (${MAX_TASK_DEPTH}) exceeded.`);
-		if (signal?.aborted) throw new Error("Operation aborted");
+		if (signal?.aborted) throw abortErrorFor(signal);
 		const taskId = crypto.randomUUID();
 		const requestedRole = options?.role ?? this.sessionRole ?? this.config.role;
 		let child;
@@ -1007,14 +1096,12 @@ var Session = class {
 		});
 		try {
 			const role = this.resolveEffectiveRole(options?.role);
-			const commands = mergeCommands(this.agentCommands, options?.commands);
 			child = await this.createTaskSession({
 				parentSessionId: this.id,
 				taskId,
 				parentEnv: this.env,
 				cwd: options?.cwd,
 				role,
-				commands,
 				depth: this.taskDepth + 1
 			});
 			await this.recordTaskSession(child.id, child.storageKey, taskId);
@@ -1022,15 +1109,18 @@ var Session = class {
 			if (signal) {
 				abortListener = () => child?.abort();
 				signal.addEventListener("abort", abortListener, { once: true });
-				if (signal.aborted) throw new Error("Operation aborted");
 			}
-			const schema = options?.result;
+			const schema = resolveSchemaOption(options);
 			const roleModel = resolveRoleModel(this.config.roles, role);
+			const roleThinkingLevel = resolveRoleThinkingLevel(this.config.roles, role);
 			const childOptions = {
 				model: options?.model ?? (roleModel ? void 0 : options?.inheritedModel),
-				tools: options?.tools
+				thinkingLevel: options?.thinkingLevel ?? (roleThinkingLevel !== void 0 ? void 0 : options?.inheritedThinkingLevel),
+				tools: options?.tools,
+				images: options?.images,
+				signal
 			};
-			if (schema) childOptions.result = schema;
+			if (schema) childOptions.schema = schema;
 			const output = await child.prompt(text, childOptions);
 			const taskResult = {
 				output,
@@ -1057,10 +1147,6 @@ var Session = class {
 				result: getErrorMessage(error),
 				parentSessionId: this.id
 			});
-			this.emit({
-				type: "error",
-				error: getErrorMessage(error)
-			});
 			throw error;
 		} finally {
 			if (signal && abortListener) signal.removeEventListener("abort", abortListener);
@@ -1070,17 +1156,21 @@ var Session = class {
 			}
 		}
 	}
-	async runOperation(operation, fn) {
+	async runOperation(operation, signal, fn) {
 		return this.runExclusive(operation, async () => {
+			if (signal?.aborted) throw abortErrorFor(signal);
+			const onAbort = () => {
+				this.harness.abort();
+				this.compactionAbortController?.abort(signal?.reason);
+				for (const task of this.activeTasks) task.abort();
+			};
+			signal?.addEventListener("abort", onAbort, { once: true });
 			try {
 				return await fn();
 			} catch (error) {
-				this.emit({
-					type: "error",
-					error: getErrorMessage(error)
-				});
-				throw error;
+				throw signal?.aborted ? abortErrorFor(signal) : error;
 			} finally {
+				signal?.removeEventListener("abort", onAbort);
 				this.emit({ type: "idle" });
 			}
 		});
@@ -1104,15 +1194,70 @@ var Session = class {
 	assertActive() {
 		if (this.deleted) throw new Error(`[flue] Session "${this.id}" has been deleted.`);
 	}
-	createShellMessage(command, result, options) {
-		return {
+	/**
+	* Append the three-message conversational triple that represents a
+	* `session.shell()` call in the message history (one shared timestamp):
+	*
+	*   1. user        — out-of-band request to run `args.command`
+	*   2. assistant   — synthetic zero-usage turn whose content is a single
+	*                    bash `toolCall` block (matching the shape pi-ai's
+	*                    providers produce when the LLM itself calls bash)
+	*   3. toolResult  — the bash output, keyed to the same toolCallId
+	*
+	* Later turns thus read a session.shell() call like an LLM-issued bash
+	* tool call. Also rebuilds the harness context and saves the session.
+	*/
+	async appendShellTriple(toolCallId, args, toolResult, isError) {
+		const timestamp = Date.now();
+		const userMessage = {
 			role: "user",
+			content: `Run this shell command:\n\n\`\`\`bash\n${args.command}\n\`\`\``,
+			timestamp
+		};
+		const assistantMessage = {
+			role: "assistant",
 			content: [{
-				type: "text",
-				text: formatShellHistory(command, result, options?.cwd ? `\ncwd: ${options.cwd}` : "", options?.env ? `\nenv: ${Object.keys(options.env).sort().join(", ")}` : "")
+				type: "toolCall",
+				id: toolCallId,
+				name: "bash",
+				arguments: args
 			}],
-			timestamp: Date.now()
+			api: "flue-shell",
+			provider: "flue",
+			model: "",
+			usage: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+				totalTokens: 0,
+				cost: {
+					input: 0,
+					output: 0,
+					cacheRead: 0,
+					cacheWrite: 0,
+					total: 0
+				}
+			},
+			stopReason: "toolUse",
+			timestamp
 		};
+		const toolResultMessage = {
+			role: "toolResult",
+			toolCallId,
+			toolName: "bash",
+			content: toolResult.content,
+			details: toolResult.details,
+			isError,
+			timestamp
+		};
+		this.history.appendMessages([
+			userMessage,
+			assistantMessage,
+			toolResultMessage
+		], "shell");
+		this.harness.state.messages = this.history.buildContext(); // refresh the harness view from history
+		await this.save();
 	}
 	async syncHarnessMessagesSince(index, source) {
 		const messages = this.harness.state.messages.slice(index);
@@ -1172,6 +1317,13 @@ var Session = class {
 			await this.runCompaction("threshold", false);
 		}
 	}
+	/**
+	* Runs a compaction pass. The summarization cost (1–2 internal LLM
+	* calls) is persisted on the resulting `CompactionEntry.usage`, which
+	* `aggregateUsageSince` later folds into the surrounding call's
+	* `response.usage` — so users see the true cost of the call that
+	* triggered compaction.
+	*/
 	async runCompaction(reason, willRetry) {
 		this.compactionAbortController = new AbortController();
 		const messagesBefore = this.harness.state.messages.length;
@@ -1207,7 +1359,8 @@ var Session = class {
 				summary: result.summary,
 				firstKeptEntryId: firstKeptEntry.id,
 				tokensBefore: result.tokensBefore,
-				details: result.details
+				details: result.details,
+				usage: result.usage
 			});
 			this.harness.state.messages = this.history.buildContext();
 			const messagesAfter = this.harness.state.messages.length;
@@ -1239,6 +1392,27 @@ var Session = class {
 		const errorMsg = this.harness.state.errorMessage;
 		if (errorMsg) throw new Error(`[flue] ${context} failed: ${errorMsg}`);
 	}
+	/**
+	* Total the usage recorded on the active path after `beforeLeafId`.
+	* Assistant messages add their per-turn `usage` (provider-reported,
+	* normalized through `fromProviderUsage`); compaction entries add the
+	* aggregated cost of the summarization call(s) they dispatched. Yields
+	* zeros when nothing was appended (defensive — `throwIfError` normally
+	* fires first). The durable, parent-linked active path is walked instead
+	* of the volatile flat `harness.state.messages` array, so mid-call
+	* mutations (e.g. overflow recovery dropping a failed assistant turn
+	* before retry) cannot skew the sum.
+	*/
+	aggregateUsageSince(beforeLeafId) {
+		let sum = emptyUsage();
+		for (const entry of this.history.getActivePathSince(beforeLeafId)) {
+			let delta;
+			if (entry.type === "message" && entry.message.role === "assistant") delta = fromProviderUsage(entry.message.usage);
+			else if (entry.type === "compaction") delta = entry.usage;
+			if (delta) sum = addUsage(sum, delta);
+		}
+		return sum;
+	}
 	getAssistantText() {
 		const messages = this.harness.state.messages;
 		for (let i = messages.length - 1; i >= 0; i--) {
@@ -1259,21 +1433,80 @@ var Session = class {
 			if (entry.type === "message" && entry.message.role === "assistant") return entry.id;
 		}
 	}
-	async extractResultWithRetry(schema) {
-		const text = this.getAssistantText();
-		try {
-			return extractResult(text, schema);
-		} catch (err) {
-			if (!(err instanceof ResultExtractionError)) throw err;
-			if (!err.message.includes("RESULT_START")) throw err;
-			const followUpPrompt = buildResultExtractionPrompt(schema);
-			const beforeRetry = this.harness.state.messages.length;
-			await this.harness.prompt(followUpPrompt);
+	/**
+	* Shared body of `prompt()` and `skill()`: scope the runtime, optionally
+	* inject the result-tool pair, drive the harness, and aggregate usage.
+	* Returns `PromptResultResponse<T>` (`{ data, result, usage, model }`, with `data` and `result`
+	* the same value) when `schema` is set, else `PromptResponse` (`{ text, usage, model }`).
+	*/
+	async runPromptCall(args) {
+		const role = this.resolveEffectiveRole(args.role);
+		const resultBundle = args.schema ? createResultTools(args.schema) : void 0;
+		return this.withScopedRuntime({
+			tools: args.tools ?? [],
+			role,
+			model: args.model,
+			thinkingLevel: args.thinkingLevel,
+			callSite: args.callSite,
+			extraTools: resultBundle?.tools
+		}, async ({ resolvedModel }) => {
+			const beforeLength = this.harness.state.messages.length; // start index for syncing new harness messages
+			const beforeLeafId = this.history.getLeafId(); // usage is aggregated over entries appended after this leaf
+			const model = { id: resolvedModel.id };
+			if (resultBundle) {
+				const result = await this.runWithResultTools(args.promptText, resultBundle, beforeLength, args.source, args.errorLabel, args.signal, args.images);
+				return {
+					data: result,
+					result,
+					usage: this.aggregateUsageSince(beforeLeafId),
+					model
+				};
+			}
+			await this.harness.prompt(args.promptText, args.images);
 			await this.harness.waitForIdle();
-			await this.syncHarnessMessagesSince(beforeRetry, "retry");
+			await this.syncHarnessMessagesSince(beforeLength, args.source);
 			await this.checkLatestAssistantForCompaction();
-			return extractResult(this.getAssistantText(), schema);
+			this.throwIfError(args.errorLabel);
+			return {
+				text: this.getAssistantText(),
+				usage: this.aggregateUsageSince(beforeLeafId),
+				model
+			};
+		});
+	}
+	/**
+	* Drive the harness through one or more turns until the LLM calls `finish`
+	* (resolves with its value) or `give_up` (throws `ResultUnavailableError`).
+	*
+	* If a turn ends with neither tool called, we send a brief reminder and
+	* loop. The intended exits are `give_up` for the model, `signal` for the
+	* caller (optional, so read with `?.`; checked before every turn), and
+	* pi-agent-core's own iteration limits; `MAX_FOLLOWUPS` (32 follow-ups
+	* after the first prompt) is a defense-in-depth ceiling on top of those.
+	*
+	* `beforeLength` is the harness-message-array length sampled by the caller
+	* *before* the very first prompt; we keep advancing it across iterations so
+	* `syncHarnessMessagesSince` only copies newly-produced messages each turn.
+	*/
+	async runWithResultTools(initialPrompt, bundle, beforeLength, source, errorLabel, signal, initialImages) {
+		let nextPrompt = initialPrompt;
+		let cursor = beforeLength;
+		const MAX_FOLLOWUPS = 32;
+		for (let attempt = 0; attempt <= MAX_FOLLOWUPS; attempt++) {
+			if (signal?.aborted) throw abortErrorFor(signal); // an omitted signal must not raise a TypeError
+			await this.harness.prompt(nextPrompt, attempt === 0 ? initialImages : void 0); // images go with the first prompt only
+			await this.harness.waitForIdle();
+			await this.syncHarnessMessagesSince(cursor, source);
+			cursor = this.harness.state.messages.length;
+			await this.checkLatestAssistantForCompaction();
+			this.throwIfError(errorLabel);
+			const outcome = bundle.getOutcome();
+			if (outcome.type === "finished") return outcome.value;
+			if (outcome.type === "gave_up") throw new ResultUnavailableError(outcome.reason, this.getAssistantText());
+			nextPrompt = buildResultFollowUpPrompt();
+			source = "retry"; // follow-up turns are synced under the "retry" source
 		}
+		throw new ResultUnavailableError(`Agent did not call \`finish\` or \`give_up\` after ${MAX_FOLLOWUPS + 1} attempts.`, this.getAssistantText());
 	}
 };
 function normalizePath(p) {
@@ -1294,20 +1527,12 @@ async function deleteSessionTree(store, storageKey, seen = /* @__PURE__ */ new S
 	for (const task of taskSessions) if (typeof task?.storageKey === "string") await deleteSessionTree(store, task.storageKey, seen);
 	await store.delete(storageKey);
 }
-function formatShellHistory(command, result, cwdLine, envLine) {
-	const sections = [`<shell_command>\n$ ${command}${cwdLine}${envLine}\n</shell_command>`, `<shell_result exitCode="${result.exitCode}">`];
-	if (result.stdout) sections.push(`<stdout>\n${result.stdout}\n</stdout>`);
-	if (result.stderr) sections.push(`<stderr>\n${result.stderr}\n</stderr>`);
-	sections.push("</shell_result>");
-	return truncateShellHistory(sections.join("\n"));
-}
-function truncateShellHistory(text) {
-	if (text.length <= MAX_SHELL_HISTORY_CHARS) return text;
-	return `[Shell output truncated: ${text.length - MAX_SHELL_HISTORY_CHARS} leading characters omitted]\n` + text.slice(text.length - MAX_SHELL_HISTORY_CHARS);
-}
 function getErrorMessage(error) {
 	return error instanceof Error ? error.message : String(error);
 }
+function redactEnvValues(env) { // keeps every key of `env`, swaps each value for "<redacted>"
+	return Object.fromEntries(Array.from(Object.keys(env), (name) => [name, "<redacted>"]));
+}
 //#endregion
-export { assertRoleExists as a, normalizePath as i, Session as n, createScopedEnv as o, deleteSessionTree as r, mergeCommands as s, InMemorySessionStore as t };
+export { assertRoleExists as a, normalizePath as i, Session as n, deleteSessionTree as r, InMemorySessionStore as t };