npm - @roj-ai/sdk - Versions diffs - 0.1.14 → 0.1.15 - Mend

@roj-ai/sdk 0.1.14 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

package/dist/bootstrap.d.ts +1 -0
package/dist/bootstrap.d.ts.map +1 -1
package/dist/core/agents/agent.d.ts +25 -1
package/dist/core/agents/agent.d.ts.map +1 -1
package/dist/core/agents/agent.js +117 -21
package/dist/core/agents/agent.js.map +1 -1
package/dist/core/agents/config.d.ts +7 -0
package/dist/core/agents/config.d.ts.map +1 -1
package/dist/core/agents/context.d.ts +10 -0
package/dist/core/agents/context.d.ts.map +1 -1
package/dist/core/agents/state.d.ts +11 -3
package/dist/core/agents/state.d.ts.map +1 -1
package/dist/core/agents/state.js.map +1 -1
package/dist/core/file-store/file-store.d.ts +5 -1
package/dist/core/file-store/file-store.d.ts.map +1 -1
package/dist/core/file-store/file-store.js +31 -21
package/dist/core/file-store/file-store.js.map +1 -1
package/dist/core/image/vips-resizer.test.js +26 -14
package/dist/core/image/vips-resizer.test.js.map +1 -1
package/dist/core/llm/anthropic.d.ts.map +1 -1
package/dist/core/llm/anthropic.js +11 -8
package/dist/core/llm/anthropic.js.map +1 -1
package/dist/core/llm/cache-breakpoints.d.ts +5 -1
package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
package/dist/core/llm/cache-breakpoints.js +10 -5
package/dist/core/llm/cache-breakpoints.js.map +1 -1
package/dist/core/sessions/session.d.ts.map +1 -1
package/dist/core/sessions/session.js +10 -0
package/dist/core/sessions/session.js.map +1 -1
package/dist/core/sessions/session.test.js +5 -0
package/dist/core/sessions/session.test.js.map +1 -1
package/dist/core/sessions/state.d.ts.map +1 -1
package/dist/core/sessions/state.js +5 -1
package/dist/core/sessions/state.js.map +1 -1
package/dist/core/tools/executor.test.js +1 -0
package/dist/core/tools/executor.test.js.map +1 -1
package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
package/dist/plugins/agent-status/plugin.js +18 -26
package/dist/plugins/agent-status/plugin.js.map +1 -1
package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
package/dist/plugins/context-compact/compaction-live.test.js +177 -0
package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
package/dist/plugins/context-compact/context-compactor.js +60 -36
package/dist/plugins/context-compact/context-compactor.js.map +1 -1
package/dist/plugins/context-compact/context-compactor.test.js +69 -103
package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
package/dist/plugins/context-compact/plugin.d.ts +9 -2
package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
package/dist/plugins/context-compact/plugin.js +8 -4
package/dist/plugins/context-compact/plugin.js.map +1 -1
package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
package/dist/plugins/filesystem/plugin.js +8 -6
package/dist/plugins/filesystem/plugin.js.map +1 -1
package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
package/dist/plugins/resources/plugin.d.ts.map +1 -1
package/dist/plugins/resources/plugin.js +4 -1
package/dist/plugins/resources/plugin.js.map +1 -1
package/dist/plugins/user-chat/plugin.d.ts +2 -0
package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
package/dist/plugins/user-chat/plugin.js +47 -3
package/dist/plugins/user-chat/plugin.js.map +1 -1
package/dist/plugins/user-chat/schema.d.ts +10 -0
package/dist/plugins/user-chat/schema.d.ts.map +1 -1
package/dist/plugins/user-chat/schema.js +1 -0
package/dist/plugins/user-chat/schema.js.map +1 -1
package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
package/package.json +2 -2
package/src/core/agents/agent.ts +134 -20
package/src/core/agents/config.ts +7 -0
package/src/core/agents/context.ts +11 -0
package/src/core/agents/state.ts +11 -4
package/src/core/file-store/file-store.ts +38 -18
package/src/core/image/vips-resizer.test.ts +26 -15
package/src/core/llm/anthropic.ts +19 -12
package/src/core/llm/cache-breakpoints.ts +15 -6
package/src/core/sessions/session.test.ts +6 -0
package/src/core/sessions/session.ts +12 -0
package/src/core/sessions/state.ts +5 -1
package/src/core/tools/executor.test.ts +1 -0
package/src/plugins/agent-status/plugin.ts +18 -25
package/src/plugins/context-compact/compaction-live.test.ts +221 -0
package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
package/src/plugins/context-compact/context-compactor.test.ts +71 -110
package/src/plugins/context-compact/context-compactor.ts +88 -43
package/src/plugins/context-compact/plugin.ts +19 -10
package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
package/src/plugins/filesystem/plugin.ts +8 -6
package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
package/src/plugins/resources/plugin.ts +4 -1
package/src/plugins/user-chat/plugin.ts +60 -3
package/src/plugins/user-chat/schema.ts +10 -1
package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0

package/src/core/agents/agent.ts CHANGED Viewed

@@ -46,7 +46,7 @@ import { toolEvents } from '~/core/tools/state.js'
 import { getAgentUnconsumedMailbox, selectMailboxState } from '~/plugins/mailbox/query.js'
 import { AGENT_BASE_BRIEFING } from '~/prompts/base.js'
 import { buildEnvironmentSection } from '~/prompts/builder.js'
-import { Err, type Result } from '~/lib/utils/result.js'
+import { Err, Ok, type Result } from '~/lib/utils/result.js'
 import type { Logger } from '../../lib/logger/logger.js'
 import type { SessionContext } from '../sessions/context.js'
 import type { SessionStore } from '../sessions/session-store.js'
@@ -54,7 +54,7 @@ import type { SessionState } from '../sessions/state.js'
 import type { SessionEnvironment, ToolExecutor } from '../tools/index.js'
 import type { AgentContext } from './context.js'
 import { sanitizeLLMResponse } from './response-sanitizer.js'
-import { withLLMRetry } from './retry.js'
+import { isRetryableLLMError, withLLMRetry } from './retry.js'
 // ============================================================================
 // Types
@@ -81,6 +81,14 @@ export interface AgentConfig<TInput = unknown> {
 	input?: z.ZodType<TInput>
 	/** Per-plugin agent-level configs */
 	plugins?: AgentPluginConfig[]
+	/**
+	 * Prompt cache TTL for breakpoints emitted by this agent's inference calls.
+	 * '1h' opts into Anthropic's extended cache tier (write 2× input, read 0.1×)
+	 * — useful for long-lived agents (e.g. an orchestrator that waits minutes
+	 * between user turns) where the default 5-minute TTL would expire and force
+	 * full re-uploads. Omit (or '5m') for the standard tier.
+	 */
+	cacheTtl?: '5m' | '1h'
 }
 /**
@@ -378,6 +386,58 @@ export class Agent {
 		return this.scheduled
 	}
+	/**
+	 * Run a one-off LLM call using the agent's current system prompt, tools, and
+	 * conversation prefix, with extra trailing messages appended. Does not emit
+	 * agent inference events and does not mutate conversation history; the call
+	 * is logged via the LLM provider's normal logging pipeline.
+	 *
+	 * Intended for plugins that need a constrained side-channel inference
+	 * leveraging the agent's already-warm prompt cache — e.g. context-compact
+	 * uses this to ask the same model for a summary, paying only the trailing
+	 * uncached portion plus output tokens.
+	 *
+	 * The cache breakpoint is placed so that everything up to (but excluding)
+	 * `extraMessages` is cacheable, matching the previous regular inference.
+	 *
+	 * @param extraMessages Messages appended after the preamble, conversation
+	 * history, and pending messages; they form the uncached tail of the request.
+	 * @returns The provider's inference result, or an `invalid_request` error
+	 * when the agent has no state.
+	 */
+	async runAuxiliaryInference(extraMessages: LLMMessage[]): Promise<Result<InferenceResponse, LLMError>> {
+		const agentState = this.state
+		if (!agentState) {
+			return Err({ type: 'invalid_request', message: `Agent ${this.id} has no state` })
+		}
+		// pendingToolResults aren't in conversationHistory yet (they get committed
+		// by the next inference_completed), but the assistant tool_use that
+		// demands them IS at the tail of history. Without these inlined, an
+		// aux call placed mid-tool-turn (e.g. by the context-compact plugin's
+		// beforeInference hook) lands as `[…, assistant(tool_use), extraMessages]`
+		// and Anthropic rejects with "tool_use blocks must be followed by
+		// tool_result blocks".
+		const pendingToolResultMessages = this.buildPendingMessages(agentState)
+		const baseMessages: LLMMessage[] = [
+			...agentState.preamble,
+			...agentState.conversationHistory,
+			...pendingToolResultMessages,
+		]
+		const messages = [...baseMessages, ...extraMessages]
+		const cachedMessages = applyCacheBreakpoint(messages, extraMessages.length, this.config.cacheTtl) // extraMessages.length is the uncached-suffix count, so the breakpoint targets the last base message
+		const request: InferenceRequest = {
+			model: this.config.model,
+			systemPrompt: this.buildSystemPrompt(),
+			messages: cachedMessages,
+			tools: this.tools.size > 0 ? [...this.tools.values()] : undefined, // tools is left undefined when the agent has none
+		}
+		return this.llmProvider.inference(request, { // single direct call: not wrapped in withLLMRetry, so a failed Result is returned to the caller as-is
+			sessionId: this.store.sessionId,
+			agentId: this.id,
+			signal: this.abortController.signal,
+			fileStore: this.fileStore,
+			providers: this.llmProviders,
+		})
+	}
 	// ============================================================================
 	// Private methods - Processing
 	// ============================================================================
@@ -485,7 +545,11 @@ export class Agent {
 		// Mark cache breakpoint — ephemeral session-context suffix is excluded
 		// so it doesn't invalidate the cache on every inference.
-		const cachedMessages = applyCacheBreakpoint(messages, ephemeralParts.length > 0 ? 1 : 0)
+		const cachedMessages = applyCacheBreakpoint(
+			messages,
+			ephemeralParts.length > 0 ? 1 : 0,
+			this.config.cacheTtl,
+		)
 		// 5. LLM inference (with retry)
 		const request: InferenceRequest = {
@@ -513,6 +577,7 @@ export class Agent {
 		// can notify the parent.
 		let llmResponse: Result<InferenceResponse, LLMError>
 		let emptyAttempts = 0
+		let nudgeInjected = false
 		while (true) {
 			llmResponse = await withLLMRetry(
 				() =>
@@ -538,13 +603,20 @@ export class Agent {
 			if (!isEmptyStop) break
 			if (emptyAttempts >= Agent.MAX_EMPTY_RESPONSE_RETRIES) {
-				this.logger.error('LLM returned empty stop response after retries', undefined, {
+				this.logger.warn('LLM returned empty stop response after retries, coalescing to WAITING', {
 					agentId: this.id,
 					attempts: emptyAttempts + 1,
 				})
-				llmResponse = Err({
-					type: 'server_error',
-					message: `LLM returned empty response (no content, no tool calls) after ${emptyAttempts + 1} attempts`,
+				// Coalesce to WAITING instead of hard-erroring. The LLM accepted the
+				// message but returned empty output on every attempt — treating this as
+				// a terminal failure was too aggressive (errored state, error message
+				// to parent, mailbox tokens stuck unconsumed). Synthetic WAITING goes
+				// through the normal success path: mailbox tokens get consumed,
+				// limits-guard skips it in dedup (existing WAITING exception), and
+				// the agent quietly transitions to pending → complete.
+				llmResponse = Ok({
+					...llmResponse.value,
+					content: 'WAITING',
 				})
 				break
 			}
@@ -553,24 +625,51 @@ export class Agent {
 				agentId: this.id,
 				attempt: emptyAttempts,
 			})
-		}
-		// Mark plugin messages as consumed (regardless of inference outcome —
-		// messages are already appended to conversationHistory via inference_started)
-		{
-			const currentAgentState = this.state
-			if (currentAgentState) {
-				const ctx = this.buildAgentContext(currentAgentState)
-				for (const dequeued of pluginDequeued) {
-					if (!dequeued.plugin.dequeue) continue
-					const pluginCtx = this.buildPluginHookContext(dequeued.plugin, ctx)
-					await dequeued.plugin.dequeue.markConsumed(pluginCtx, dequeued.token)
-				}
+			// Inject a one-shot nudge after the first empty response. Appended after the
+			// existing cache breakpoint, so the cached prefix still hits — only the new
+			// tail is uncached. Uses the canonical "WAITING" literal so the response is
+			// recognized by the sanitizer and limits-guard plugin.
+			if (!nudgeInjected) {
+				request.messages.push({
+					role: 'user',
+					content:
+						'<system-nudge>Your previous response was empty (no text and no tool calls). '
+						+ 'Either produce a meaningful response — text or tool calls — or, if you '
+						+ 'have nothing to do, output only the word WAITING on its own line per the '
+						+ 'waiting protocol.</system-nudge>',
+				})
+				nudgeInjected = true
 			}
 		}
 		if (!llmResponse.ok) {
-			// 4a. Inference failed
+			// Aborted (shutdown / interruption): bail silently. Emitting inference_failed
+			// would leave the agent in 'errored' with unconsumed plugin tokens — decide()
+			// would then loop resume_from_error ↔ infer forever (each retry re-aborts).
+			if (llmResponse.error.type === 'aborted') return
+			// Non-retryable failures (invalid_request, context_length) will fail the same
+			// way on every retry. Mark plugin tokens consumed before emitting
+			// inference_failed so decide()'s resume_from_error path doesn't re-feed the
+			// same message into a doomed retry loop. Retryable errors (rate_limit,
+			// server_error, network_error, timeout) keep the preserve-for-retry semantics.
+			if (!isRetryableLLMError(llmResponse.error)) {
+				const errorAgentState = this.state
+				if (errorAgentState) {
+					const errorCtx = this.buildAgentContext(errorAgentState)
+					for (const dequeued of pluginDequeued) {
+						if (!dequeued.plugin.dequeue) continue
+						const pluginCtx = this.buildPluginHookContext(dequeued.plugin, errorCtx)
+						await dequeued.plugin.dequeue.markConsumed(pluginCtx, dequeued.token)
+					}
+				}
+			}
+			// 4a. Inference failed — emit inference_failed. For retryable errors the plugin
+			// messages are NOT marked consumed: the reducer leaves pendingToolResults / mailbox
+			// tokens intact so the next inference rebuilds the same turn with the user message.
+			// Non-retryable errors were already marked consumed above to avoid a doomed retry loop.
 			await this.store.emit(withSessionId(
 				this.store.sessionId,
 				llmEvents.create('inference_failed', {
@@ -587,6 +686,20 @@ export class Agent {
 			return
 		}
+		// Mark plugin messages as consumed only after successful inference. They've been
+		// appended to conversationHistory via the inference_completed reducer below.
+		{
+			const currentAgentState = this.state
+			if (currentAgentState) {
+				const ctx = this.buildAgentContext(currentAgentState)
+				for (const dequeued of pluginDequeued) {
+					if (!dequeued.plugin.dequeue) continue
+					const pluginCtx = this.buildPluginHookContext(dequeued.plugin, ctx)
+					await dequeued.plugin.dequeue.markConsumed(pluginCtx, dequeued.token)
+				}
+			}
+		}
 		// 4c. Sanitize response to prevent hallucination
 		const sanitized = sanitizeLLMResponse(llmResponse.value.content)
@@ -866,6 +979,7 @@ export class Agent {
 			agentConfig: this.config,
 			input: agentState.typedInput,
 			parentId: agentState.parentId,
+			runAuxiliaryInference: (extraMessages) => this.runAuxiliaryInference(extraMessages),
 		}
 	}

package/src/core/agents/config.ts CHANGED Viewed

@@ -52,6 +52,13 @@ export interface BaseAgentConfig<TInput = unknown> {
 	services?: ServiceConfig[]
 	/** LLM middleware chain applied per-agent (runs after preset-level middleware) */
 	llmMiddleware?: LLMMiddleware[]
+	/**
+	 * Prompt cache TTL for this agent's inference breakpoints.
+	 * '1h' opts into Anthropic's extended cache tier for long-lived agents
+	 * (e.g. an orchestrator that waits minutes between user turns). Defaults
+	 * to the standard 5-minute tier.
+	 */
+	cacheTtl?: '5m' | '1h'
 }
 /**

package/src/core/agents/context.ts CHANGED Viewed

@@ -1,3 +1,5 @@
+import type { InferenceResponse, LLMError, LLMMessage } from '~/core/llm/provider.js'
+import type { Result } from '~/lib/utils/result.js'
 import { SessionContext } from '../sessions/context.js'
 import { AgentConfig } from './agent.js'
 import { AgentId } from './schema.js'
@@ -16,4 +18,13 @@ export type AgentContext<TInput = unknown> =
 		input: TInput
 		/** The parent agent ID (null for root agents) */
 		parentId: AgentId | null
+		/**
+		 * Run a one-off LLM call reusing the agent's current system prompt, tools,
+		 * and conversation prefix, with extra trailing messages appended. Lets
+		 * plugins do side-channel inferences (e.g. summarization) while sharing
+		 * the agent's warm prompt cache. See Agent.runAuxiliaryInference for the
+		 * full contract.
+		 */
+		runAuxiliaryInference: (extraMessages: LLMMessage[]) => Promise<Result<InferenceResponse, LLMError>>
 	}

package/src/core/agents/state.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { agentIdSchema } from '~/core/agents/schema.js'
 import { createEventsFactory } from '~/core/events/types'
 import type { ToolResultContent } from '~/core/llm/llm-log-types.js'
 import type { ChatMessageContentItem } from '~/core/llm/llm-log-types.js'
+import type { LLMMetrics } from '~/core/llm/state.js'
 import type { PendingToolResult, ToolCallId } from '~/core/tools/schema.js'
 import { MessageId } from '../../plugins/mailbox/schema.js'
@@ -107,11 +108,15 @@ export type AgentPauseReason = 'limit' | 'handler' | 'manual'
 /**
  * Prompt cache breakpoint marker.
- * When set on an LLMMessage, providers place `cache_control: { type: 'ephemeral' }`
- * on the LAST content block of the mapped message (regardless of block type),
- * marking it as a prompt cache checkpoint.
+ * When set on an LLMMessage, providers place `cache_control` on the LAST
+ * content block of the mapped message (regardless of block type), marking it
+ * as a prompt cache checkpoint.
+ *
+ * `ttl: '1h'` opts into Anthropic's 1-hour cache tier (write cost 2× input,
+ * read still 0.1×). Useful for long-lived agents whose prompt cache would
+ * otherwise expire between user turns. Omit for the default 5-minute tier.
  */
-export type LLMMessageCacheControl = { type: 'ephemeral' }
+export type LLMMessageCacheControl = { type: 'ephemeral'; ttl?: '5m' | '1h' }
 /**
  * User message - from mailbox or direct input.
@@ -204,6 +209,8 @@ export interface AgentState {
 	pauseReason?: AgentPauseReason
 	/** Human-readable pause message */
 	pauseMessage?: string
+	/** Metrics from the most recent completed inference — used by plugins (e.g. context-compact) to size context against the provider-reported truth. */
+	lastInferenceMetrics?: LLMMetrics
 }
 // ============================================================================

package/src/core/file-store/file-store.ts CHANGED Viewed

@@ -92,35 +92,55 @@ export class SessionFileStore implements FileStore {
 		}
 	}
-	async list(path: string): Promise<Result<FileEntry[], string>> {
+	async list(
+		path: string,
+		options?: { maxDepth?: number; gitIgnore?: boolean },
+	): Promise<Result<FileEntry[], string>> {
 		const resolved = this.resolvePath(path)
 		if (!resolved.ok) return resolved
+		const maxDepth = options?.maxDepth ?? 1
+		if (maxDepth < 1) return Ok([])
 		try {
-			const items = await this.fs.readdir(resolved.value, { withFileTypes: true })
 			const entries: FileEntry[] = []
-			for (const item of items) {
-				let type: FileEntry['type']
-				let size: number | undefined
-				if (item.isFile()) {
-					type = 'file'
-					const s = await this.fs.stat(join(resolved.value, item.name))
-					size = s.size
-				} else if (item.isDirectory()) {
-					type = 'directory'
-				} else if (item.isSymbolicLink()) {
-					type = 'symlink'
-				} else {
-					type = 'other'
-				}
-				entries.push({ name: item.name, type, size })
-			}
+			await this.walkInto(resolved.value, '', maxDepth, entries)
 			return Ok(entries)
 		} catch {
 			return Err(`Directory not found: ${path}`)
 		}
 	}
+	private async walkInto( // Depth-first listing of absDir into `out`; each directory entry is pushed before its children.
+		absDir: string, // directory to read (list() passes the already-resolved path)
+		relPrefix: string, // path of absDir relative to the listing root; '' for the root itself
+		remainingDepth: number, // levels still allowed, counting absDir; recursion stops once this reaches 1
+		out: FileEntry[], // result accumulator, appended to in place
+	): Promise<void> {
+		const items = await this.fs.readdir(absDir, { withFileTypes: true }) // no try/catch in this method: readdir/stat failures reject to the caller
+		for (const item of items) {
+			const relName = relPrefix ? `${relPrefix}/${item.name}` : item.name // reported names are relative to the listing root, joined with '/'
+			let type: FileEntry['type']
+			let size: number | undefined // only assigned for regular files
+			if (item.isFile()) {
+				type = 'file'
+				const s = await this.fs.stat(join(absDir, item.name))
+				size = s.size
+			} else if (item.isDirectory()) {
+				type = 'directory'
+			} else if (item.isSymbolicLink()) {
+				type = 'symlink'
+			} else {
+				type = 'other'
+			}
+			out.push({ name: relName, type, size })
+			if (type === 'directory' && remainingDepth > 1) { // entries typed 'symlink' or 'other' are never descended into
+				await this.walkInto(join(absDir, item.name), relName, remainingDepth - 1, out)
+			}
+		}
+	}
 	async remove(path: string): Promise<Result<void, string>> {
 		const resolved = this.resolvePath(path)
 		if (!resolved.ok) return resolved

package/src/core/image/vips-resizer.test.ts CHANGED Viewed

@@ -1,25 +1,36 @@
-import { afterEach, describe, expect, it, mock, spyOn } from 'bun:test'
-import * as childProcess from 'node:child_process'
+import { afterEach, describe, expect, it, spyOn } from 'bun:test'
+import { tmpdir } from 'node:os'
+import { VipsImageResizer } from './vips-resizer.js'
+import { createNodeFileSystem } from '~/testing/node-platform.js'
+import type { ExecFileResult, ProcessRunner } from '~/platform/process.js'
 type ExecFileCallback = (error: Error | null, stdout: string, stderr: string) => void
 let execFileImpl: (cmd: string, args: string[], opts: unknown, cb: ExecFileCallback) => void = () => {}
-mock.module('node:child_process', () => ({
-	...childProcess,
-	execFile: (cmd: string, args: string[], opts: unknown, cb: ExecFileCallback) => execFileImpl(cmd, args, opts, cb),
-}))
-const { VipsImageResizer } = await import('./vips-resizer.js')
-const { createNodePlatform } = await import('~/testing/node-platform.js')
+// Fake ProcessRunner — wires execFile calls through the test-controlled
+// `execFileImpl`. Avoids `mock.module('node:child_process')` because
+// node-platform.ts promisifies execFile at module-load and other test files
+// may load it first, freezing the binding to the real implementation.
+function createFakeProcessRunner(): ProcessRunner {
+	return {
+		execFile: (file, args, options) =>
+			new Promise<ExecFileResult>((resolve, reject) => {
+				execFileImpl(file, args, options ?? {}, (err, stdout, stderr) => { // bridges the callback-style stub to a promise; missing options are passed as {}
+					if (err) reject(err) // the stub's error becomes the promise rejection
+					else resolve({ stdout: stdout ?? '', stderr: stderr ?? '' }) // missing output is normalized to empty strings
+				})
+			}),
+		spawn: () => { // unsupported in this fake: throws if called
+			throw new Error('spawn not implemented in test fake')
+		},
+	}
+}
-// Test-scoped helper — routes through createNodePlatform so the module-level
-// node:child_process mock still intercepts execFile calls made by ProcessRunner.
 function createResizer(maxDimension?: number): InstanceType<typeof VipsImageResizer> {
-	const platform = createNodePlatform()
 	return new VipsImageResizer({
-		fs: platform.fs,
-		process: platform.process,
-		tmpDir: platform.tmpDir,
+		fs: createNodeFileSystem(),
+		process: createFakeProcessRunner(),
+		tmpDir: tmpdir(),
 		maxDimension,
 	})
 }

package/src/core/llm/anthropic.ts CHANGED Viewed

@@ -85,10 +85,15 @@ interface AnthropicErrorResponse {
 // Request body types
 // ============================================================================
+interface AnthropicCacheControl { // shape of the `cache_control` field on the content block params declared in this file
+	type: 'ephemeral'
+	ttl?: '5m' | '1h' // mapMessage sets this only when the message's cacheControl carries a ttl
+}
 interface AnthropicTextBlockParam {
 	type: 'text'
 	text: string
-	cache_control?: { type: 'ephemeral' }
+	cache_control?: AnthropicCacheControl
 }
 interface AnthropicImageBlockParam {
@@ -96,7 +101,7 @@ interface AnthropicImageBlockParam {
 	source:
 		| { type: 'base64'; media_type: string; data: string }
 		| { type: 'url'; url: string }
-	cache_control?: { type: 'ephemeral' }
+	cache_control?: AnthropicCacheControl
 }
 interface AnthropicToolUseBlockParam {
@@ -104,7 +109,7 @@ interface AnthropicToolUseBlockParam {
 	id: string
 	name: string
 	input: unknown
-	cache_control?: { type: 'ephemeral' }
+	cache_control?: AnthropicCacheControl
 }
 interface AnthropicToolResultBlockParam {
@@ -112,7 +117,7 @@ interface AnthropicToolResultBlockParam {
 	tool_use_id: string
 	content: string | Array<AnthropicTextBlockParam | AnthropicImageBlockParam>
 	is_error?: boolean
-	cache_control?: { type: 'ephemeral' }
+	cache_control?: AnthropicCacheControl
 }
 type AnthropicContentBlockParam =
@@ -127,19 +132,19 @@ interface AnthropicMessageParam {
 }
 /**
- * Add `cache_control: { type: 'ephemeral' }` to the LAST content block of an
- * AnthropicMessageParam, regardless of block type. Converts string content to
- * a single text block first so the mark has a place to live. Mutates in place
- * so the cache breakpoint survives subsequent `mergeConsecutiveMessages`.
+ * Add `cache_control` to the LAST content block of an AnthropicMessageParam,
+ * regardless of block type. Converts string content to a single text block
+ * first so the mark has a place to live. Mutates in place so the cache
+ * breakpoint survives subsequent `mergeConsecutiveMessages`.
  */
-function applyCacheControlToLastBlock(msg: AnthropicMessageParam): void {
+function applyCacheControlToLastBlock(msg: AnthropicMessageParam, cacheControl: AnthropicCacheControl): void {
 	if (typeof msg.content === 'string') {
-		msg.content = [{ type: 'text', text: msg.content, cache_control: { type: 'ephemeral' } }]
+		msg.content = [{ type: 'text', text: msg.content, cache_control: cacheControl }]
 		return
 	}
 	if (msg.content.length === 0) return
 	const lastIdx = msg.content.length - 1
-	msg.content[lastIdx] = { ...msg.content[lastIdx], cache_control: { type: 'ephemeral' } }
+	msg.content[lastIdx] = { ...msg.content[lastIdx], cache_control: cacheControl }
 }
 interface AnthropicToolParam {
@@ -366,7 +371,9 @@ export class AnthropicProvider implements RoutableLLMProvider {
 	private async mapMessage(msg: LLMMessage, context?: InferenceContext): Promise<AnthropicMessageParam> {
 		const mapped = await this.mapMessageContent(msg, context)
 		if (msg.cacheControl) {
-			applyCacheControlToLastBlock(mapped)
+			const cc: AnthropicCacheControl = { type: 'ephemeral' }
+			if (msg.cacheControl.ttl) cc.ttl = msg.cacheControl.ttl
+			applyCacheControlToLastBlock(mapped, cc)
 		}
 		return mapped
 	}

package/src/core/llm/cache-breakpoints.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { LLMMessage } from '~/core/agents/state.js'
+import type { LLMMessage, LLMMessageCacheControl } from '~/core/agents/state.js'
 /**
  * Mark the prompt cache breakpoint on a message list.
@@ -13,25 +13,34 @@ import type { LLMMessage } from '~/core/agents/state.js'
  * Target index is `messages.length - 1 - uncachedSuffixCount`. The suffix is
  * the tail of messages that must remain fresh (e.g. ephemeral session context
  * rebuilt each inference).
+ *
+ * `ttl` opts into Anthropic's 1-hour cache tier (write cost 2× input, read
+ * still 0.1×). Useful for long-lived agents where the default 5-minute TTL
+ * would expire between user turns. Omit for the default 5-minute tier.
  */
-export function applyCacheBreakpoint(messages: LLMMessage[], uncachedSuffixCount: number): LLMMessage[] {
+export function applyCacheBreakpoint(
+	messages: LLMMessage[],
+	uncachedSuffixCount: number,
+	ttl?: '5m' | '1h',
+): LLMMessage[] {
 	const idx = messages.length - 1 - uncachedSuffixCount
 	if (idx < 0) return messages
+	const cacheControl: LLMMessageCacheControl = ttl ? { type: 'ephemeral', ttl } : { type: 'ephemeral' }
 	const target = messages[idx]
 	const result = [...messages]
 	switch (target.role) {
 		case 'user':
-			result[idx] = { ...target, cacheControl: { type: 'ephemeral' } }
+			result[idx] = { ...target, cacheControl }
 			break
 		case 'assistant':
-			result[idx] = { ...target, cacheControl: { type: 'ephemeral' } }
+			result[idx] = { ...target, cacheControl }
 			break
 		case 'system':
-			result[idx] = { ...target, cacheControl: { type: 'ephemeral' } }
+			result[idx] = { ...target, cacheControl }
 			break
 		case 'tool':
-			result[idx] = { ...target, cacheControl: { type: 'ephemeral' } }
+			result[idx] = { ...target, cacheControl }
 			break
 	}
 	return result

package/src/core/sessions/session.test.ts CHANGED Viewed

@@ -712,6 +712,8 @@ describe('applyEvent', () => {
 			expect(session.agents.get(agentId)!.pendingMessages).toHaveLength(2)
 			expect(session.agents.get(agentId)!.status).toBe('inferring')
+			const historyLenBeforeFailure = session.agents.get(agentId)!.conversationHistory.length
 			// 6. Inference fails
 			session = applyEvent(
 				session,
@@ -735,6 +737,10 @@ describe('applyEvent', () => {
 			expect(getAgentMailbox(selectMailboxState(session), agentId)[0].consumed).toBe(false)
 			// status is errored
 			expect(agent.status).toBe('errored')
+			// conversationHistory NOT extended — pendingMessages are dropped, not promoted.
+			// Otherwise tool results would appear both in history and in pendingToolResults,
+			// duplicating them on the next inference (a Bedrock-style provider rejects that with a 400).
+			expect(agent.conversationHistory).toHaveLength(historyLenBeforeFailure)
 		})
 	})

package/src/core/sessions/session.ts CHANGED Viewed

@@ -494,6 +494,15 @@ export class Session {
 		}
 		const result = await methodDef.handler(ctx, parsed.data)
+		// Plugin methods can mutate dequeue state (uploads.upload adds to pending,
+		// resources.inject can too) without explicitly calling ctx.scheduleAgent.
+		// Schedule every agent — scheduleProcessing is idempotent + debounced, and
+		// decide() short-circuits to idle/complete when no work is actually pending.
+		for (const agent of this.agents.values()) {
+			agent.scheduleProcessing()
+		}
 		return result
 	}
@@ -795,6 +804,7 @@ export class Session {
 				checkIntervalMs: orch.checkIntervalMs,
 				input: orch.input,
 				plugins: withServicePluginConfig(orch),
+				cacheTtl: orch.cacheTtl,
 			}
 		}
@@ -810,6 +820,7 @@ export class Session {
 				checkIntervalMs: comm.checkIntervalMs,
 				input: comm.input,
 				plugins: withServicePluginConfig(comm),
+				cacheTtl: comm.cacheTtl,
 			}
 		}
@@ -828,6 +839,7 @@ export class Session {
 			checkIntervalMs: agentDef.checkIntervalMs,
 			input: agentDef.input,
 			plugins: withServicePluginConfig(agentDef),
+			cacheTtl: agentDef.cacheTtl,
 		}
 	}
 }

package/src/core/sessions/state.ts CHANGED Viewed

@@ -207,15 +207,19 @@ export const coreReducer = createTypedReducer(
 						pendingToolCalls: toolCalls,
 						pendingMessages: [],
 						pendingToolResults: [],
+						lastInferenceMetrics: event.metrics,
 					}
 				})
 			}
 			case 'inference_failed':
+				// Failure is a clean rollback: pendingMessages are dropped (not promoted to history)
+				// and pendingToolResults stay intact so the next inference rebuilds the same turn.
+				// Runtime skips markConsumed on retryable failures to preserve mailbox tokens, and
+				// marks them consumed only for non-retryable errors — see runInference().
 				return updateAgent(state, event.agentId, (agent) => ({
 					...agent,
 					status: 'errored',
-					conversationHistory: [...agent.conversationHistory, ...agent.pendingMessages],
 					pendingMessages: [],
 				}))

package/src/core/tools/executor.test.ts CHANGED Viewed

@@ -51,6 +51,7 @@ const createTestContext = (): ToolContext => {
 		agentConfig: { systemPrompt: 'test', model: ModelId('test'), spawnableAgents: [] },
 		input: undefined,
 		parentId: null,
+		runAuxiliaryInference: async () => Err({ type: 'invalid_request', message: 'not implemented in test' }),
 	}
 }