npm - @roj-ai/sdk - Versions diffs - 0.1.13 → 0.1.15 - Mend

@roj-ai/sdk 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (154)

package/dist/bootstrap.d.ts +13 -0
package/dist/bootstrap.d.ts.map +1 -1
package/dist/bootstrap.js +3 -1
package/dist/bootstrap.js.map +1 -1
package/dist/config.d.ts +2 -0
package/dist/config.d.ts.map +1 -1
package/dist/config.js +3 -0
package/dist/config.js.map +1 -1
package/dist/core/agents/agent.d.ts +25 -1
package/dist/core/agents/agent.d.ts.map +1 -1
package/dist/core/agents/agent.js +117 -21
package/dist/core/agents/agent.js.map +1 -1
package/dist/core/agents/config.d.ts +7 -0
package/dist/core/agents/config.d.ts.map +1 -1
package/dist/core/agents/context.d.ts +10 -0
package/dist/core/agents/context.d.ts.map +1 -1
package/dist/core/agents/state.d.ts +11 -3
package/dist/core/agents/state.d.ts.map +1 -1
package/dist/core/agents/state.js.map +1 -1
package/dist/core/file-store/file-store.d.ts +5 -1
package/dist/core/file-store/file-store.d.ts.map +1 -1
package/dist/core/file-store/file-store.js +31 -21
package/dist/core/file-store/file-store.js.map +1 -1
package/dist/core/image/vips-resizer.test.js +26 -14
package/dist/core/image/vips-resizer.test.js.map +1 -1
package/dist/core/llm/anthropic.d.ts.map +1 -1
package/dist/core/llm/anthropic.js +11 -8
package/dist/core/llm/anthropic.js.map +1 -1
package/dist/core/llm/cache-breakpoints.d.ts +5 -1
package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
package/dist/core/llm/cache-breakpoints.js +10 -5
package/dist/core/llm/cache-breakpoints.js.map +1 -1
package/dist/core/sessions/session.d.ts.map +1 -1
package/dist/core/sessions/session.js +10 -0
package/dist/core/sessions/session.js.map +1 -1
package/dist/core/sessions/session.test.js +5 -0
package/dist/core/sessions/session.test.js.map +1 -1
package/dist/core/sessions/state.d.ts.map +1 -1
package/dist/core/sessions/state.js +5 -1
package/dist/core/sessions/state.js.map +1 -1
package/dist/core/tools/executor.test.js +1 -0
package/dist/core/tools/executor.test.js.map +1 -1
package/dist/lib/utils/concurrency.d.ts +25 -0
package/dist/lib/utils/concurrency.d.ts.map +1 -0
package/dist/lib/utils/concurrency.js +69 -0
package/dist/lib/utils/concurrency.js.map +1 -0
package/dist/lib/utils/concurrency.test.d.ts +2 -0
package/dist/lib/utils/concurrency.test.d.ts.map +1 -0
package/dist/lib/utils/concurrency.test.js +135 -0
package/dist/lib/utils/concurrency.test.js.map +1 -0
package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
package/dist/plugins/agent-status/plugin.js +18 -26
package/dist/plugins/agent-status/plugin.js.map +1 -1
package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
package/dist/plugins/context-compact/compaction-live.test.js +177 -0
package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
package/dist/plugins/context-compact/context-compactor.js +60 -36
package/dist/plugins/context-compact/context-compactor.js.map +1 -1
package/dist/plugins/context-compact/context-compactor.test.js +69 -103
package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
package/dist/plugins/context-compact/plugin.d.ts +9 -2
package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
package/dist/plugins/context-compact/plugin.js +8 -4
package/dist/plugins/context-compact/plugin.js.map +1 -1
package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
package/dist/plugins/filesystem/plugin.js +8 -6
package/dist/plugins/filesystem/plugin.js.map +1 -1
package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
package/dist/plugins/resources/plugin.d.ts.map +1 -1
package/dist/plugins/resources/plugin.js +4 -1
package/dist/plugins/resources/plugin.js.map +1 -1
package/dist/plugins/uploads/plugin.d.ts +12 -0
package/dist/plugins/uploads/plugin.d.ts.map +1 -1
package/dist/plugins/uploads/plugin.js +188 -44
package/dist/plugins/uploads/plugin.js.map +1 -1
package/dist/plugins/uploads/preprocessors/image-classifier.d.ts +9 -0
package/dist/plugins/uploads/preprocessors/image-classifier.d.ts.map +1 -1
package/dist/plugins/uploads/preprocessors/image-classifier.js +4 -1
package/dist/plugins/uploads/preprocessors/image-classifier.js.map +1 -1
package/dist/plugins/uploads/preprocessors/image-classifier.test.d.ts +2 -0
package/dist/plugins/uploads/preprocessors/image-classifier.test.d.ts.map +1 -0
package/dist/plugins/uploads/preprocessors/image-classifier.test.js +113 -0
package/dist/plugins/uploads/preprocessors/image-classifier.test.js.map +1 -0
package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.d.ts.map +1 -1
package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js +8 -7
package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js.map +1 -1
package/dist/plugins/uploads/preprocessors/zip-preprocessor.d.ts.map +1 -1
package/dist/plugins/uploads/preprocessors/zip-preprocessor.js +35 -15
package/dist/plugins/uploads/preprocessors/zip-preprocessor.js.map +1 -1
package/dist/plugins/uploads/state.d.ts +1 -0
package/dist/plugins/uploads/state.d.ts.map +1 -1
package/dist/plugins/uploads/state.js +1 -1
package/dist/plugins/uploads/state.js.map +1 -1
package/dist/plugins/uploads/uploads.integration.test.js +97 -0
package/dist/plugins/uploads/uploads.integration.test.js.map +1 -1
package/dist/plugins/user-chat/plugin.d.ts +2 -0
package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
package/dist/plugins/user-chat/plugin.js +47 -3
package/dist/plugins/user-chat/plugin.js.map +1 -1
package/dist/plugins/user-chat/schema.d.ts +10 -0
package/dist/plugins/user-chat/schema.d.ts.map +1 -1
package/dist/plugins/user-chat/schema.js +1 -0
package/dist/plugins/user-chat/schema.js.map +1 -1
package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
package/dist/transport/http/routes/upload.d.ts.map +1 -1
package/dist/transport/http/routes/upload.js +60 -0
package/dist/transport/http/routes/upload.js.map +1 -1
package/package.json +2 -2
package/src/bootstrap.ts +3 -1
package/src/config.ts +6 -0
package/src/core/agents/agent.ts +134 -20
package/src/core/agents/config.ts +7 -0
package/src/core/agents/context.ts +11 -0
package/src/core/agents/state.ts +11 -4
package/src/core/file-store/file-store.ts +38 -18
package/src/core/image/vips-resizer.test.ts +26 -15
package/src/core/llm/anthropic.ts +19 -12
package/src/core/llm/cache-breakpoints.ts +15 -6
package/src/core/sessions/session.test.ts +6 -0
package/src/core/sessions/session.ts +12 -0
package/src/core/sessions/state.ts +5 -1
package/src/core/tools/executor.test.ts +1 -0
package/src/lib/utils/concurrency.test.ts +169 -0
package/src/lib/utils/concurrency.ts +72 -0
package/src/plugins/agent-status/plugin.ts +18 -25
package/src/plugins/context-compact/compaction-live.test.ts +221 -0
package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
package/src/plugins/context-compact/context-compactor.test.ts +71 -110
package/src/plugins/context-compact/context-compactor.ts +88 -43
package/src/plugins/context-compact/plugin.ts +19 -10
package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
package/src/plugins/filesystem/plugin.ts +8 -6
package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
package/src/plugins/resources/plugin.ts +4 -1
package/src/plugins/uploads/plugin.ts +212 -47
package/src/plugins/uploads/preprocessors/image-classifier.test.ts +142 -0
package/src/plugins/uploads/preprocessors/image-classifier.ts +13 -1
package/src/plugins/uploads/preprocessors/markitdown-preprocessor.ts +8 -8
package/src/plugins/uploads/preprocessors/zip-preprocessor.ts +37 -17
package/src/plugins/uploads/state.ts +1 -1
package/src/plugins/uploads/uploads.integration.test.ts +123 -0
package/src/plugins/user-chat/plugin.ts +60 -3
package/src/plugins/user-chat/schema.ts +10 -1
package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
package/src/transport/http/routes/upload.ts +87 -0

package/src/plugins/context-compact/context-compactor.ts CHANGED Viewed

@@ -2,14 +2,24 @@ import type { AgentId } from '~/core/agents/schema.js'
 import type { CompactedConversationMessage, ContextCompactedEvent } from '~/core/context/state.js'
 import { contextEvents } from '~/core/context/state.js'
 import { withSessionId } from '~/core/events/test-helpers.js'
-import type { LLMMessage, LLMProvider } from '~/core/llm/provider.js'
+import type { InferenceResponse, LLMError, LLMMessage } from '~/core/llm/provider.js'
 import type { ModelId } from '~/core/llm/schema.js'
 import { estimateMessagesTokens } from '~/core/llm/tokens.js'
 import type { SessionId } from '~/core/sessions/schema.js'
 import type { Result } from '~/lib/utils/result.js'
 import { Err, Ok } from '~/lib/utils/result.js'
 import type { Logger } from '../../lib/logger/logger.js'
-import { CONTEXT_SUMMARY_PROMPT, wrapContextSummary } from '../../prompts/index.js'
+import { wrapContextSummary } from '../../prompts/index.js'
+/**
+ * Callback used by the compactor to ask the host (an Agent) to run a side-channel
+ * inference reusing its own system prompt, tools, and conversation prefix.
+ *
+ * Implemented in practice by AgentContext.runAuxiliaryInference, which keeps the
+ * agent's prompt cache warm — only the trailing `extraMessages` and the response
+ * tokens are paid for; the rest of the prefix is served from cache.
+ */
+export type RunInferenceFn = (extraMessages: LLMMessage[]) => Promise<Result<InferenceResponse, LLMError>>
 // ============================================================================
 // Message formatting for summarization
@@ -76,24 +86,44 @@ function formatToolInput(input: unknown): string {
 // ============================================================================
 export interface CompactionConfig {
-	/** Model ID to use for summarization (required) */
-	model: ModelId
-	/** Token threshold to trigger compaction */
+	/**
+	 * @deprecated No longer used. Summarization runs on the agent's own model via
+	 * the auxiliary inference callback so the agent's prompt cache is reused.
+	 * Kept in the interface so existing preset configs continue to type-check.
+	 */
+	model?: ModelId
+	/** Token threshold to trigger compaction. */
 	maxTokens: number
-	/** Number of recent messages to keep uncompacted */
+	/** Number of recent messages to keep uncompacted. */
 	keepRecentMessages: number
-	/** Max tokens for kept recent messages (whichever limit is hit first) */
+	/** Max tokens for kept recent messages (whichever limit is hit first). */
 	keepRecentTokens?: number
-	/** Target token count after compaction (informational) */
+	/** Target token count after compaction (informational). */
 	targetTokens?: number
-	/** System prompt for summarization */
+	/** Optional override for the trailing summarization instruction sent to the model. */
 	summaryPrompt?: string
-	/** Enable history offloading before compaction */
+	/** Enable history offloading before compaction. */
 	offloadHistory?: boolean
-	/** Path prefix for offloaded history (default: /session/.history/) */
+	/** Path prefix for offloaded history (default: /session/.history/). */
 	historyPathPrefix?: string
 }
+/**
+ * Trailing user-message instruction appended to the agent's full prefix when
+ * requesting a summary. The model sees its real system prompt, tools and full
+ * conversation, then this instruction last. Phrased to discourage tool calls
+ * — Sonnet-class models reliably emit a plain text response under this prompt.
+ */
+export const DEFAULT_SUMMARY_INSTRUCTION =
+	'[CONTEXT COMPACTION REQUEST]\n'
+	+ 'The conversation above is approaching the context budget. Produce a concise '
+	+ 'summary (under 600 words) of everything discussed and decided so far. Cover: '
+	+ 'completed tasks and their outcomes, key decisions and rationale, current state '
+	+ 'of any in-progress work, important file paths or identifiers, and outstanding '
+	+ 'questions.\n\n'
+	+ 'Reply with plain text only. Do NOT call any tools. Do NOT acknowledge this '
+	+ 'request — just emit the summary directly.'
 // ============================================================================
 // Compaction Result
 // ============================================================================
@@ -141,7 +171,6 @@ export interface HistoryOffloader {
 export class ContextCompactor {
 	constructor(
-		private readonly llmProvider: LLMProvider,
 		private readonly logger: Logger,
 		private readonly config: CompactionConfig,
 		private readonly historyOffloader?: HistoryOffloader,
@@ -179,10 +208,17 @@ export class ContextCompactor {
 	}
 	/**
-	 * Check if compaction is needed based on token count.
+	 * Check if compaction is needed.
+	 *
+	 * Prefers the provider-reported prompt token count from the previous turn
+	 * (authoritative — comes straight from the model's tokenizer). Falls back
+	 * to the in-process estimator when no previous metrics exist (first turn).
+	 *
+	 * The estimator under-counts JSON-heavy tool-result history by ~2x, so
+	 * relying on it alone causes the trigger to never fire in long sessions.
 	 */
-	needsCompaction(messages: LLMMessage[]): boolean {
-		const tokens = estimateMessagesTokens(messages)
+	needsCompaction(messages: LLMMessage[], lastActualPromptTokens?: number): boolean {
+		const tokens = lastActualPromptTokens ?? estimateMessagesTokens(messages)
 		return tokens > this.config.maxTokens
 	}
@@ -194,33 +230,42 @@ export class ContextCompactor {
 		sessionId: SessionId,
 		agentId: AgentId,
 		messages: LLMMessage[],
+		runInference: RunInferenceFn,
+		lastActualPromptTokens?: number,
 	): Promise<Result<CompactionResult | null, Error>> {
-		if (!this.needsCompaction(messages)) {
+		if (!this.needsCompaction(messages, lastActualPromptTokens)) {
 			return Ok(null)
 		}
-		return this.compact(sessionId, agentId, messages)
+		return this.compact(sessionId, agentId, messages, runInference, lastActualPromptTokens)
 	}
 	/**
-	 * Compact conversation history by summarizing older messages.
+	 * Compact conversation history by asking the agent's own model to summarize
+	 * the older portion. The summarization call reuses the agent's existing
+	 * prompt cache via `runInference`, paying only for the trailing instruction
+	 * (a few hundred tokens) and the summary output — not the whole conversation
+	 * a second time.
 	 */
 	async compact(
 		sessionId: SessionId,
 		agentId: AgentId,
 		messages: LLMMessage[],
+		runInference: RunInferenceFn,
+		lastActualPromptTokens?: number,
 	): Promise<Result<CompactionResult, Error>> {
-		const originalTokens = estimateMessagesTokens(messages)
+		const originalTokens = lastActualPromptTokens ?? estimateMessagesTokens(messages)
 		this.logger.info('Starting context compaction', {
 			sessionId,
 			agentId,
 			messageCount: messages.length,
-			estimatedTokens: originalTokens,
+			originalTokens,
+			actualTokensReported: lastActualPromptTokens !== undefined,
 		})
-		// Split messages: keep recent, compact older
-		// Respect both count limit and token budget (whichever is hit first)
+		// Split messages: keep recent, compact older.
+		// Respect both count limit and token budget (whichever is hit first).
 		const keepCount = this.computeKeepCount(messages)
 		const toCompact = messages.slice(0, messages.length - keepCount)
 		const toKeep = messages.slice(messages.length - keepCount)
@@ -236,20 +281,16 @@ export class ContextCompactor {
 			})
 		}
-		// Format messages for summarization
-		const conversationText = toCompact
-			.map(formatMessageForSummary)
-			.join('\n\n')
-		// Offload history if enabled
+		// Offload the dropped messages to disk for forensics / replay.
+		// Best-effort; failures are logged but don't block compaction.
 		let historyPath: string | undefined
 		if (this.config.offloadHistory && this.historyOffloader) {
 			try {
+				const conversationText = toCompact.map(formatMessageForSummary).join('\n\n')
 				const pathPrefix = this.config.historyPathPrefix ?? DEFAULT_HISTORY_PATH_PREFIX
 				historyPath = await this.historyOffloader.offload(agentId, conversationText, pathPrefix)
 				this.logger.info('History offloaded', { sessionId, agentId, historyPath })
 			} catch (error) {
-				// History offloading is best-effort, log and continue
 				this.logger.warn('Failed to offload history', {
 					sessionId,
 					agentId,
@@ -258,18 +299,14 @@ export class ContextCompactor {
 			}
 		}
-		// Generate summary using LLM
-		const summaryResult = await this.llmProvider.inference({
-			model: this.config.model,
-			systemPrompt: this.config.summaryPrompt ?? CONTEXT_SUMMARY_PROMPT,
-			messages: [
-				{
-					role: 'user',
-					content: `Please summarize this conversation:\n\n${conversationText}`,
-				},
-			],
-			tools: [],
-		})
+		// Inline summarization: append the instruction as a trailing user message
+		// and let the host run inference with the agent's full live prefix. The
+		// agent's prompt cache from the previous turn covers everything up to
+		// (but not including) this instruction.
+		const summaryInstruction = this.config.summaryPrompt ?? DEFAULT_SUMMARY_INSTRUCTION
+		const summaryResult = await runInference([
+			{ role: 'user', content: summaryInstruction },
+		])
 		if (!summaryResult.ok) {
 			const llmError = summaryResult.error
@@ -283,9 +320,17 @@ export class ContextCompactor {
 		const summary = summaryResult.value.content ?? ''
-		// Create summary message (with history reference if offloaded)
+		if (!summary.trim()) {
+			this.logger.warn('Summarization returned empty content', { sessionId, agentId })
+			return Err(new Error('Compaction failed: model returned empty summary'))
+		}
+		// Replace the compacted portion with a single user-role summary message.
+		// Using `user` role (not `system`) so the wrap reads as part of the
+		// conversation flow — Anthropic recommends user-role for arbitrary
+		// mid-conversation context blocks.
 		const summaryMessage: LLMMessage = {
-			role: 'system',
+			role: 'user',
 			content: wrapContextSummary(summary, historyPath),
 		}

package/src/plugins/context-compact/plugin.ts CHANGED Viewed

@@ -10,36 +10,45 @@ import { type CompactionConfig, ContextCompactor, createContextCompactedEvent, t
 import { FileHistoryOffloader } from './history-offloader.js'
 /**
- * Plugin config — session-level compaction settings.
+ * Plugin config — session-level (default) compaction settings.
+ * Individual agents may override fields via `contextCompactPlugin.configureAgent({ ... })`.
  */
 export interface ContextCompactPluginConfig {
 	compaction: CompactionConfig
 }
+/**
+ * Per-agent override. Any field omitted falls back to the session-level config.
+ * Used for cases like "orchestrator gets a tighter 50k threshold while subagents
+ * keep the default 200k".
+ */
+export type ContextCompactAgentConfig = Partial<CompactionConfig>
 export const contextCompactPlugin = definePlugin('context-compact')
 	.pluginConfig<ContextCompactPluginConfig>()
+	.agentConfig<ContextCompactAgentConfig>()
 	.context(async (ctx, pluginConfig) => {
 		const historyOffloader: HistoryOffloader | undefined = pluginConfig.compaction.offloadHistory
 			? new FileHistoryOffloader(ctx.environment.sessionDir, ctx.platform.fs)
 			: undefined
-		const compactor = new ContextCompactor(
-			ctx.llm,
-			ctx.logger,
-			pluginConfig.compaction,
-			historyOffloader,
-		)
-		return { compactor }
+		return { historyOffloader, sessionConfig: pluginConfig.compaction }
 	})
 	.hook('beforeInference', async (ctx) => {
-		const compactor = ctx.pluginContext.compactor
+		const { historyOffloader, sessionConfig } = ctx.pluginContext
+		const agentOverrides = ctx.pluginAgentConfig ?? {}
+		const effectiveConfig: CompactionConfig = { ...sessionConfig, ...agentOverrides }
+		const compactor = new ContextCompactor(ctx.logger, effectiveConfig, historyOffloader)
 		const historyLLMMessages = ctx.agentState.conversationHistory
+		const lastActualPromptTokens = ctx.agentState.lastInferenceMetrics?.promptTokens
 		const result = await compactor.compactIfNeeded(
 			ctx.sessionId,
 			ctx.agentId,
 			historyLLMMessages,
+			ctx.runAuxiliaryInference,
+			lastActualPromptTokens,
 		)
 		if (result.ok && result.value !== null) {

package/src/plugins/filesystem/filesystem.integration.test.ts CHANGED Viewed

@@ -22,6 +22,14 @@ beforeAll(() => {
 	fs.writeFileSync(path.join(fixtureDir, 'hello.txt'), 'Hello, world!')
 	fs.writeFileSync(path.join(fixtureDir, 'multiline.txt'), Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`).join('\n'))
+	// Create a minimal 1x1 PNG for image tests
+	const onePixelPng = Buffer.from(
+		'89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c489000000'
+			+ '0a49444154789c6300010000000500010d0a2db40000000049454e44ae426082',
+		'hex',
+	)
+	fs.writeFileSync(path.join(fixtureDir, 'pixel.png'), onePixelPng)
 	// Create subdirectory with files
 	fs.mkdirSync(path.join(fixtureDir, 'subdir'), { recursive: true })
 	fs.writeFileSync(path.join(fixtureDir, 'subdir', 'nested.txt'), 'Nested content')
@@ -154,6 +162,42 @@ describe('filesystem plugin', () => {
 			await harness.shutdown()
 		})
+		it('read image file → file:// URL uses agent-visible input path, not resolved real path', async () => {
+			// Regression: previously read_file returned file://<realPath>, which the
+			// sandboxed FileStore then rejected when re-resolving on the next inference
+			// (it only accepts agent-visible paths like /home/user/session/...). The URL
+			// must echo input.path so it stays resolvable through fileStore.realPath().
+			const filePath = path.join(fixtureDir, 'pixel.png')
+			const harness = createFsHarness({
+				presets: [createFsPreset()],
+				llmProvider: MockLLMProvider.withSequence([
+					{
+						toolCalls: [{
+							id: ToolCallId('tc1'),
+							name: 'read_file',
+							input: { path: filePath },
+						}],
+					},
+					{ content: 'Done', toolCalls: [] },
+				]),
+			})
+			const session = await harness.createSession('test')
+			await session.sendAndWaitForIdle('Read image')
+			const callHistory = harness.llmProvider.getCallHistory()
+			const toolMessages = callHistory[1].messages.filter((m) => m.role === 'tool')
+			expect(toolMessages).toHaveLength(1)
+			const content = toolMessages[0].content
+			expect(Array.isArray(content)).toBe(true)
+			const blocks = content as Array<{ type: string; text?: string; imageUrl?: { url: string } }>
+			const imageBlock = blocks.find((b) => b.type === 'image_url')
+			expect(imageBlock).toBeDefined()
+			expect(imageBlock?.imageUrl?.url).toBe(`file://${filePath}`)
+			await harness.shutdown()
+		})
 		it('read a directory path → "is not a file" error', async () => {
 			const dirPath = path.join(fixtureDir, 'subdir')
 			const harness = createFsHarness({

package/src/plugins/filesystem/plugin.ts CHANGED Viewed

@@ -158,16 +158,18 @@ export const filesystemPlugin = definePlugin('filesystem')
 						})
 					}
-					// Image files → return as multimodal image content
+					// Image files → return as multimodal image content.
+					// Store the agent-visible input.path (not the resolved real path):
+					// the URL survives into conversationHistory and gets re-resolved
+					// via fileStore.realPath() on every subsequent inference. In
+					// sandboxed mode, realPath() rejects already-resolved disk paths
+					// (only accepts the virtual prefix), so storing realPath would
+					// surface as "[Image unavailable: …]" on every later turn.
 					const mimeType = getImageMimeType(input.path)
 					if (mimeType) {
-						const realPathResult = fileStore.realPath(input.path)
-						if (!realPathResult.ok) {
-							return Err({ message: realPathResult.error, recoverable: false })
-						}
 						return Ok([
 							{ type: 'text', text: `Image: ${input.path} (${mimeType}, ${stats.size} bytes)` },
-							{ type: 'image_url', imageUrl: { url: `file://${realPathResult.value}` } },
+							{ type: 'image_url', imageUrl: { url: `file://${input.path}` } },
 						])
 					}

package/src/plugins/mailbox/mailbox.integration.test.ts CHANGED Viewed

@@ -581,7 +581,7 @@ describe('mailbox plugin', () => {
 			await harness.shutdown()
 		})
-		it('empty-stop LLM response → agent retries; persistent empty → onError reports to parent', async () => {
+		it('empty-stop LLM response → agent retries; persistent empty → coalesces to WAITING, no error', async () => {
 			let workerCalls = 0
 			let orchestratorCalls = 0
@@ -603,31 +603,25 @@ describe('mailbox plugin', () => {
 						return { content: 'Done', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
 					}
 					workerCalls++
-					// Always empty-stop → triggers retry until exhausted, then onError
+					// Always empty-stop → triggers retry until exhausted, then coalesces to WAITING
 					return { content: null, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
 				},
 			})
 			const session = await harness.createSession('test')
-			await session.sendMessage('Start')
-			// Worker ends up errored (not idle); poll for the error message to parent.
-			const deadline = Date.now() + 5000
-			let errMsg: { message: { content: string; from: unknown } } | undefined
-			while (Date.now() < deadline) {
-				const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
-				errMsg = events.find(e =>
-					e.message.from === AgentId('worker_1')
-					&& typeof e.message.content === 'string'
-					&& e.message.content.startsWith('Agent encountered an error:'),
-				)
-				if (errMsg) break
-				await new Promise((r) => setTimeout(r, 50))
-			}
+			await session.sendAndWaitForIdle('Start')
 			// Initial + 2 retries = 3 worker LLM calls
 			expect(workerCalls).toBe(3)
-			expect(errMsg).toBeDefined()
+			// No error message to parent — exhaustion coalesces to WAITING, not failure
+			const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
+			const errMsg = events.find(e =>
+				e.message.from === AgentId('worker_1')
+				&& typeof e.message.content === 'string'
+				&& e.message.content.startsWith('Agent encountered an error:'),
+			)
+			expect(errMsg).toBeUndefined()
 			await harness.shutdown()
 		})

package/src/plugins/resources/plugin.ts CHANGED Viewed

@@ -112,7 +112,10 @@ export const resourcesPlugin = definePlugin('resources')
 				await fs.writeFile(tempPath, input.fileBuffer)
 				try {
-					await exec('unzip', ['-o', '-q', tempPath, '-d', targetDir])
+					// `-x .git .git/*` so a stray .git entry in the ZIP can't overwrite the
+					// worktree's gitdir pointer (which silently breaks every subsequent git
+					// command in the workspace).
+					await exec('unzip', ['-o', '-q', tempPath, '-d', targetDir, '-x', '.git', '.git/*'])
 				} catch (error) {
 					const message = error instanceof Error ? error.message : String(error)
 					// unzip returns exit code 1 for warnings — still usable