@roj-ai/sdk 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/dist/bootstrap.d.ts +1 -0
  2. package/dist/bootstrap.d.ts.map +1 -1
  3. package/dist/core/agents/agent.d.ts +25 -1
  4. package/dist/core/agents/agent.d.ts.map +1 -1
  5. package/dist/core/agents/agent.js +117 -21
  6. package/dist/core/agents/agent.js.map +1 -1
  7. package/dist/core/agents/config.d.ts +7 -0
  8. package/dist/core/agents/config.d.ts.map +1 -1
  9. package/dist/core/agents/context.d.ts +10 -0
  10. package/dist/core/agents/context.d.ts.map +1 -1
  11. package/dist/core/agents/state.d.ts +11 -3
  12. package/dist/core/agents/state.d.ts.map +1 -1
  13. package/dist/core/agents/state.js.map +1 -1
  14. package/dist/core/file-store/file-store.d.ts +5 -1
  15. package/dist/core/file-store/file-store.d.ts.map +1 -1
  16. package/dist/core/file-store/file-store.js +31 -21
  17. package/dist/core/file-store/file-store.js.map +1 -1
  18. package/dist/core/image/vips-resizer.test.js +26 -14
  19. package/dist/core/image/vips-resizer.test.js.map +1 -1
  20. package/dist/core/llm/anthropic.d.ts.map +1 -1
  21. package/dist/core/llm/anthropic.js +11 -8
  22. package/dist/core/llm/anthropic.js.map +1 -1
  23. package/dist/core/llm/cache-breakpoints.d.ts +5 -1
  24. package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
  25. package/dist/core/llm/cache-breakpoints.js +10 -5
  26. package/dist/core/llm/cache-breakpoints.js.map +1 -1
  27. package/dist/core/sessions/session.d.ts.map +1 -1
  28. package/dist/core/sessions/session.js +3 -0
  29. package/dist/core/sessions/session.js.map +1 -1
  30. package/dist/core/sessions/session.test.js +5 -0
  31. package/dist/core/sessions/session.test.js.map +1 -1
  32. package/dist/core/sessions/state.d.ts.map +1 -1
  33. package/dist/core/sessions/state.js +5 -1
  34. package/dist/core/sessions/state.js.map +1 -1
  35. package/dist/core/tools/executor.test.js +1 -0
  36. package/dist/core/tools/executor.test.js.map +1 -1
  37. package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
  38. package/dist/plugins/agent-status/plugin.js +18 -26
  39. package/dist/plugins/agent-status/plugin.js.map +1 -1
  40. package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
  41. package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
  42. package/dist/plugins/context-compact/compaction-live.test.js +177 -0
  43. package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
  44. package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
  45. package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
  46. package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
  47. package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
  48. package/dist/plugins/context-compact/context-compactor.js +60 -36
  49. package/dist/plugins/context-compact/context-compactor.js.map +1 -1
  50. package/dist/plugins/context-compact/context-compactor.test.js +69 -103
  51. package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
  52. package/dist/plugins/context-compact/plugin.d.ts +9 -2
  53. package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
  54. package/dist/plugins/context-compact/plugin.js +8 -4
  55. package/dist/plugins/context-compact/plugin.js.map +1 -1
  56. package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
  57. package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
  58. package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
  59. package/dist/plugins/filesystem/plugin.js +8 -6
  60. package/dist/plugins/filesystem/plugin.js.map +1 -1
  61. package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
  62. package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
  63. package/dist/plugins/resources/plugin.d.ts.map +1 -1
  64. package/dist/plugins/resources/plugin.js +4 -1
  65. package/dist/plugins/resources/plugin.js.map +1 -1
  66. package/dist/plugins/uploads/preprocessors/image-classifier.d.ts.map +1 -1
  67. package/dist/plugins/uploads/preprocessors/image-classifier.js +15 -2
  68. package/dist/plugins/uploads/preprocessors/image-classifier.js.map +1 -1
  69. package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.d.ts.map +1 -1
  70. package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js +72 -19
  71. package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js.map +1 -1
  72. package/dist/plugins/user-chat/plugin.d.ts +2 -0
  73. package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
  74. package/dist/plugins/user-chat/plugin.js +47 -3
  75. package/dist/plugins/user-chat/plugin.js.map +1 -1
  76. package/dist/plugins/user-chat/schema.d.ts +10 -0
  77. package/dist/plugins/user-chat/schema.d.ts.map +1 -1
  78. package/dist/plugins/user-chat/schema.js +1 -0
  79. package/dist/plugins/user-chat/schema.js.map +1 -1
  80. package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
  81. package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
  82. package/package.json +2 -2
  83. package/src/core/agents/agent.ts +134 -20
  84. package/src/core/agents/config.ts +7 -0
  85. package/src/core/agents/context.ts +11 -0
  86. package/src/core/agents/state.ts +11 -4
  87. package/src/core/file-store/file-store.ts +38 -18
  88. package/src/core/image/vips-resizer.test.ts +26 -15
  89. package/src/core/llm/anthropic.ts +19 -12
  90. package/src/core/llm/cache-breakpoints.ts +15 -6
  91. package/src/core/sessions/session.test.ts +6 -0
  92. package/src/core/sessions/session.ts +4 -0
  93. package/src/core/sessions/state.ts +5 -1
  94. package/src/core/tools/executor.test.ts +1 -0
  95. package/src/plugins/agent-status/plugin.ts +18 -25
  96. package/src/plugins/context-compact/compaction-live.test.ts +221 -0
  97. package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
  98. package/src/plugins/context-compact/context-compactor.test.ts +71 -110
  99. package/src/plugins/context-compact/context-compactor.ts +88 -43
  100. package/src/plugins/context-compact/plugin.ts +19 -10
  101. package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
  102. package/src/plugins/filesystem/plugin.ts +8 -6
  103. package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
  104. package/src/plugins/resources/plugin.ts +4 -1
  105. package/src/plugins/uploads/preprocessors/image-classifier.ts +15 -2
  106. package/src/plugins/uploads/preprocessors/markitdown-preprocessor.ts +89 -20
  107. package/src/plugins/user-chat/plugin.ts +60 -3
  108. package/src/plugins/user-chat/schema.ts +10 -1
  109. package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
@@ -2,14 +2,24 @@ import type { AgentId } from '~/core/agents/schema.js'
2
2
  import type { CompactedConversationMessage, ContextCompactedEvent } from '~/core/context/state.js'
3
3
  import { contextEvents } from '~/core/context/state.js'
4
4
  import { withSessionId } from '~/core/events/test-helpers.js'
5
- import type { LLMMessage, LLMProvider } from '~/core/llm/provider.js'
5
+ import type { InferenceResponse, LLMError, LLMMessage } from '~/core/llm/provider.js'
6
6
  import type { ModelId } from '~/core/llm/schema.js'
7
7
  import { estimateMessagesTokens } from '~/core/llm/tokens.js'
8
8
  import type { SessionId } from '~/core/sessions/schema.js'
9
9
  import type { Result } from '~/lib/utils/result.js'
10
10
  import { Err, Ok } from '~/lib/utils/result.js'
11
11
  import type { Logger } from '../../lib/logger/logger.js'
12
- import { CONTEXT_SUMMARY_PROMPT, wrapContextSummary } from '../../prompts/index.js'
12
+ import { wrapContextSummary } from '../../prompts/index.js'
13
+
14
+ /**
15
+ * Callback used by the compactor to ask the host (an Agent) to run a side-channel
16
+ * inference reusing its own system prompt, tools, and conversation prefix.
17
+ *
18
+ * Implemented in practice by AgentContext.runAuxiliaryInference, which keeps the
19
+ * agent's prompt cache warm — only the trailing `extraMessages` and the response
20
+ * tokens are billed at the full rate; the rest of the prefix is served from cache at the reduced cache-read rate.
21
+ */
22
+ export type RunInferenceFn = (extraMessages: LLMMessage[]) => Promise<Result<InferenceResponse, LLMError>>
13
23
 
14
24
  // ============================================================================
15
25
  // Message formatting for summarization
@@ -76,24 +86,44 @@ function formatToolInput(input: unknown): string {
76
86
  // ============================================================================
77
87
 
78
88
  export interface CompactionConfig {
79
- /** Model ID to use for summarization (required) */
80
- model: ModelId
81
- /** Token threshold to trigger compaction */
89
+ /**
90
+ * @deprecated No longer used. Summarization runs on the agent's own model via
91
+ * the auxiliary inference callback so the agent's prompt cache is reused.
92
+ * Kept in the interface so existing preset configs continue to type-check.
93
+ */
94
+ model?: ModelId
95
+ /** Token threshold to trigger compaction. */
82
96
  maxTokens: number
83
- /** Number of recent messages to keep uncompacted */
97
+ /** Number of recent messages to keep uncompacted. */
84
98
  keepRecentMessages: number
85
- /** Max tokens for kept recent messages (whichever limit is hit first) */
99
+ /** Max tokens for kept recent messages (whichever limit is hit first). */
86
100
  keepRecentTokens?: number
87
- /** Target token count after compaction (informational) */
101
+ /** Target token count after compaction (informational). */
88
102
  targetTokens?: number
89
- /** System prompt for summarization */
103
+ /** Optional override for the trailing summarization instruction sent to the model. */
90
104
  summaryPrompt?: string
91
- /** Enable history offloading before compaction */
105
+ /** Enable history offloading before compaction. */
92
106
  offloadHistory?: boolean
93
- /** Path prefix for offloaded history (default: /session/.history/) */
107
+ /** Path prefix for offloaded history (default: /session/.history/). */
94
108
  historyPathPrefix?: string
95
109
  }
96
110
 
111
+ /**
112
+ * Trailing user-message instruction appended to the agent's full prefix when
113
+ * requesting a summary. The model sees its real system prompt, tools and full
114
+ * conversation, then this instruction last. Phrased to discourage tool calls
115
+ * — Sonnet-class models reliably emit a plain text response under this prompt.
116
+ */
117
+ export const DEFAULT_SUMMARY_INSTRUCTION =
118
+ '[CONTEXT COMPACTION REQUEST]\n'
119
+ + 'The conversation above is approaching the context budget. Produce a concise '
120
+ + 'summary (under 600 words) of everything discussed and decided so far. Cover: '
121
+ + 'completed tasks and their outcomes, key decisions and rationale, current state '
122
+ + 'of any in-progress work, important file paths or identifiers, and outstanding '
123
+ + 'questions.\n\n'
124
+ + 'Reply with plain text only. Do NOT call any tools. Do NOT acknowledge this '
125
+ + 'request — just emit the summary directly.'
126
+
97
127
  // ============================================================================
98
128
  // Compaction Result
99
129
  // ============================================================================
@@ -141,7 +171,6 @@ export interface HistoryOffloader {
141
171
 
142
172
  export class ContextCompactor {
143
173
  constructor(
144
- private readonly llmProvider: LLMProvider,
145
174
  private readonly logger: Logger,
146
175
  private readonly config: CompactionConfig,
147
176
  private readonly historyOffloader?: HistoryOffloader,
@@ -179,10 +208,17 @@ export class ContextCompactor {
179
208
  }
180
209
 
181
210
  /**
182
- * Check if compaction is needed based on token count.
211
+ * Check if compaction is needed.
212
+ *
213
+ * Prefers the provider-reported prompt token count from the previous turn
214
+ * (authoritative — comes straight from the model's tokenizer). Falls back
215
+ * to the in-process estimator when no previous metrics exist (first turn).
216
+ *
217
+ * The estimator under-counts JSON-heavy tool-result history by ~2x, so
218
+ * relying on it alone causes the trigger to never fire in long sessions.
183
219
  */
184
- needsCompaction(messages: LLMMessage[]): boolean {
185
- const tokens = estimateMessagesTokens(messages)
220
+ needsCompaction(messages: LLMMessage[], lastActualPromptTokens?: number): boolean {
221
+ const tokens = lastActualPromptTokens ?? estimateMessagesTokens(messages)
186
222
  return tokens > this.config.maxTokens
187
223
  }
188
224
 
@@ -194,33 +230,42 @@ export class ContextCompactor {
194
230
  sessionId: SessionId,
195
231
  agentId: AgentId,
196
232
  messages: LLMMessage[],
233
+ runInference: RunInferenceFn,
234
+ lastActualPromptTokens?: number,
197
235
  ): Promise<Result<CompactionResult | null, Error>> {
198
- if (!this.needsCompaction(messages)) {
236
+ if (!this.needsCompaction(messages, lastActualPromptTokens)) {
199
237
  return Ok(null)
200
238
  }
201
239
 
202
- return this.compact(sessionId, agentId, messages)
240
+ return this.compact(sessionId, agentId, messages, runInference, lastActualPromptTokens)
203
241
  }
204
242
 
205
243
  /**
206
- * Compact conversation history by summarizing older messages.
244
+ * Compact conversation history by asking the agent's own model to summarize
245
+ * the conversation so far; the summary then replaces the older portion. The summarization call reuses the agent's existing
246
+ * prompt cache via `runInference`, paying only for the trailing instruction
247
+ * (a few hundred tokens) and the summary output — not the whole conversation
248
+ * a second time.
207
249
  */
208
250
  async compact(
209
251
  sessionId: SessionId,
210
252
  agentId: AgentId,
211
253
  messages: LLMMessage[],
254
+ runInference: RunInferenceFn,
255
+ lastActualPromptTokens?: number,
212
256
  ): Promise<Result<CompactionResult, Error>> {
213
- const originalTokens = estimateMessagesTokens(messages)
257
+ const originalTokens = lastActualPromptTokens ?? estimateMessagesTokens(messages)
214
258
 
215
259
  this.logger.info('Starting context compaction', {
216
260
  sessionId,
217
261
  agentId,
218
262
  messageCount: messages.length,
219
- estimatedTokens: originalTokens,
263
+ originalTokens,
264
+ actualTokensReported: lastActualPromptTokens !== undefined,
220
265
  })
221
266
 
222
- // Split messages: keep recent, compact older
223
- // Respect both count limit and token budget (whichever is hit first)
267
+ // Split messages: keep recent, compact older.
268
+ // Respect both count limit and token budget (whichever is hit first).
224
269
  const keepCount = this.computeKeepCount(messages)
225
270
  const toCompact = messages.slice(0, messages.length - keepCount)
226
271
  const toKeep = messages.slice(messages.length - keepCount)
@@ -236,20 +281,16 @@ export class ContextCompactor {
236
281
  })
237
282
  }
238
283
 
239
- // Format messages for summarization
240
- const conversationText = toCompact
241
- .map(formatMessageForSummary)
242
- .join('\n\n')
243
-
244
- // Offload history if enabled
284
+ // Offload the dropped messages to disk for forensics / replay.
285
+ // Best-effort; failures are logged but don't block compaction.
245
286
  let historyPath: string | undefined
246
287
  if (this.config.offloadHistory && this.historyOffloader) {
247
288
  try {
289
+ const conversationText = toCompact.map(formatMessageForSummary).join('\n\n')
248
290
  const pathPrefix = this.config.historyPathPrefix ?? DEFAULT_HISTORY_PATH_PREFIX
249
291
  historyPath = await this.historyOffloader.offload(agentId, conversationText, pathPrefix)
250
292
  this.logger.info('History offloaded', { sessionId, agentId, historyPath })
251
293
  } catch (error) {
252
- // History offloading is best-effort, log and continue
253
294
  this.logger.warn('Failed to offload history', {
254
295
  sessionId,
255
296
  agentId,
@@ -258,18 +299,14 @@ export class ContextCompactor {
258
299
  }
259
300
  }
260
301
 
261
- // Generate summary using LLM
262
- const summaryResult = await this.llmProvider.inference({
263
- model: this.config.model,
264
- systemPrompt: this.config.summaryPrompt ?? CONTEXT_SUMMARY_PROMPT,
265
- messages: [
266
- {
267
- role: 'user',
268
- content: `Please summarize this conversation:\n\n${conversationText}`,
269
- },
270
- ],
271
- tools: [],
272
- })
302
+ // Inline summarization: append the instruction as a trailing user message
303
+ // and let the host run inference with the agent's full live prefix. The
304
+ // agent's prompt cache from the previous turn covers everything up to
305
+ // (but not including) this instruction.
306
+ const summaryInstruction = this.config.summaryPrompt ?? DEFAULT_SUMMARY_INSTRUCTION
307
+ const summaryResult = await runInference([
308
+ { role: 'user', content: summaryInstruction },
309
+ ])
273
310
 
274
311
  if (!summaryResult.ok) {
275
312
  const llmError = summaryResult.error
@@ -283,9 +320,17 @@ export class ContextCompactor {
283
320
 
284
321
  const summary = summaryResult.value.content ?? ''
285
322
 
286
- // Create summary message (with history reference if offloaded)
323
+ if (!summary.trim()) {
324
+ this.logger.warn('Summarization returned empty content', { sessionId, agentId })
325
+ return Err(new Error('Compaction failed: model returned empty summary'))
326
+ }
327
+
328
+ // Replace the compacted portion with a single user-role summary message.
329
+ // Using `user` role (not `system`) so the wrap reads as part of the
330
+ // conversation flow — Anthropic recommends user-role for arbitrary
331
+ // mid-conversation context blocks.
287
332
  const summaryMessage: LLMMessage = {
288
- role: 'system',
333
+ role: 'user',
289
334
  content: wrapContextSummary(summary, historyPath),
290
335
  }
291
336
 
@@ -10,36 +10,45 @@ import { type CompactionConfig, ContextCompactor, createContextCompactedEvent, t
10
10
  import { FileHistoryOffloader } from './history-offloader.js'
11
11
 
12
12
  /**
13
- * Plugin config — session-level compaction settings.
13
+ * Plugin config — session-level (default) compaction settings.
14
+ * Individual agents may override fields via `contextCompactPlugin.configureAgent({ ... })`.
14
15
  */
15
16
  export interface ContextCompactPluginConfig {
16
17
  compaction: CompactionConfig
17
18
  }
18
19
 
20
+ /**
21
+ * Per-agent override. Any field omitted falls back to the session-level config.
22
+ * Used for cases like "orchestrator gets a tighter 50k threshold while subagents
23
+ * keep the default 200k".
24
+ */
25
+ export type ContextCompactAgentConfig = Partial<CompactionConfig>
26
+
19
27
  export const contextCompactPlugin = definePlugin('context-compact')
20
28
  .pluginConfig<ContextCompactPluginConfig>()
29
+ .agentConfig<ContextCompactAgentConfig>()
21
30
  .context(async (ctx, pluginConfig) => {
22
31
  const historyOffloader: HistoryOffloader | undefined = pluginConfig.compaction.offloadHistory
23
32
  ? new FileHistoryOffloader(ctx.environment.sessionDir, ctx.platform.fs)
24
33
  : undefined
25
34
 
26
- const compactor = new ContextCompactor(
27
- ctx.llm,
28
- ctx.logger,
29
- pluginConfig.compaction,
30
- historyOffloader,
31
- )
32
-
33
- return { compactor }
35
+ return { historyOffloader, sessionConfig: pluginConfig.compaction }
34
36
  })
35
37
  .hook('beforeInference', async (ctx) => {
36
- const compactor = ctx.pluginContext.compactor
38
+ const { historyOffloader, sessionConfig } = ctx.pluginContext
39
+ const agentOverrides = ctx.pluginAgentConfig ?? {}
40
+ const effectiveConfig: CompactionConfig = { ...sessionConfig, ...agentOverrides }
41
+
42
+ const compactor = new ContextCompactor(ctx.logger, effectiveConfig, historyOffloader)
37
43
  const historyLLMMessages = ctx.agentState.conversationHistory
44
+ const lastActualPromptTokens = ctx.agentState.lastInferenceMetrics?.promptTokens
38
45
 
39
46
  const result = await compactor.compactIfNeeded(
40
47
  ctx.sessionId,
41
48
  ctx.agentId,
42
49
  historyLLMMessages,
50
+ ctx.runAuxiliaryInference,
51
+ lastActualPromptTokens,
43
52
  )
44
53
 
45
54
  if (result.ok && result.value !== null) {
@@ -22,6 +22,14 @@ beforeAll(() => {
22
22
  fs.writeFileSync(path.join(fixtureDir, 'hello.txt'), 'Hello, world!')
23
23
  fs.writeFileSync(path.join(fixtureDir, 'multiline.txt'), Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`).join('\n'))
24
24
 
25
+ // Create a minimal 1x1 PNG for image tests
26
+ const onePixelPng = Buffer.from(
27
+ '89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c489000000'
28
+ + '0a49444154789c6300010000000500010d0a2db40000000049454e44ae426082',
29
+ 'hex',
30
+ )
31
+ fs.writeFileSync(path.join(fixtureDir, 'pixel.png'), onePixelPng)
32
+
25
33
  // Create subdirectory with files
26
34
  fs.mkdirSync(path.join(fixtureDir, 'subdir'), { recursive: true })
27
35
  fs.writeFileSync(path.join(fixtureDir, 'subdir', 'nested.txt'), 'Nested content')
@@ -154,6 +162,42 @@ describe('filesystem plugin', () => {
154
162
  await harness.shutdown()
155
163
  })
156
164
 
165
+ it('read image file → file:// URL uses agent-visible input path, not resolved real path', async () => {
166
+ // Regression: previously read_file returned file://<realPath>, which the
167
+ // sandboxed FileStore then rejected when re-resolving on the next inference
168
+ // (it only accepts agent-visible paths like /home/user/session/...). The URL
169
+ // must echo input.path so it stays resolvable through fileStore.realPath().
170
+ const filePath = path.join(fixtureDir, 'pixel.png')
171
+ const harness = createFsHarness({
172
+ presets: [createFsPreset()],
173
+ llmProvider: MockLLMProvider.withSequence([
174
+ {
175
+ toolCalls: [{
176
+ id: ToolCallId('tc1'),
177
+ name: 'read_file',
178
+ input: { path: filePath },
179
+ }],
180
+ },
181
+ { content: 'Done', toolCalls: [] },
182
+ ]),
183
+ })
184
+
185
+ const session = await harness.createSession('test')
186
+ await session.sendAndWaitForIdle('Read image')
187
+
188
+ const callHistory = harness.llmProvider.getCallHistory()
189
+ const toolMessages = callHistory[1].messages.filter((m) => m.role === 'tool')
190
+ expect(toolMessages).toHaveLength(1)
191
+ const content = toolMessages[0].content
192
+ expect(Array.isArray(content)).toBe(true)
193
+ const blocks = content as Array<{ type: string; text?: string; imageUrl?: { url: string } }>
194
+ const imageBlock = blocks.find((b) => b.type === 'image_url')
195
+ expect(imageBlock).toBeDefined()
196
+ expect(imageBlock?.imageUrl?.url).toBe(`file://${filePath}`)
197
+
198
+ await harness.shutdown()
199
+ })
200
+
157
201
  it('read a directory path → "is not a file" error', async () => {
158
202
  const dirPath = path.join(fixtureDir, 'subdir')
159
203
  const harness = createFsHarness({
@@ -158,16 +158,18 @@ export const filesystemPlugin = definePlugin('filesystem')
158
158
  })
159
159
  }
160
160
 
161
- // Image files → return as multimodal image content
161
+ // Image files → return as multimodal image content.
162
+ // Store the agent-visible input.path (not the resolved real path):
163
+ // the URL survives into conversationHistory and gets re-resolved
164
+ // via fileStore.realPath() on every subsequent inference. In
165
+ // sandboxed mode, realPath() rejects already-resolved disk paths
166
+ // (only accepts the virtual prefix), so storing realPath would
167
+ // surface as "[Image unavailable: …]" on every later turn.
162
168
  const mimeType = getImageMimeType(input.path)
163
169
  if (mimeType) {
164
- const realPathResult = fileStore.realPath(input.path)
165
- if (!realPathResult.ok) {
166
- return Err({ message: realPathResult.error, recoverable: false })
167
- }
168
170
  return Ok([
169
171
  { type: 'text', text: `Image: ${input.path} (${mimeType}, ${stats.size} bytes)` },
170
- { type: 'image_url', imageUrl: { url: `file://${realPathResult.value}` } },
172
+ { type: 'image_url', imageUrl: { url: `file://${input.path}` } },
171
173
  ])
172
174
  }
173
175
 
@@ -581,7 +581,7 @@ describe('mailbox plugin', () => {
581
581
  await harness.shutdown()
582
582
  })
583
583
 
584
- it('empty-stop LLM response → agent retries; persistent empty → onError reports to parent', async () => {
584
+ it('empty-stop LLM response → agent retries; persistent empty → coalesces to WAITING, no error', async () => {
585
585
  let workerCalls = 0
586
586
  let orchestratorCalls = 0
587
587
 
@@ -603,31 +603,25 @@ describe('mailbox plugin', () => {
603
603
  return { content: 'Done', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
604
604
  }
605
605
  workerCalls++
606
- // Always empty-stop → triggers retry until exhausted, then onError
606
+ // Always empty-stop → triggers retry until exhausted, then coalesces to WAITING
607
607
  return { content: null, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
608
608
  },
609
609
  })
610
610
 
611
611
  const session = await harness.createSession('test')
612
- await session.sendMessage('Start')
613
-
614
- // Worker ends up errored (not idle); poll for the error message to parent.
615
- const deadline = Date.now() + 5000
616
- let errMsg: { message: { content: string; from: unknown } } | undefined
617
- while (Date.now() < deadline) {
618
- const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
619
- errMsg = events.find(e =>
620
- e.message.from === AgentId('worker_1')
621
- && typeof e.message.content === 'string'
622
- && e.message.content.startsWith('Agent encountered an error:'),
623
- )
624
- if (errMsg) break
625
- await new Promise((r) => setTimeout(r, 50))
626
- }
612
+ await session.sendAndWaitForIdle('Start')
627
613
 
628
614
  // Initial + 2 retries = 3 worker LLM calls
629
615
  expect(workerCalls).toBe(3)
630
- expect(errMsg).toBeDefined()
616
+
617
+ // No error message to parent — exhaustion coalesces to WAITING, not failure
618
+ const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
619
+ const errMsg = events.find(e =>
620
+ e.message.from === AgentId('worker_1')
621
+ && typeof e.message.content === 'string'
622
+ && e.message.content.startsWith('Agent encountered an error:'),
623
+ )
624
+ expect(errMsg).toBeUndefined()
631
625
 
632
626
  await harness.shutdown()
633
627
  })
@@ -112,7 +112,10 @@ export const resourcesPlugin = definePlugin('resources')
112
112
  await fs.writeFile(tempPath, input.fileBuffer)
113
113
 
114
114
  try {
115
- await exec('unzip', ['-o', '-q', tempPath, '-d', targetDir])
115
+ // `-x .git .git/*` so a stray .git entry in the ZIP can't overwrite the
116
+ // worktree's gitdir pointer (which silently breaks every subsequent git
117
+ // command in the workspace).
118
+ await exec('unzip', ['-o', '-q', tempPath, '-d', targetDir, '-x', '.git', '.git/*'])
116
119
  } catch (error) {
117
120
  const message = error instanceof Error ? error.message : String(error)
118
121
  // unzip returns exit code 1 for warnings — still usable
@@ -71,6 +71,7 @@ export class ImageClassifierPreprocessor implements Preprocessor {
71
71
  mimeType: string,
72
72
  ctx: PreprocessorContext,
73
73
  ): Promise<Result<PreprocessorResult, Error>> {
74
+ const totalStart = Date.now()
74
75
  try {
75
76
  // Check + stat image file
76
77
  if (!(await this.fs.exists(filePath))) {
@@ -88,15 +89,21 @@ export class ImageClassifierPreprocessor implements Preprocessor {
88
89
  }
89
90
 
90
91
  // Try vision inference
92
+ const inferenceStart = Date.now()
91
93
  const description = await this.describeImage(filePath, mimeType)
94
+ const inferenceDurationMs = Date.now() - inferenceStart
92
95
 
93
96
  if (description) {
94
97
  // Save description to file
95
98
  const writeResult = await ctx.files.write('description.txt', description)
96
99
 
97
- this.logger.debug('Image described successfully', {
100
+ this.logger.info('Image described successfully', {
98
101
  filename,
102
+ mimeType,
103
+ sizeBytes: size,
99
104
  descriptionLength: description.length,
105
+ inferenceDurationMs,
106
+ totalDurationMs: Date.now() - totalStart,
100
107
  })
101
108
 
102
109
  return Ok({
@@ -106,6 +113,12 @@ export class ImageClassifierPreprocessor implements Preprocessor {
106
113
  }
107
114
 
108
115
  // Fallback to basic metadata
116
+ this.logger.warn('Image description unavailable, falling back to metadata', {
117
+ filename,
118
+ mimeType,
119
+ sizeBytes: size,
120
+ inferenceDurationMs,
121
+ })
109
122
  return Ok({
110
123
  extractedContent: `[Image: ${filename}, ${this.formatSize(size)}, ${mimeType}]`,
111
124
  })
@@ -113,7 +126,7 @@ export class ImageClassifierPreprocessor implements Preprocessor {
113
126
  this.logger.error(
114
127
  'Image classification failed',
115
128
  error instanceof Error ? error : undefined,
116
- { filePath },
129
+ { filePath, durationMs: Date.now() - totalStart },
117
130
  )
118
131
 
119
132
  // Return basic info on error instead of failing