@roj-ai/sdk 0.1.14 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/dist/bootstrap.d.ts +1 -0
  2. package/dist/bootstrap.d.ts.map +1 -1
  3. package/dist/core/agents/agent.d.ts +25 -1
  4. package/dist/core/agents/agent.d.ts.map +1 -1
  5. package/dist/core/agents/agent.js +117 -21
  6. package/dist/core/agents/agent.js.map +1 -1
  7. package/dist/core/agents/config.d.ts +7 -0
  8. package/dist/core/agents/config.d.ts.map +1 -1
  9. package/dist/core/agents/context.d.ts +10 -0
  10. package/dist/core/agents/context.d.ts.map +1 -1
  11. package/dist/core/agents/state.d.ts +11 -3
  12. package/dist/core/agents/state.d.ts.map +1 -1
  13. package/dist/core/agents/state.js.map +1 -1
  14. package/dist/core/file-store/file-store.d.ts +5 -1
  15. package/dist/core/file-store/file-store.d.ts.map +1 -1
  16. package/dist/core/file-store/file-store.js +31 -21
  17. package/dist/core/file-store/file-store.js.map +1 -1
  18. package/dist/core/image/vips-resizer.test.js +26 -14
  19. package/dist/core/image/vips-resizer.test.js.map +1 -1
  20. package/dist/core/llm/anthropic.d.ts.map +1 -1
  21. package/dist/core/llm/anthropic.js +11 -8
  22. package/dist/core/llm/anthropic.js.map +1 -1
  23. package/dist/core/llm/cache-breakpoints.d.ts +5 -1
  24. package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
  25. package/dist/core/llm/cache-breakpoints.js +10 -5
  26. package/dist/core/llm/cache-breakpoints.js.map +1 -1
  27. package/dist/core/sessions/session.d.ts.map +1 -1
  28. package/dist/core/sessions/session.js +10 -0
  29. package/dist/core/sessions/session.js.map +1 -1
  30. package/dist/core/sessions/session.test.js +5 -0
  31. package/dist/core/sessions/session.test.js.map +1 -1
  32. package/dist/core/sessions/state.d.ts.map +1 -1
  33. package/dist/core/sessions/state.js +5 -1
  34. package/dist/core/sessions/state.js.map +1 -1
  35. package/dist/core/tools/executor.test.js +1 -0
  36. package/dist/core/tools/executor.test.js.map +1 -1
  37. package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
  38. package/dist/plugins/agent-status/plugin.js +18 -26
  39. package/dist/plugins/agent-status/plugin.js.map +1 -1
  40. package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
  41. package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
  42. package/dist/plugins/context-compact/compaction-live.test.js +177 -0
  43. package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
  44. package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
  45. package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
  46. package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
  47. package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
  48. package/dist/plugins/context-compact/context-compactor.js +60 -36
  49. package/dist/plugins/context-compact/context-compactor.js.map +1 -1
  50. package/dist/plugins/context-compact/context-compactor.test.js +69 -103
  51. package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
  52. package/dist/plugins/context-compact/plugin.d.ts +9 -2
  53. package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
  54. package/dist/plugins/context-compact/plugin.js +8 -4
  55. package/dist/plugins/context-compact/plugin.js.map +1 -1
  56. package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
  57. package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
  58. package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
  59. package/dist/plugins/filesystem/plugin.js +8 -6
  60. package/dist/plugins/filesystem/plugin.js.map +1 -1
  61. package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
  62. package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
  63. package/dist/plugins/resources/plugin.d.ts.map +1 -1
  64. package/dist/plugins/resources/plugin.js +4 -1
  65. package/dist/plugins/resources/plugin.js.map +1 -1
  66. package/dist/plugins/user-chat/plugin.d.ts +2 -0
  67. package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
  68. package/dist/plugins/user-chat/plugin.js +47 -3
  69. package/dist/plugins/user-chat/plugin.js.map +1 -1
  70. package/dist/plugins/user-chat/schema.d.ts +10 -0
  71. package/dist/plugins/user-chat/schema.d.ts.map +1 -1
  72. package/dist/plugins/user-chat/schema.js +1 -0
  73. package/dist/plugins/user-chat/schema.js.map +1 -1
  74. package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
  75. package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
  76. package/package.json +2 -2
  77. package/src/core/agents/agent.ts +134 -20
  78. package/src/core/agents/config.ts +7 -0
  79. package/src/core/agents/context.ts +11 -0
  80. package/src/core/agents/state.ts +11 -4
  81. package/src/core/file-store/file-store.ts +38 -18
  82. package/src/core/image/vips-resizer.test.ts +26 -15
  83. package/src/core/llm/anthropic.ts +19 -12
  84. package/src/core/llm/cache-breakpoints.ts +15 -6
  85. package/src/core/sessions/session.test.ts +6 -0
  86. package/src/core/sessions/session.ts +12 -0
  87. package/src/core/sessions/state.ts +5 -1
  88. package/src/core/tools/executor.test.ts +1 -0
  89. package/src/plugins/agent-status/plugin.ts +18 -25
  90. package/src/plugins/context-compact/compaction-live.test.ts +221 -0
  91. package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
  92. package/src/plugins/context-compact/context-compactor.test.ts +71 -110
  93. package/src/plugins/context-compact/context-compactor.ts +88 -43
  94. package/src/plugins/context-compact/plugin.ts +19 -10
  95. package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
  96. package/src/plugins/filesystem/plugin.ts +8 -6
  97. package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
  98. package/src/plugins/resources/plugin.ts +4 -1
  99. package/src/plugins/user-chat/plugin.ts +60 -3
  100. package/src/plugins/user-chat/schema.ts +10 -1
  101. package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
@@ -2,14 +2,24 @@ import type { AgentId } from '~/core/agents/schema.js'
2
2
  import type { CompactedConversationMessage, ContextCompactedEvent } from '~/core/context/state.js'
3
3
  import { contextEvents } from '~/core/context/state.js'
4
4
  import { withSessionId } from '~/core/events/test-helpers.js'
5
- import type { LLMMessage, LLMProvider } from '~/core/llm/provider.js'
5
+ import type { InferenceResponse, LLMError, LLMMessage } from '~/core/llm/provider.js'
6
6
  import type { ModelId } from '~/core/llm/schema.js'
7
7
  import { estimateMessagesTokens } from '~/core/llm/tokens.js'
8
8
  import type { SessionId } from '~/core/sessions/schema.js'
9
9
  import type { Result } from '~/lib/utils/result.js'
10
10
  import { Err, Ok } from '~/lib/utils/result.js'
11
11
  import type { Logger } from '../../lib/logger/logger.js'
12
- import { CONTEXT_SUMMARY_PROMPT, wrapContextSummary } from '../../prompts/index.js'
12
+ import { wrapContextSummary } from '../../prompts/index.js'
13
+
14
+ /**
15
+ * Callback used by the compactor to ask the host (an Agent) to run a side-channel
16
+ * inference reusing its own system prompt, tools, and conversation prefix.
17
+ *
18
+ * Implemented in practice by AgentContext.runAuxiliaryInference, which keeps the
19
+ * agent's prompt cache warm — only the trailing `extraMessages` and the response
20
+ * tokens are paid for; the rest of the prefix is served from cache.
21
+ */
22
+ export type RunInferenceFn = (extraMessages: LLMMessage[]) => Promise<Result<InferenceResponse, LLMError>>
13
23
 
14
24
  // ============================================================================
15
25
  // Message formatting for summarization
@@ -76,24 +86,44 @@ function formatToolInput(input: unknown): string {
76
86
  // ============================================================================
77
87
 
78
88
  export interface CompactionConfig {
79
- /** Model ID to use for summarization (required) */
80
- model: ModelId
81
- /** Token threshold to trigger compaction */
89
+ /**
90
+ * @deprecated No longer used. Summarization runs on the agent's own model via
91
+ * the auxiliary inference callback so the agent's prompt cache is reused.
92
+ * Kept in the interface so existing preset configs continue to type-check.
93
+ */
94
+ model?: ModelId
95
+ /** Token threshold to trigger compaction. */
82
96
  maxTokens: number
83
- /** Number of recent messages to keep uncompacted */
97
+ /** Number of recent messages to keep uncompacted. */
84
98
  keepRecentMessages: number
85
- /** Max tokens for kept recent messages (whichever limit is hit first) */
99
+ /** Max tokens for kept recent messages (whichever limit is hit first). */
86
100
  keepRecentTokens?: number
87
- /** Target token count after compaction (informational) */
101
+ /** Target token count after compaction (informational). */
88
102
  targetTokens?: number
89
- /** System prompt for summarization */
103
+ /** Optional override for the trailing summarization instruction sent to the model. */
90
104
  summaryPrompt?: string
91
- /** Enable history offloading before compaction */
105
+ /** Enable history offloading before compaction. */
92
106
  offloadHistory?: boolean
93
- /** Path prefix for offloaded history (default: /session/.history/) */
107
+ /** Path prefix for offloaded history (default: /session/.history/). */
94
108
  historyPathPrefix?: string
95
109
  }
96
110
 
111
+ /**
112
+ * Trailing user-message instruction appended to the agent's full prefix when
113
+ * requesting a summary. The model sees its real system prompt, tools and full
114
+ * conversation, then this instruction last. Phrased to discourage tool calls
115
+ * — Sonnet-class models reliably emit a plain text response under this prompt.
116
+ */
117
+ export const DEFAULT_SUMMARY_INSTRUCTION =
118
+ '[CONTEXT COMPACTION REQUEST]\n'
119
+ + 'The conversation above is approaching the context budget. Produce a concise '
120
+ + 'summary (under 600 words) of everything discussed and decided so far. Cover: '
121
+ + 'completed tasks and their outcomes, key decisions and rationale, current state '
122
+ + 'of any in-progress work, important file paths or identifiers, and outstanding '
123
+ + 'questions.\n\n'
124
+ + 'Reply with plain text only. Do NOT call any tools. Do NOT acknowledge this '
125
+ + 'request — just emit the summary directly.'
126
+
97
127
  // ============================================================================
98
128
  // Compaction Result
99
129
  // ============================================================================
@@ -141,7 +171,6 @@ export interface HistoryOffloader {
141
171
 
142
172
  export class ContextCompactor {
143
173
  constructor(
144
- private readonly llmProvider: LLMProvider,
145
174
  private readonly logger: Logger,
146
175
  private readonly config: CompactionConfig,
147
176
  private readonly historyOffloader?: HistoryOffloader,
@@ -179,10 +208,17 @@ export class ContextCompactor {
179
208
  }
180
209
 
181
210
  /**
182
- * Check if compaction is needed based on token count.
211
+ * Check if compaction is needed.
212
+ *
213
+ * Prefers the provider-reported prompt token count from the previous turn
214
+ * (authoritative — comes straight from the model's tokenizer). Falls back
215
+ * to the in-process estimator when no previous metrics exist (first turn).
216
+ *
217
+ * The estimator under-counts JSON-heavy tool-result history by ~2x, so
218
+ * relying on it alone causes the trigger to never fire in long sessions.
183
219
  */
184
- needsCompaction(messages: LLMMessage[]): boolean {
185
- const tokens = estimateMessagesTokens(messages)
220
+ needsCompaction(messages: LLMMessage[], lastActualPromptTokens?: number): boolean {
221
+ const tokens = lastActualPromptTokens ?? estimateMessagesTokens(messages)
186
222
  return tokens > this.config.maxTokens
187
223
  }
188
224
 
@@ -194,33 +230,42 @@ export class ContextCompactor {
194
230
  sessionId: SessionId,
195
231
  agentId: AgentId,
196
232
  messages: LLMMessage[],
233
+ runInference: RunInferenceFn,
234
+ lastActualPromptTokens?: number,
197
235
  ): Promise<Result<CompactionResult | null, Error>> {
198
- if (!this.needsCompaction(messages)) {
236
+ if (!this.needsCompaction(messages, lastActualPromptTokens)) {
199
237
  return Ok(null)
200
238
  }
201
239
 
202
- return this.compact(sessionId, agentId, messages)
240
+ return this.compact(sessionId, agentId, messages, runInference, lastActualPromptTokens)
203
241
  }
204
242
 
205
243
  /**
206
- * Compact conversation history by summarizing older messages.
244
+ * Compact conversation history by asking the agent's own model to summarize
245
+ * the older portion. The summarization call reuses the agent's existing
246
+ * prompt cache via `runInference`, paying only for the trailing instruction
247
+ * (a few hundred tokens) and the summary output — not the whole conversation
248
+ * a second time.
207
249
  */
208
250
  async compact(
209
251
  sessionId: SessionId,
210
252
  agentId: AgentId,
211
253
  messages: LLMMessage[],
254
+ runInference: RunInferenceFn,
255
+ lastActualPromptTokens?: number,
212
256
  ): Promise<Result<CompactionResult, Error>> {
213
- const originalTokens = estimateMessagesTokens(messages)
257
+ const originalTokens = lastActualPromptTokens ?? estimateMessagesTokens(messages)
214
258
 
215
259
  this.logger.info('Starting context compaction', {
216
260
  sessionId,
217
261
  agentId,
218
262
  messageCount: messages.length,
219
- estimatedTokens: originalTokens,
263
+ originalTokens,
264
+ actualTokensReported: lastActualPromptTokens !== undefined,
220
265
  })
221
266
 
222
- // Split messages: keep recent, compact older
223
- // Respect both count limit and token budget (whichever is hit first)
267
+ // Split messages: keep recent, compact older.
268
+ // Respect both count limit and token budget (whichever is hit first).
224
269
  const keepCount = this.computeKeepCount(messages)
225
270
  const toCompact = messages.slice(0, messages.length - keepCount)
226
271
  const toKeep = messages.slice(messages.length - keepCount)
@@ -236,20 +281,16 @@ export class ContextCompactor {
236
281
  })
237
282
  }
238
283
 
239
- // Format messages for summarization
240
- const conversationText = toCompact
241
- .map(formatMessageForSummary)
242
- .join('\n\n')
243
-
244
- // Offload history if enabled
284
+ // Offload the dropped messages to disk for forensics / replay.
285
+ // Best-effort; failures are logged but don't block compaction.
245
286
  let historyPath: string | undefined
246
287
  if (this.config.offloadHistory && this.historyOffloader) {
247
288
  try {
289
+ const conversationText = toCompact.map(formatMessageForSummary).join('\n\n')
248
290
  const pathPrefix = this.config.historyPathPrefix ?? DEFAULT_HISTORY_PATH_PREFIX
249
291
  historyPath = await this.historyOffloader.offload(agentId, conversationText, pathPrefix)
250
292
  this.logger.info('History offloaded', { sessionId, agentId, historyPath })
251
293
  } catch (error) {
252
- // History offloading is best-effort, log and continue
253
294
  this.logger.warn('Failed to offload history', {
254
295
  sessionId,
255
296
  agentId,
@@ -258,18 +299,14 @@ export class ContextCompactor {
258
299
  }
259
300
  }
260
301
 
261
- // Generate summary using LLM
262
- const summaryResult = await this.llmProvider.inference({
263
- model: this.config.model,
264
- systemPrompt: this.config.summaryPrompt ?? CONTEXT_SUMMARY_PROMPT,
265
- messages: [
266
- {
267
- role: 'user',
268
- content: `Please summarize this conversation:\n\n${conversationText}`,
269
- },
270
- ],
271
- tools: [],
272
- })
302
+ // Inline summarization: append the instruction as a trailing user message
303
+ // and let the host run inference with the agent's full live prefix. The
304
+ // agent's prompt cache from the previous turn covers everything up to
305
+ // (but not including) this instruction.
306
+ const summaryInstruction = this.config.summaryPrompt ?? DEFAULT_SUMMARY_INSTRUCTION
307
+ const summaryResult = await runInference([
308
+ { role: 'user', content: summaryInstruction },
309
+ ])
273
310
 
274
311
  if (!summaryResult.ok) {
275
312
  const llmError = summaryResult.error
@@ -283,9 +320,17 @@ export class ContextCompactor {
283
320
 
284
321
  const summary = summaryResult.value.content ?? ''
285
322
 
286
- // Create summary message (with history reference if offloaded)
323
+ if (!summary.trim()) {
324
+ this.logger.warn('Summarization returned empty content', { sessionId, agentId })
325
+ return Err(new Error('Compaction failed: model returned empty summary'))
326
+ }
327
+
328
+ // Replace the compacted portion with a single user-role summary message.
329
+ // Using `user` role (not `system`) so the wrap reads as part of the
330
+ // conversation flow — Anthropic recommends user-role for arbitrary
331
+ // mid-conversation context blocks.
287
332
  const summaryMessage: LLMMessage = {
288
- role: 'system',
333
+ role: 'user',
289
334
  content: wrapContextSummary(summary, historyPath),
290
335
  }
291
336
 
@@ -10,36 +10,45 @@ import { type CompactionConfig, ContextCompactor, createContextCompactedEvent, t
10
10
  import { FileHistoryOffloader } from './history-offloader.js'
11
11
 
12
12
  /**
13
- * Plugin config — session-level compaction settings.
13
+ * Plugin config — session-level (default) compaction settings.
14
+ * Individual agents may override fields via `contextCompactPlugin.configureAgent({ ... })`.
14
15
  */
15
16
  export interface ContextCompactPluginConfig {
16
17
  compaction: CompactionConfig
17
18
  }
18
19
 
20
+ /**
21
+ * Per-agent override. Any field omitted falls back to the session-level config.
22
+ * Used for cases like "orchestrator gets a tighter 50k threshold while subagents
23
+ * keep the default 200k".
24
+ */
25
+ export type ContextCompactAgentConfig = Partial<CompactionConfig>
26
+
19
27
  export const contextCompactPlugin = definePlugin('context-compact')
20
28
  .pluginConfig<ContextCompactPluginConfig>()
29
+ .agentConfig<ContextCompactAgentConfig>()
21
30
  .context(async (ctx, pluginConfig) => {
22
31
  const historyOffloader: HistoryOffloader | undefined = pluginConfig.compaction.offloadHistory
23
32
  ? new FileHistoryOffloader(ctx.environment.sessionDir, ctx.platform.fs)
24
33
  : undefined
25
34
 
26
- const compactor = new ContextCompactor(
27
- ctx.llm,
28
- ctx.logger,
29
- pluginConfig.compaction,
30
- historyOffloader,
31
- )
32
-
33
- return { compactor }
35
+ return { historyOffloader, sessionConfig: pluginConfig.compaction }
34
36
  })
35
37
  .hook('beforeInference', async (ctx) => {
36
- const compactor = ctx.pluginContext.compactor
38
+ const { historyOffloader, sessionConfig } = ctx.pluginContext
39
+ const agentOverrides = ctx.pluginAgentConfig ?? {}
40
+ const effectiveConfig: CompactionConfig = { ...sessionConfig, ...agentOverrides }
41
+
42
+ const compactor = new ContextCompactor(ctx.logger, effectiveConfig, historyOffloader)
37
43
  const historyLLMMessages = ctx.agentState.conversationHistory
44
+ const lastActualPromptTokens = ctx.agentState.lastInferenceMetrics?.promptTokens
38
45
 
39
46
  const result = await compactor.compactIfNeeded(
40
47
  ctx.sessionId,
41
48
  ctx.agentId,
42
49
  historyLLMMessages,
50
+ ctx.runAuxiliaryInference,
51
+ lastActualPromptTokens,
43
52
  )
44
53
 
45
54
  if (result.ok && result.value !== null) {
@@ -22,6 +22,14 @@ beforeAll(() => {
22
22
  fs.writeFileSync(path.join(fixtureDir, 'hello.txt'), 'Hello, world!')
23
23
  fs.writeFileSync(path.join(fixtureDir, 'multiline.txt'), Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`).join('\n'))
24
24
 
25
+ // Create a minimal 1x1 PNG for image tests
26
+ const onePixelPng = Buffer.from(
27
+ '89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c489000000'
28
+ + '0a49444154789c6300010000000500010d0a2db40000000049454e44ae426082',
29
+ 'hex',
30
+ )
31
+ fs.writeFileSync(path.join(fixtureDir, 'pixel.png'), onePixelPng)
32
+
25
33
  // Create subdirectory with files
26
34
  fs.mkdirSync(path.join(fixtureDir, 'subdir'), { recursive: true })
27
35
  fs.writeFileSync(path.join(fixtureDir, 'subdir', 'nested.txt'), 'Nested content')
@@ -154,6 +162,42 @@ describe('filesystem plugin', () => {
154
162
  await harness.shutdown()
155
163
  })
156
164
 
165
+ it('read image file → file:// URL uses agent-visible input path, not resolved real path', async () => {
166
+ // Regression: previously read_file returned file://<realPath>, which the
167
+ // sandboxed FileStore then rejected when re-resolving on the next inference
168
+ // (it only accepts agent-visible paths like /home/user/session/...). The URL
169
+ // must echo input.path so it stays resolvable through fileStore.realPath().
170
+ const filePath = path.join(fixtureDir, 'pixel.png')
171
+ const harness = createFsHarness({
172
+ presets: [createFsPreset()],
173
+ llmProvider: MockLLMProvider.withSequence([
174
+ {
175
+ toolCalls: [{
176
+ id: ToolCallId('tc1'),
177
+ name: 'read_file',
178
+ input: { path: filePath },
179
+ }],
180
+ },
181
+ { content: 'Done', toolCalls: [] },
182
+ ]),
183
+ })
184
+
185
+ const session = await harness.createSession('test')
186
+ await session.sendAndWaitForIdle('Read image')
187
+
188
+ const callHistory = harness.llmProvider.getCallHistory()
189
+ const toolMessages = callHistory[1].messages.filter((m) => m.role === 'tool')
190
+ expect(toolMessages).toHaveLength(1)
191
+ const content = toolMessages[0].content
192
+ expect(Array.isArray(content)).toBe(true)
193
+ const blocks = content as Array<{ type: string; text?: string; imageUrl?: { url: string } }>
194
+ const imageBlock = blocks.find((b) => b.type === 'image_url')
195
+ expect(imageBlock).toBeDefined()
196
+ expect(imageBlock?.imageUrl?.url).toBe(`file://${filePath}`)
197
+
198
+ await harness.shutdown()
199
+ })
200
+
157
201
  it('read a directory path → "is not a file" error', async () => {
158
202
  const dirPath = path.join(fixtureDir, 'subdir')
159
203
  const harness = createFsHarness({
@@ -158,16 +158,18 @@ export const filesystemPlugin = definePlugin('filesystem')
158
158
  })
159
159
  }
160
160
 
161
- // Image files → return as multimodal image content
161
+ // Image files → return as multimodal image content.
162
+ // Store the agent-visible input.path (not the resolved real path):
163
+ // the URL survives into conversationHistory and gets re-resolved
164
+ // via fileStore.realPath() on every subsequent inference. In
165
+ // sandboxed mode, realPath() rejects already-resolved disk paths
166
+ // (only accepts the virtual prefix), so storing realPath would
167
+ // surface as "[Image unavailable: …]" on every later turn.
162
168
  const mimeType = getImageMimeType(input.path)
163
169
  if (mimeType) {
164
- const realPathResult = fileStore.realPath(input.path)
165
- if (!realPathResult.ok) {
166
- return Err({ message: realPathResult.error, recoverable: false })
167
- }
168
170
  return Ok([
169
171
  { type: 'text', text: `Image: ${input.path} (${mimeType}, ${stats.size} bytes)` },
170
- { type: 'image_url', imageUrl: { url: `file://${realPathResult.value}` } },
172
+ { type: 'image_url', imageUrl: { url: `file://${input.path}` } },
171
173
  ])
172
174
  }
173
175
 
@@ -581,7 +581,7 @@ describe('mailbox plugin', () => {
581
581
  await harness.shutdown()
582
582
  })
583
583
 
584
- it('empty-stop LLM response → agent retries; persistent empty → onError reports to parent', async () => {
584
+ it('empty-stop LLM response → agent retries; persistent empty → coalesces to WAITING, no error', async () => {
585
585
  let workerCalls = 0
586
586
  let orchestratorCalls = 0
587
587
 
@@ -603,31 +603,25 @@ describe('mailbox plugin', () => {
603
603
  return { content: 'Done', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
604
604
  }
605
605
  workerCalls++
606
- // Always empty-stop → triggers retry until exhausted, then onError
606
+ // Always empty-stop → triggers retry until exhausted, then coalesces to WAITING
607
607
  return { content: null, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
608
608
  },
609
609
  })
610
610
 
611
611
  const session = await harness.createSession('test')
612
- await session.sendMessage('Start')
613
-
614
- // Worker ends up errored (not idle); poll for the error message to parent.
615
- const deadline = Date.now() + 5000
616
- let errMsg: { message: { content: string; from: unknown } } | undefined
617
- while (Date.now() < deadline) {
618
- const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
619
- errMsg = events.find(e =>
620
- e.message.from === AgentId('worker_1')
621
- && typeof e.message.content === 'string'
622
- && e.message.content.startsWith('Agent encountered an error:'),
623
- )
624
- if (errMsg) break
625
- await new Promise((r) => setTimeout(r, 50))
626
- }
612
+ await session.sendAndWaitForIdle('Start')
627
613
 
628
614
  // Initial + 2 retries = 3 worker LLM calls
629
615
  expect(workerCalls).toBe(3)
630
- expect(errMsg).toBeDefined()
616
+
617
+ // No error message to parent — exhaustion coalesces to WAITING, not failure
618
+ const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
619
+ const errMsg = events.find(e =>
620
+ e.message.from === AgentId('worker_1')
621
+ && typeof e.message.content === 'string'
622
+ && e.message.content.startsWith('Agent encountered an error:'),
623
+ )
624
+ expect(errMsg).toBeUndefined()
631
625
 
632
626
  await harness.shutdown()
633
627
  })
@@ -112,7 +112,10 @@ export const resourcesPlugin = definePlugin('resources')
112
112
  await fs.writeFile(tempPath, input.fileBuffer)
113
113
 
114
114
  try {
115
- await exec('unzip', ['-o', '-q', tempPath, '-d', targetDir])
115
+ // `-x .git .git/*` so a stray .git entry in the ZIP can't overwrite the
116
+ // worktree's gitdir pointer (which silently breaks every subsequent git
117
+ // command in the workspace).
118
+ await exec('unzip', ['-o', '-q', tempPath, '-d', targetDir, '-x', '.git', '.git/*'])
116
119
  } catch (error) {
117
120
  const message = error instanceof Error ? error.message : String(error)
118
121
  // unzip returns exit code 1 for warnings — still usable
@@ -208,6 +208,12 @@ const askUserFlatInputSchema = z.object({
208
208
  .optional()
209
209
  .describe("Placeholder text for text input"),
210
210
  multiline: z.boolean().optional().describe("Allow multiline text input"),
211
+ allowAttachments: z
212
+ .boolean()
213
+ .optional()
214
+ .describe(
215
+ "Render a file-attach control alongside the text field (text inputType only). Use when the answer needs to come with a file the user already has on hand — logo, brand PDF, source docs, supporting screenshots. Uploaded files reach you the same way as drag-drop attachments in plain chat.",
216
+ ),
211
217
  // rating options
212
218
  min: z
213
219
  .number()
@@ -248,6 +254,7 @@ function transformToAskUserInputType(
248
254
  type: "text",
249
255
  placeholder: input.placeholder,
250
256
  multiline: input.multiline,
257
+ allowAttachments: input.allowAttachments,
251
258
  };
252
259
  case "confirm":
253
260
  return {
@@ -303,6 +310,56 @@ function formatPendingForLLM(pending: PendingInboundMessage[]): string {
303
310
  return parts.join("\n");
304
311
  }
305
312
 
313
+ // Some models (notably routed through OpenRouter — Gemini/Llama/Qwen) emit
314
+ // non-ASCII tool argument strings as double-escaped JSON: the wire form is
315
+ // `"\\u0159"`, which JSON.parse turns into a literal 6-char `\u0159` instead
316
+ // of `ř`. The user then sees raw escape sequences in their UI. Applied only
317
+ // to user-facing display fields (question, placeholder, labels, message body)
318
+ // — never to identifiers like `option.value` (must round-trip back to the LLM
319
+ // unchanged) or to code-bearing inputs of other tools.
320
+ function decodeUnicodeEscapes(value: string): string;
321
+ function decodeUnicodeEscapes(value: string | undefined): string | undefined;
322
+ function decodeUnicodeEscapes(value: string | undefined): string | undefined {
323
+ if (value === undefined) return undefined;
324
+ if (!value.includes("\\u")) return value;
325
+ return value.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) =>
326
+ String.fromCharCode(parseInt(hex, 16)),
327
+ );
328
+ }
329
+
330
+ function decodeAskUserDisplayStrings(input: AskUserInputType): AskUserInputType {
331
+ switch (input.type) {
332
+ case "text":
333
+ return { ...input, placeholder: decodeUnicodeEscapes(input.placeholder) };
334
+ case "single_choice":
335
+ case "multi_choice":
336
+ return {
337
+ ...input,
338
+ options: input.options.map((o) => ({
339
+ ...o,
340
+ label: decodeUnicodeEscapes(o.label),
341
+ description: decodeUnicodeEscapes(o.description),
342
+ })),
343
+ };
344
+ case "confirm":
345
+ return {
346
+ ...input,
347
+ confirmLabel: decodeUnicodeEscapes(input.confirmLabel),
348
+ cancelLabel: decodeUnicodeEscapes(input.cancelLabel),
349
+ };
350
+ case "rating":
351
+ return input.labels
352
+ ? {
353
+ ...input,
354
+ labels: {
355
+ min: decodeUnicodeEscapes(input.labels.min),
356
+ max: decodeUnicodeEscapes(input.labels.max),
357
+ },
358
+ }
359
+ : input;
360
+ }
361
+ }
362
+
306
363
  // ============================================================================
307
364
  // Plugin
308
365
  // ============================================================================
@@ -687,7 +744,7 @@ export const userChatPlugin = definePlugin("user-chat")
687
744
  const format = input.format ?? "text";
688
745
  const result = await ctx.self.tellUser({
689
746
  agentId: context.agentId,
690
- message: input.message,
747
+ message: decodeUnicodeEscapes(input.message),
691
748
  format,
692
749
  });
693
750
  if (!result.ok)
@@ -710,8 +767,8 @@ export const userChatPlugin = definePlugin("user-chat")
710
767
  const inputType = transformToAskUserInputType(input);
711
768
  const result = await ctx.self.askQuestion({
712
769
  agentId: context.agentId,
713
- question: input.question,
714
- inputType,
770
+ question: decodeUnicodeEscapes(input.question),
771
+ inputType: decodeAskUserDisplayStrings(inputType),
715
772
  });
716
773
  if (!result.ok)
717
774
  return Err({ message: result.error.message, recoverable: false });
@@ -19,9 +19,17 @@ export type AskUserOption = {
19
19
 
20
20
  /**
21
21
  * Input type for ask_user tool - defines how the user should respond.
22
+ *
23
+ * `text.allowAttachments` opts the question into an inline file-attach control
24
+ * next to the text field — used when the agent needs the answer to come with a
25
+ * file (logo, brand PDF, supporting docs). Attachments piggy-back on the
26
+ * existing pending-attachments machinery: clients reuse `uploadFile()` /
27
+ * `pendingAttachments`, so the answer payload itself stays a plain string and
28
+ * the agent sees uploaded files via the normal `<attachment>` blocks in its
29
+ * inbox alongside the question answer.
22
30
  */
23
31
  export type AskUserInputType =
24
- | { type: 'text'; placeholder?: string; multiline?: boolean }
32
+ | { type: 'text'; placeholder?: string; multiline?: boolean; allowAttachments?: boolean }
25
33
  | { type: 'single_choice'; options: AskUserOption[] }
26
34
  | {
27
35
  type: 'multi_choice'
@@ -47,6 +55,7 @@ export const askUserInputTypeSchema = z.discriminatedUnion('type', [
47
55
  type: z.literal('text'),
48
56
  placeholder: z.string().optional(),
49
57
  multiline: z.boolean().optional(),
58
+ allowAttachments: z.boolean().optional(),
50
59
  }),
51
60
  z.object({
52
61
  type: z.literal('single_choice'),
@@ -564,6 +564,105 @@ describe('user-chat plugin', () => {
564
564
  })
565
565
  })
566
566
 
567
+ // =========================================================================
568
+ // Unicode escape decoding (defensive fix for models that double-escape
569
+ // non-ASCII in tool argument JSON — see plugin.ts decodeUnicodeEscapes).
570
+ // =========================================================================
571
+
572
+ describe('unicode escape decoding in user-facing fields', () => {
573
+ it('ask_user (text) → literal \\uXXXX in question/placeholder is decoded', async () => {
574
+ const harness = new TestHarness({
575
+ presets: [createTestPreset()],
576
+ llmProvider: MockLLMProvider.withSequence([
577
+ {
578
+ toolCalls: [{
579
+ id: ToolCallId('tc1'),
580
+ name: 'ask_user',
581
+ input: {
582
+ question: 'Pro\\u010d ne?',
583
+ inputType: 'text',
584
+ placeholder: 'Nap\\u0159. ano',
585
+ },
586
+ }],
587
+ },
588
+ { content: 'Done', toolCalls: [] },
589
+ ]),
590
+ })
591
+
592
+ const session = await harness.createSession('test')
593
+ await session.sendAndWaitForIdle('Hi')
594
+
595
+ const askNotifications = harness.notifications.getByType('user-chat', 'askUser')
596
+ expect(askNotifications[0].payload).toMatchObject({
597
+ question: 'Proč ne?',
598
+ inputType: { type: 'text', placeholder: 'Např. ano' },
599
+ })
600
+
601
+ await harness.shutdown()
602
+ })
603
+
604
+ it('ask_user (single_choice) → option labels decoded, values left intact', async () => {
605
+ const harness = new TestHarness({
606
+ presets: [createTestPreset()],
607
+ llmProvider: MockLLMProvider.withSequence([
608
+ {
609
+ toolCalls: [{
610
+ id: ToolCallId('tc1'),
611
+ name: 'ask_user',
612
+ input: {
613
+ question: 'Pick',
614
+ inputType: 'single_choice',
615
+ options: [
616
+ { value: 'kun_ze_\\u0159adu', label: 'K\\u016f\\u0148 ze \\u0159adu' },
617
+ ],
618
+ },
619
+ }],
620
+ },
621
+ { content: 'Done', toolCalls: [] },
622
+ ]),
623
+ })
624
+
625
+ const session = await harness.createSession('test')
626
+ await session.sendAndWaitForIdle('Hi')
627
+
628
+ const askNotifications = harness.notifications.getByType('user-chat', 'askUser')
629
+ expect(askNotifications[0].payload).toMatchObject({
630
+ inputType: {
631
+ type: 'single_choice',
632
+ // Value preserved verbatim so answer round-trip stays consistent
633
+ // with what the LLM emitted.
634
+ options: [{ value: 'kun_ze_\\u0159adu', label: 'Kůň ze řadu' }],
635
+ },
636
+ })
637
+
638
+ await harness.shutdown()
639
+ })
640
+
641
+ it('tell_user → literal \\uXXXX in message is decoded', async () => {
642
+ const harness = new TestHarness({
643
+ presets: [createTestPreset()],
644
+ llmProvider: MockLLMProvider.withSequence([
645
+ {
646
+ toolCalls: [{
647
+ id: ToolCallId('tc1'),
648
+ name: 'tell_user',
649
+ input: { message: 'Ahoj sv\\u011bte' },
650
+ }],
651
+ },
652
+ { content: 'Done', toolCalls: [] },
653
+ ]),
654
+ })
655
+
656
+ const session = await harness.createSession('test')
657
+ await session.sendAndWaitForIdle('Hi')
658
+
659
+ const msgs = harness.notifications.getByType('user-chat', 'agentMessage')
660
+ expect(msgs[0].payload).toMatchObject({ content: 'Ahoj světe' })
661
+
662
+ await harness.shutdown()
663
+ })
664
+ })
665
+
567
666
  // =========================================================================
568
667
  // XML mode
569
668
  // =========================================================================