@roj-ai/sdk 0.1.14 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bootstrap.d.ts +1 -0
- package/dist/bootstrap.d.ts.map +1 -1
- package/dist/core/agents/agent.d.ts +25 -1
- package/dist/core/agents/agent.d.ts.map +1 -1
- package/dist/core/agents/agent.js +117 -21
- package/dist/core/agents/agent.js.map +1 -1
- package/dist/core/agents/config.d.ts +7 -0
- package/dist/core/agents/config.d.ts.map +1 -1
- package/dist/core/agents/context.d.ts +10 -0
- package/dist/core/agents/context.d.ts.map +1 -1
- package/dist/core/agents/state.d.ts +11 -3
- package/dist/core/agents/state.d.ts.map +1 -1
- package/dist/core/agents/state.js.map +1 -1
- package/dist/core/file-store/file-store.d.ts +5 -1
- package/dist/core/file-store/file-store.d.ts.map +1 -1
- package/dist/core/file-store/file-store.js +31 -21
- package/dist/core/file-store/file-store.js.map +1 -1
- package/dist/core/image/vips-resizer.test.js +26 -14
- package/dist/core/image/vips-resizer.test.js.map +1 -1
- package/dist/core/llm/anthropic.d.ts.map +1 -1
- package/dist/core/llm/anthropic.js +11 -8
- package/dist/core/llm/anthropic.js.map +1 -1
- package/dist/core/llm/cache-breakpoints.d.ts +5 -1
- package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
- package/dist/core/llm/cache-breakpoints.js +10 -5
- package/dist/core/llm/cache-breakpoints.js.map +1 -1
- package/dist/core/sessions/session.d.ts.map +1 -1
- package/dist/core/sessions/session.js +10 -0
- package/dist/core/sessions/session.js.map +1 -1
- package/dist/core/sessions/session.test.js +5 -0
- package/dist/core/sessions/session.test.js.map +1 -1
- package/dist/core/sessions/state.d.ts.map +1 -1
- package/dist/core/sessions/state.js +5 -1
- package/dist/core/sessions/state.js.map +1 -1
- package/dist/core/tools/executor.test.js +1 -0
- package/dist/core/tools/executor.test.js.map +1 -1
- package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
- package/dist/plugins/agent-status/plugin.js +18 -26
- package/dist/plugins/agent-status/plugin.js.map +1 -1
- package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
- package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
- package/dist/plugins/context-compact/compaction-live.test.js +177 -0
- package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
- package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
- package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
- package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
- package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
- package/dist/plugins/context-compact/context-compactor.js +60 -36
- package/dist/plugins/context-compact/context-compactor.js.map +1 -1
- package/dist/plugins/context-compact/context-compactor.test.js +69 -103
- package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
- package/dist/plugins/context-compact/plugin.d.ts +9 -2
- package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
- package/dist/plugins/context-compact/plugin.js +8 -4
- package/dist/plugins/context-compact/plugin.js.map +1 -1
- package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
- package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
- package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
- package/dist/plugins/filesystem/plugin.js +8 -6
- package/dist/plugins/filesystem/plugin.js.map +1 -1
- package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
- package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
- package/dist/plugins/resources/plugin.d.ts.map +1 -1
- package/dist/plugins/resources/plugin.js +4 -1
- package/dist/plugins/resources/plugin.js.map +1 -1
- package/dist/plugins/user-chat/plugin.d.ts +2 -0
- package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
- package/dist/plugins/user-chat/plugin.js +47 -3
- package/dist/plugins/user-chat/plugin.js.map +1 -1
- package/dist/plugins/user-chat/schema.d.ts +10 -0
- package/dist/plugins/user-chat/schema.d.ts.map +1 -1
- package/dist/plugins/user-chat/schema.js +1 -0
- package/dist/plugins/user-chat/schema.js.map +1 -1
- package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
- package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
- package/package.json +2 -2
- package/src/core/agents/agent.ts +134 -20
- package/src/core/agents/config.ts +7 -0
- package/src/core/agents/context.ts +11 -0
- package/src/core/agents/state.ts +11 -4
- package/src/core/file-store/file-store.ts +38 -18
- package/src/core/image/vips-resizer.test.ts +26 -15
- package/src/core/llm/anthropic.ts +19 -12
- package/src/core/llm/cache-breakpoints.ts +15 -6
- package/src/core/sessions/session.test.ts +6 -0
- package/src/core/sessions/session.ts +12 -0
- package/src/core/sessions/state.ts +5 -1
- package/src/core/tools/executor.test.ts +1 -0
- package/src/plugins/agent-status/plugin.ts +18 -25
- package/src/plugins/context-compact/compaction-live.test.ts +221 -0
- package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
- package/src/plugins/context-compact/context-compactor.test.ts +71 -110
- package/src/plugins/context-compact/context-compactor.ts +88 -43
- package/src/plugins/context-compact/plugin.ts +19 -10
- package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
- package/src/plugins/filesystem/plugin.ts +8 -6
- package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
- package/src/plugins/resources/plugin.ts +4 -1
- package/src/plugins/user-chat/plugin.ts +60 -3
- package/src/plugins/user-chat/schema.ts +10 -1
- package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
|
@@ -2,14 +2,24 @@ import type { AgentId } from '~/core/agents/schema.js'
|
|
|
2
2
|
import type { CompactedConversationMessage, ContextCompactedEvent } from '~/core/context/state.js'
|
|
3
3
|
import { contextEvents } from '~/core/context/state.js'
|
|
4
4
|
import { withSessionId } from '~/core/events/test-helpers.js'
|
|
5
|
-
import type {
|
|
5
|
+
import type { InferenceResponse, LLMError, LLMMessage } from '~/core/llm/provider.js'
|
|
6
6
|
import type { ModelId } from '~/core/llm/schema.js'
|
|
7
7
|
import { estimateMessagesTokens } from '~/core/llm/tokens.js'
|
|
8
8
|
import type { SessionId } from '~/core/sessions/schema.js'
|
|
9
9
|
import type { Result } from '~/lib/utils/result.js'
|
|
10
10
|
import { Err, Ok } from '~/lib/utils/result.js'
|
|
11
11
|
import type { Logger } from '../../lib/logger/logger.js'
|
|
12
|
-
import {
|
|
12
|
+
import { wrapContextSummary } from '../../prompts/index.js'
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Callback used by the compactor to ask the host (an Agent) to run a side-channel
|
|
16
|
+
* inference reusing its own system prompt, tools, and conversation prefix.
|
|
17
|
+
*
|
|
18
|
+
* Implemented in practice by AgentContext.runAuxiliaryInference, which keeps the
|
|
19
|
+
* agent's prompt cache warm — only the trailing `extraMessages` and the response
|
|
20
|
+
* tokens are paid for; the rest of the prefix is served from cache.
|
|
21
|
+
*/
|
|
22
|
+
export type RunInferenceFn = (extraMessages: LLMMessage[]) => Promise<Result<InferenceResponse, LLMError>>
|
|
13
23
|
|
|
14
24
|
// ============================================================================
|
|
15
25
|
// Message formatting for summarization
|
|
@@ -76,24 +86,44 @@ function formatToolInput(input: unknown): string {
|
|
|
76
86
|
// ============================================================================
|
|
77
87
|
|
|
78
88
|
export interface CompactionConfig {
|
|
79
|
-
/**
|
|
80
|
-
|
|
81
|
-
|
|
89
|
+
/**
|
|
90
|
+
* @deprecated No longer used. Summarization runs on the agent's own model via
|
|
91
|
+
* the auxiliary inference callback so the agent's prompt cache is reused.
|
|
92
|
+
* Kept in the interface so existing preset configs continue to type-check.
|
|
93
|
+
*/
|
|
94
|
+
model?: ModelId
|
|
95
|
+
/** Token threshold to trigger compaction. */
|
|
82
96
|
maxTokens: number
|
|
83
|
-
/** Number of recent messages to keep uncompacted */
|
|
97
|
+
/** Number of recent messages to keep uncompacted. */
|
|
84
98
|
keepRecentMessages: number
|
|
85
|
-
/** Max tokens for kept recent messages (whichever limit is hit first) */
|
|
99
|
+
/** Max tokens for kept recent messages (whichever limit is hit first). */
|
|
86
100
|
keepRecentTokens?: number
|
|
87
|
-
/** Target token count after compaction (informational) */
|
|
101
|
+
/** Target token count after compaction (informational). */
|
|
88
102
|
targetTokens?: number
|
|
89
|
-
/**
|
|
103
|
+
/** Optional override for the trailing summarization instruction sent to the model. */
|
|
90
104
|
summaryPrompt?: string
|
|
91
|
-
/** Enable history offloading before compaction */
|
|
105
|
+
/** Enable history offloading before compaction. */
|
|
92
106
|
offloadHistory?: boolean
|
|
93
|
-
/** Path prefix for offloaded history (default: /session/.history/) */
|
|
107
|
+
/** Path prefix for offloaded history (default: /session/.history/). */
|
|
94
108
|
historyPathPrefix?: string
|
|
95
109
|
}
|
|
96
110
|
|
|
111
|
+
/**
|
|
112
|
+
* Trailing user-message instruction appended to the agent's full prefix when
|
|
113
|
+
* requesting a summary. The model sees its real system prompt, tools and full
|
|
114
|
+
* conversation, then this instruction last. Phrased to discourage tool calls
|
|
115
|
+
* — Sonnet-class models reliably emit a plain text response under this prompt.
|
|
116
|
+
*/
|
|
117
|
+
export const DEFAULT_SUMMARY_INSTRUCTION =
|
|
118
|
+
'[CONTEXT COMPACTION REQUEST]\n'
|
|
119
|
+
+ 'The conversation above is approaching the context budget. Produce a concise '
|
|
120
|
+
+ 'summary (under 600 words) of everything discussed and decided so far. Cover: '
|
|
121
|
+
+ 'completed tasks and their outcomes, key decisions and rationale, current state '
|
|
122
|
+
+ 'of any in-progress work, important file paths or identifiers, and outstanding '
|
|
123
|
+
+ 'questions.\n\n'
|
|
124
|
+
+ 'Reply with plain text only. Do NOT call any tools. Do NOT acknowledge this '
|
|
125
|
+
+ 'request — just emit the summary directly.'
|
|
126
|
+
|
|
97
127
|
// ============================================================================
|
|
98
128
|
// Compaction Result
|
|
99
129
|
// ============================================================================
|
|
@@ -141,7 +171,6 @@ export interface HistoryOffloader {
|
|
|
141
171
|
|
|
142
172
|
export class ContextCompactor {
|
|
143
173
|
constructor(
|
|
144
|
-
private readonly llmProvider: LLMProvider,
|
|
145
174
|
private readonly logger: Logger,
|
|
146
175
|
private readonly config: CompactionConfig,
|
|
147
176
|
private readonly historyOffloader?: HistoryOffloader,
|
|
@@ -179,10 +208,17 @@ export class ContextCompactor {
|
|
|
179
208
|
}
|
|
180
209
|
|
|
181
210
|
/**
|
|
182
|
-
* Check if compaction is needed
|
|
211
|
+
* Check if compaction is needed.
|
|
212
|
+
*
|
|
213
|
+
* Prefers the provider-reported prompt token count from the previous turn
|
|
214
|
+
* (authoritative — comes straight from the model's tokenizer). Falls back
|
|
215
|
+
* to the in-process estimator when no previous metrics exist (first turn).
|
|
216
|
+
*
|
|
217
|
+
* The estimator under-counts JSON-heavy tool-result history by ~2x, so
|
|
218
|
+
* relying on it alone causes the trigger to never fire in long sessions.
|
|
183
219
|
*/
|
|
184
|
-
needsCompaction(messages: LLMMessage[]): boolean {
|
|
185
|
-
const tokens = estimateMessagesTokens(messages)
|
|
220
|
+
needsCompaction(messages: LLMMessage[], lastActualPromptTokens?: number): boolean {
|
|
221
|
+
const tokens = lastActualPromptTokens ?? estimateMessagesTokens(messages)
|
|
186
222
|
return tokens > this.config.maxTokens
|
|
187
223
|
}
|
|
188
224
|
|
|
@@ -194,33 +230,42 @@ export class ContextCompactor {
|
|
|
194
230
|
sessionId: SessionId,
|
|
195
231
|
agentId: AgentId,
|
|
196
232
|
messages: LLMMessage[],
|
|
233
|
+
runInference: RunInferenceFn,
|
|
234
|
+
lastActualPromptTokens?: number,
|
|
197
235
|
): Promise<Result<CompactionResult | null, Error>> {
|
|
198
|
-
if (!this.needsCompaction(messages)) {
|
|
236
|
+
if (!this.needsCompaction(messages, lastActualPromptTokens)) {
|
|
199
237
|
return Ok(null)
|
|
200
238
|
}
|
|
201
239
|
|
|
202
|
-
return this.compact(sessionId, agentId, messages)
|
|
240
|
+
return this.compact(sessionId, agentId, messages, runInference, lastActualPromptTokens)
|
|
203
241
|
}
|
|
204
242
|
|
|
205
243
|
/**
|
|
206
|
-
* Compact conversation history by
|
|
244
|
+
* Compact conversation history by asking the agent's own model to summarize
|
|
245
|
+
* the older portion. The summarization call reuses the agent's existing
|
|
246
|
+
* prompt cache via `runInference`, paying only for the trailing instruction
|
|
247
|
+
* (a few hundred tokens) and the summary output — not the whole conversation
|
|
248
|
+
* a second time.
|
|
207
249
|
*/
|
|
208
250
|
async compact(
|
|
209
251
|
sessionId: SessionId,
|
|
210
252
|
agentId: AgentId,
|
|
211
253
|
messages: LLMMessage[],
|
|
254
|
+
runInference: RunInferenceFn,
|
|
255
|
+
lastActualPromptTokens?: number,
|
|
212
256
|
): Promise<Result<CompactionResult, Error>> {
|
|
213
|
-
const originalTokens = estimateMessagesTokens(messages)
|
|
257
|
+
const originalTokens = lastActualPromptTokens ?? estimateMessagesTokens(messages)
|
|
214
258
|
|
|
215
259
|
this.logger.info('Starting context compaction', {
|
|
216
260
|
sessionId,
|
|
217
261
|
agentId,
|
|
218
262
|
messageCount: messages.length,
|
|
219
|
-
|
|
263
|
+
originalTokens,
|
|
264
|
+
actualTokensReported: lastActualPromptTokens !== undefined,
|
|
220
265
|
})
|
|
221
266
|
|
|
222
|
-
// Split messages: keep recent, compact older
|
|
223
|
-
// Respect both count limit and token budget (whichever is hit first)
|
|
267
|
+
// Split messages: keep recent, compact older.
|
|
268
|
+
// Respect both count limit and token budget (whichever is hit first).
|
|
224
269
|
const keepCount = this.computeKeepCount(messages)
|
|
225
270
|
const toCompact = messages.slice(0, messages.length - keepCount)
|
|
226
271
|
const toKeep = messages.slice(messages.length - keepCount)
|
|
@@ -236,20 +281,16 @@ export class ContextCompactor {
|
|
|
236
281
|
})
|
|
237
282
|
}
|
|
238
283
|
|
|
239
|
-
//
|
|
240
|
-
|
|
241
|
-
.map(formatMessageForSummary)
|
|
242
|
-
.join('\n\n')
|
|
243
|
-
|
|
244
|
-
// Offload history if enabled
|
|
284
|
+
// Offload the dropped messages to disk for forensics / replay.
|
|
285
|
+
// Best-effort; failures are logged but don't block compaction.
|
|
245
286
|
let historyPath: string | undefined
|
|
246
287
|
if (this.config.offloadHistory && this.historyOffloader) {
|
|
247
288
|
try {
|
|
289
|
+
const conversationText = toCompact.map(formatMessageForSummary).join('\n\n')
|
|
248
290
|
const pathPrefix = this.config.historyPathPrefix ?? DEFAULT_HISTORY_PATH_PREFIX
|
|
249
291
|
historyPath = await this.historyOffloader.offload(agentId, conversationText, pathPrefix)
|
|
250
292
|
this.logger.info('History offloaded', { sessionId, agentId, historyPath })
|
|
251
293
|
} catch (error) {
|
|
252
|
-
// History offloading is best-effort, log and continue
|
|
253
294
|
this.logger.warn('Failed to offload history', {
|
|
254
295
|
sessionId,
|
|
255
296
|
agentId,
|
|
@@ -258,18 +299,14 @@ export class ContextCompactor {
|
|
|
258
299
|
}
|
|
259
300
|
}
|
|
260
301
|
|
|
261
|
-
//
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
},
|
|
270
|
-
],
|
|
271
|
-
tools: [],
|
|
272
|
-
})
|
|
302
|
+
// Inline summarization: append the instruction as a trailing user message
|
|
303
|
+
// and let the host run inference with the agent's full live prefix. The
|
|
304
|
+
// agent's prompt cache from the previous turn covers everything up to
|
|
305
|
+
// (but not including) this instruction.
|
|
306
|
+
const summaryInstruction = this.config.summaryPrompt ?? DEFAULT_SUMMARY_INSTRUCTION
|
|
307
|
+
const summaryResult = await runInference([
|
|
308
|
+
{ role: 'user', content: summaryInstruction },
|
|
309
|
+
])
|
|
273
310
|
|
|
274
311
|
if (!summaryResult.ok) {
|
|
275
312
|
const llmError = summaryResult.error
|
|
@@ -283,9 +320,17 @@ export class ContextCompactor {
|
|
|
283
320
|
|
|
284
321
|
const summary = summaryResult.value.content ?? ''
|
|
285
322
|
|
|
286
|
-
|
|
323
|
+
if (!summary.trim()) {
|
|
324
|
+
this.logger.warn('Summarization returned empty content', { sessionId, agentId })
|
|
325
|
+
return Err(new Error('Compaction failed: model returned empty summary'))
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
// Replace the compacted portion with a single user-role summary message.
|
|
329
|
+
// Using `user` role (not `system`) so the wrap reads as part of the
|
|
330
|
+
// conversation flow — Anthropic recommends user-role for arbitrary
|
|
331
|
+
// mid-conversation context blocks.
|
|
287
332
|
const summaryMessage: LLMMessage = {
|
|
288
|
-
role: '
|
|
333
|
+
role: 'user',
|
|
289
334
|
content: wrapContextSummary(summary, historyPath),
|
|
290
335
|
}
|
|
291
336
|
|
|
@@ -10,36 +10,45 @@ import { type CompactionConfig, ContextCompactor, createContextCompactedEvent, t
|
|
|
10
10
|
import { FileHistoryOffloader } from './history-offloader.js'
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
|
-
* Plugin config — session-level compaction settings.
|
|
13
|
+
* Plugin config — session-level (default) compaction settings.
|
|
14
|
+
* Individual agents may override fields via `contextCompactPlugin.configureAgent({ ... })`.
|
|
14
15
|
*/
|
|
15
16
|
export interface ContextCompactPluginConfig {
|
|
16
17
|
compaction: CompactionConfig
|
|
17
18
|
}
|
|
18
19
|
|
|
20
|
+
/**
|
|
21
|
+
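* Per-agent override. Any field omitted falls back to the session-level config. Note: the history offloader is only created when the session-level `offloadHistory` is enabled, so turning `offloadHistory` on solely through an agent override has no effect.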
|
|
22
|
+
* Used for cases like "orchestrator gets a tighter 50k threshold while subagents
|
|
23
|
+
* keep the default 200k".
|
|
24
|
+
*/
|
|
25
|
+
export type ContextCompactAgentConfig = Partial<CompactionConfig>
|
|
26
|
+
|
|
19
27
|
export const contextCompactPlugin = definePlugin('context-compact')
|
|
20
28
|
.pluginConfig<ContextCompactPluginConfig>()
|
|
29
|
+
.agentConfig<ContextCompactAgentConfig>()
|
|
21
30
|
.context(async (ctx, pluginConfig) => {
|
|
22
31
|
const historyOffloader: HistoryOffloader | undefined = pluginConfig.compaction.offloadHistory
|
|
23
32
|
? new FileHistoryOffloader(ctx.environment.sessionDir, ctx.platform.fs)
|
|
24
33
|
: undefined
|
|
25
34
|
|
|
26
|
-
|
|
27
|
-
ctx.llm,
|
|
28
|
-
ctx.logger,
|
|
29
|
-
pluginConfig.compaction,
|
|
30
|
-
historyOffloader,
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
return { compactor }
|
|
35
|
+
return { historyOffloader, sessionConfig: pluginConfig.compaction }
|
|
34
36
|
})
|
|
35
37
|
.hook('beforeInference', async (ctx) => {
|
|
36
|
-
const
|
|
38
|
+
const { historyOffloader, sessionConfig } = ctx.pluginContext
|
|
39
|
+
const agentOverrides = ctx.pluginAgentConfig ?? {}
|
|
40
|
+
const effectiveConfig: CompactionConfig = { ...sessionConfig, ...agentOverrides }
|
|
41
|
+
|
|
42
|
+
const compactor = new ContextCompactor(ctx.logger, effectiveConfig, historyOffloader)
|
|
37
43
|
const historyLLMMessages = ctx.agentState.conversationHistory
|
|
44
|
+
const lastActualPromptTokens = ctx.agentState.lastInferenceMetrics?.promptTokens
|
|
38
45
|
|
|
39
46
|
const result = await compactor.compactIfNeeded(
|
|
40
47
|
ctx.sessionId,
|
|
41
48
|
ctx.agentId,
|
|
42
49
|
historyLLMMessages,
|
|
50
|
+
ctx.runAuxiliaryInference,
|
|
51
|
+
lastActualPromptTokens,
|
|
43
52
|
)
|
|
44
53
|
|
|
45
54
|
if (result.ok && result.value !== null) {
|
|
@@ -22,6 +22,14 @@ beforeAll(() => {
|
|
|
22
22
|
fs.writeFileSync(path.join(fixtureDir, 'hello.txt'), 'Hello, world!')
|
|
23
23
|
fs.writeFileSync(path.join(fixtureDir, 'multiline.txt'), Array.from({ length: 20 }, (_, i) => `Line ${i + 1}`).join('\n'))
|
|
24
24
|
|
|
25
|
+
// Create a minimal 1x1 PNG for image tests
|
|
26
|
+
const onePixelPng = Buffer.from(
|
|
27
|
+
'89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c489000000'
|
|
28
|
+
+ '0a49444154789c6300010000000500010d0a2db40000000049454e44ae426082',
|
|
29
|
+
'hex',
|
|
30
|
+
)
|
|
31
|
+
fs.writeFileSync(path.join(fixtureDir, 'pixel.png'), onePixelPng)
|
|
32
|
+
|
|
25
33
|
// Create subdirectory with files
|
|
26
34
|
fs.mkdirSync(path.join(fixtureDir, 'subdir'), { recursive: true })
|
|
27
35
|
fs.writeFileSync(path.join(fixtureDir, 'subdir', 'nested.txt'), 'Nested content')
|
|
@@ -154,6 +162,42 @@ describe('filesystem plugin', () => {
|
|
|
154
162
|
await harness.shutdown()
|
|
155
163
|
})
|
|
156
164
|
|
|
165
|
+
it('read image file → file:// URL uses agent-visible input path, not resolved real path', async () => {
|
|
166
|
+
// Regression: previously read_file returned file://<realPath>, which the
|
|
167
|
+
// sandboxed FileStore then rejected when re-resolving on the next inference
|
|
168
|
+
// (it only accepts agent-visible paths like /home/user/session/...). The URL
|
|
169
|
+
// must echo input.path so it stays resolvable through fileStore.realPath().
|
|
170
|
+
const filePath = path.join(fixtureDir, 'pixel.png')
|
|
171
|
+
const harness = createFsHarness({
|
|
172
|
+
presets: [createFsPreset()],
|
|
173
|
+
llmProvider: MockLLMProvider.withSequence([
|
|
174
|
+
{
|
|
175
|
+
toolCalls: [{
|
|
176
|
+
id: ToolCallId('tc1'),
|
|
177
|
+
name: 'read_file',
|
|
178
|
+
input: { path: filePath },
|
|
179
|
+
}],
|
|
180
|
+
},
|
|
181
|
+
{ content: 'Done', toolCalls: [] },
|
|
182
|
+
]),
|
|
183
|
+
})
|
|
184
|
+
|
|
185
|
+
const session = await harness.createSession('test')
|
|
186
|
+
await session.sendAndWaitForIdle('Read image')
|
|
187
|
+
|
|
188
|
+
const callHistory = harness.llmProvider.getCallHistory()
|
|
189
|
+
const toolMessages = callHistory[1].messages.filter((m) => m.role === 'tool')
|
|
190
|
+
expect(toolMessages).toHaveLength(1)
|
|
191
|
+
const content = toolMessages[0].content
|
|
192
|
+
expect(Array.isArray(content)).toBe(true)
|
|
193
|
+
const blocks = content as Array<{ type: string; text?: string; imageUrl?: { url: string } }>
|
|
194
|
+
const imageBlock = blocks.find((b) => b.type === 'image_url')
|
|
195
|
+
expect(imageBlock).toBeDefined()
|
|
196
|
+
expect(imageBlock?.imageUrl?.url).toBe(`file://${filePath}`)
|
|
197
|
+
|
|
198
|
+
await harness.shutdown()
|
|
199
|
+
})
|
|
200
|
+
|
|
157
201
|
it('read a directory path → "is not a file" error', async () => {
|
|
158
202
|
const dirPath = path.join(fixtureDir, 'subdir')
|
|
159
203
|
const harness = createFsHarness({
|
|
@@ -158,16 +158,18 @@ export const filesystemPlugin = definePlugin('filesystem')
|
|
|
158
158
|
})
|
|
159
159
|
}
|
|
160
160
|
|
|
161
|
-
// Image files → return as multimodal image content
|
|
161
|
+
// Image files → return as multimodal image content.
|
|
162
|
+
// Store the agent-visible input.path (not the resolved real path):
|
|
163
|
+
// the URL survives into conversationHistory and gets re-resolved
|
|
164
|
+
// via fileStore.realPath() on every subsequent inference. In
|
|
165
|
+
// sandboxed mode, realPath() rejects already-resolved disk paths
|
|
166
|
+
// (only accepts the virtual prefix), so storing realPath would
|
|
167
|
+
// surface as "[Image unavailable: …]" on every later turn.
|
|
162
168
|
const mimeType = getImageMimeType(input.path)
|
|
163
169
|
if (mimeType) {
|
|
164
|
-
const realPathResult = fileStore.realPath(input.path)
|
|
165
|
-
if (!realPathResult.ok) {
|
|
166
|
-
return Err({ message: realPathResult.error, recoverable: false })
|
|
167
|
-
}
|
|
168
170
|
return Ok([
|
|
169
171
|
{ type: 'text', text: `Image: ${input.path} (${mimeType}, ${stats.size} bytes)` },
|
|
170
|
-
{ type: 'image_url', imageUrl: { url: `file://${
|
|
172
|
+
{ type: 'image_url', imageUrl: { url: `file://${input.path}` } },
|
|
171
173
|
])
|
|
172
174
|
}
|
|
173
175
|
|
|
@@ -581,7 +581,7 @@ describe('mailbox plugin', () => {
|
|
|
581
581
|
await harness.shutdown()
|
|
582
582
|
})
|
|
583
583
|
|
|
584
|
-
it('empty-stop LLM response → agent retries; persistent empty →
|
|
584
|
+
it('empty-stop LLM response → agent retries; persistent empty → coalesces to WAITING, no error', async () => {
|
|
585
585
|
let workerCalls = 0
|
|
586
586
|
let orchestratorCalls = 0
|
|
587
587
|
|
|
@@ -603,31 +603,25 @@ describe('mailbox plugin', () => {
|
|
|
603
603
|
return { content: 'Done', toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
|
|
604
604
|
}
|
|
605
605
|
workerCalls++
|
|
606
|
-
// Always empty-stop → triggers retry until exhausted, then
|
|
606
|
+
// Always empty-stop → triggers retry until exhausted, then coalesces to WAITING
|
|
607
607
|
return { content: null, toolCalls: [], finishReason: 'stop', metrics: MockLLMProvider.defaultMetrics() }
|
|
608
608
|
},
|
|
609
609
|
})
|
|
610
610
|
|
|
611
611
|
const session = await harness.createSession('test')
|
|
612
|
-
await session.
|
|
613
|
-
|
|
614
|
-
// Worker ends up errored (not idle); poll for the error message to parent.
|
|
615
|
-
const deadline = Date.now() + 5000
|
|
616
|
-
let errMsg: { message: { content: string; from: unknown } } | undefined
|
|
617
|
-
while (Date.now() < deadline) {
|
|
618
|
-
const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
|
|
619
|
-
errMsg = events.find(e =>
|
|
620
|
-
e.message.from === AgentId('worker_1')
|
|
621
|
-
&& typeof e.message.content === 'string'
|
|
622
|
-
&& e.message.content.startsWith('Agent encountered an error:'),
|
|
623
|
-
)
|
|
624
|
-
if (errMsg) break
|
|
625
|
-
await new Promise((r) => setTimeout(r, 50))
|
|
626
|
-
}
|
|
612
|
+
await session.sendAndWaitForIdle('Start')
|
|
627
613
|
|
|
628
614
|
// Initial + 2 retries = 3 worker LLM calls
|
|
629
615
|
expect(workerCalls).toBe(3)
|
|
630
|
-
|
|
616
|
+
|
|
617
|
+
// No error message to parent — exhaustion coalesces to WAITING, not failure
|
|
618
|
+
const events = await session.getEventsByType(mailboxEvents, 'mailbox_message')
|
|
619
|
+
const errMsg = events.find(e =>
|
|
620
|
+
e.message.from === AgentId('worker_1')
|
|
621
|
+
&& typeof e.message.content === 'string'
|
|
622
|
+
&& e.message.content.startsWith('Agent encountered an error:'),
|
|
623
|
+
)
|
|
624
|
+
expect(errMsg).toBeUndefined()
|
|
631
625
|
|
|
632
626
|
await harness.shutdown()
|
|
633
627
|
})
|
|
@@ -112,7 +112,10 @@ export const resourcesPlugin = definePlugin('resources')
|
|
|
112
112
|
await fs.writeFile(tempPath, input.fileBuffer)
|
|
113
113
|
|
|
114
114
|
try {
|
|
115
|
-
|
|
115
|
+
// `-x .git .git/*` so a stray .git entry in the ZIP can't overwrite the
|
|
116
|
+
// worktree's gitdir pointer (which silently breaks every subsequent git
|
|
117
|
+
// command in the workspace).
|
|
118
|
+
await exec('unzip', ['-o', '-q', tempPath, '-d', targetDir, '-x', '.git', '.git/*'])
|
|
116
119
|
} catch (error) {
|
|
117
120
|
const message = error instanceof Error ? error.message : String(error)
|
|
118
121
|
// unzip returns exit code 1 for warnings — still usable
|
|
@@ -208,6 +208,12 @@ const askUserFlatInputSchema = z.object({
|
|
|
208
208
|
.optional()
|
|
209
209
|
.describe("Placeholder text for text input"),
|
|
210
210
|
multiline: z.boolean().optional().describe("Allow multiline text input"),
|
|
211
|
+
allowAttachments: z
|
|
212
|
+
.boolean()
|
|
213
|
+
.optional()
|
|
214
|
+
.describe(
|
|
215
|
+
"Render a file-attach control alongside the text field (text inputType only). Use when the answer needs to come with a file the user already has on hand — logo, brand PDF, source docs, supporting screenshots. Uploaded files reach you the same way as drag-drop attachments in plain chat.",
|
|
216
|
+
),
|
|
211
217
|
// rating options
|
|
212
218
|
min: z
|
|
213
219
|
.number()
|
|
@@ -248,6 +254,7 @@ function transformToAskUserInputType(
|
|
|
248
254
|
type: "text",
|
|
249
255
|
placeholder: input.placeholder,
|
|
250
256
|
multiline: input.multiline,
|
|
257
|
+
allowAttachments: input.allowAttachments,
|
|
251
258
|
};
|
|
252
259
|
case "confirm":
|
|
253
260
|
return {
|
|
@@ -303,6 +310,56 @@ function formatPendingForLLM(pending: PendingInboundMessage[]): string {
|
|
|
303
310
|
return parts.join("\n");
|
|
304
311
|
}
|
|
305
312
|
|
|
313
|
+
// Some models (notably routed through OpenRouter — Gemini/Llama/Qwen) emit
|
|
314
|
+
// non-ASCII tool argument strings as double-escaped JSON: the wire form is
|
|
315
|
+
// `"\\u0159"`, which JSON.parse turns into the literal 6-char text `\u0159` instead
|
|
316
|
+
// of `ř`. The user then sees raw escape sequences in their UI. Applied only
|
|
317
|
+
// to user-facing display fields (question, placeholder, labels, message body)
|
|
318
|
+
// — never to identifiers like `option.value` (must round-trip back to the LLM
|
|
319
|
+
// unchanged) or to code-bearing inputs of other tools.
|
|
320
|
+
function decodeUnicodeEscapes(value: string): string;
|
|
321
|
+
function decodeUnicodeEscapes(value: string | undefined): string | undefined;
|
|
322
|
+
function decodeUnicodeEscapes(value: string | undefined): string | undefined {
|
|
323
|
+
if (value === undefined) return undefined;
|
|
324
|
+
if (!value.includes("\\u")) return value;
|
|
325
|
+
return value.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) =>
|
|
326
|
+
String.fromCharCode(parseInt(hex, 16)),
|
|
327
|
+
);
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
function decodeAskUserDisplayStrings(input: AskUserInputType): AskUserInputType {
|
|
331
|
+
switch (input.type) {
|
|
332
|
+
case "text":
|
|
333
|
+
return { ...input, placeholder: decodeUnicodeEscapes(input.placeholder) };
|
|
334
|
+
case "single_choice":
|
|
335
|
+
case "multi_choice":
|
|
336
|
+
return {
|
|
337
|
+
...input,
|
|
338
|
+
options: input.options.map((o) => ({
|
|
339
|
+
...o,
|
|
340
|
+
label: decodeUnicodeEscapes(o.label),
|
|
341
|
+
description: decodeUnicodeEscapes(o.description),
|
|
342
|
+
})),
|
|
343
|
+
};
|
|
344
|
+
case "confirm":
|
|
345
|
+
return {
|
|
346
|
+
...input,
|
|
347
|
+
confirmLabel: decodeUnicodeEscapes(input.confirmLabel),
|
|
348
|
+
cancelLabel: decodeUnicodeEscapes(input.cancelLabel),
|
|
349
|
+
};
|
|
350
|
+
case "rating":
|
|
351
|
+
return input.labels
|
|
352
|
+
? {
|
|
353
|
+
...input,
|
|
354
|
+
labels: {
|
|
355
|
+
min: decodeUnicodeEscapes(input.labels.min),
|
|
356
|
+
max: decodeUnicodeEscapes(input.labels.max),
|
|
357
|
+
},
|
|
358
|
+
}
|
|
359
|
+
: input;
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
|
|
306
363
|
// ============================================================================
|
|
307
364
|
// Plugin
|
|
308
365
|
// ============================================================================
|
|
@@ -687,7 +744,7 @@ export const userChatPlugin = definePlugin("user-chat")
|
|
|
687
744
|
const format = input.format ?? "text";
|
|
688
745
|
const result = await ctx.self.tellUser({
|
|
689
746
|
agentId: context.agentId,
|
|
690
|
-
message: input.message,
|
|
747
|
+
message: decodeUnicodeEscapes(input.message),
|
|
691
748
|
format,
|
|
692
749
|
});
|
|
693
750
|
if (!result.ok)
|
|
@@ -710,8 +767,8 @@ export const userChatPlugin = definePlugin("user-chat")
|
|
|
710
767
|
const inputType = transformToAskUserInputType(input);
|
|
711
768
|
const result = await ctx.self.askQuestion({
|
|
712
769
|
agentId: context.agentId,
|
|
713
|
-
question: input.question,
|
|
714
|
-
inputType,
|
|
770
|
+
question: decodeUnicodeEscapes(input.question),
|
|
771
|
+
inputType: decodeAskUserDisplayStrings(inputType),
|
|
715
772
|
});
|
|
716
773
|
if (!result.ok)
|
|
717
774
|
return Err({ message: result.error.message, recoverable: false });
|
|
@@ -19,9 +19,17 @@ export type AskUserOption = {
|
|
|
19
19
|
|
|
20
20
|
/**
|
|
21
21
|
* Input type for ask_user tool - defines how the user should respond.
|
|
22
|
+
*
|
|
23
|
+
* `text.allowAttachments` opts the question into an inline file-attach control
|
|
24
|
+
* next to the text field — used when the agent needs the answer to come with a
|
|
25
|
+
* file (logo, brand PDF, supporting docs). Attachments piggy-back on the
|
|
26
|
+
* existing pending-attachments machinery: clients reuse `uploadFile()` /
|
|
27
|
+
* `pendingAttachments`, so the answer payload itself stays a plain string and
|
|
28
|
+
* the agent sees uploaded files via the normal `<attachment>` blocks in its
|
|
29
|
+
* inbox alongside the question answer.
|
|
22
30
|
*/
|
|
23
31
|
export type AskUserInputType =
|
|
24
|
-
| { type: 'text'; placeholder?: string; multiline?: boolean }
|
|
32
|
+
| { type: 'text'; placeholder?: string; multiline?: boolean; allowAttachments?: boolean }
|
|
25
33
|
| { type: 'single_choice'; options: AskUserOption[] }
|
|
26
34
|
| {
|
|
27
35
|
type: 'multi_choice'
|
|
@@ -47,6 +55,7 @@ export const askUserInputTypeSchema = z.discriminatedUnion('type', [
|
|
|
47
55
|
type: z.literal('text'),
|
|
48
56
|
placeholder: z.string().optional(),
|
|
49
57
|
multiline: z.boolean().optional(),
|
|
58
|
+
allowAttachments: z.boolean().optional(),
|
|
50
59
|
}),
|
|
51
60
|
z.object({
|
|
52
61
|
type: z.literal('single_choice'),
|
|
@@ -564,6 +564,105 @@ describe('user-chat plugin', () => {
|
|
|
564
564
|
})
|
|
565
565
|
})
|
|
566
566
|
|
|
567
|
+
// =========================================================================
|
|
568
|
+
// Unicode escape decoding (defensive fix for models that double-escape
|
|
569
|
+
// non-ASCII in tool argument JSON — see plugin.ts decodeUnicodeEscapes).
|
|
570
|
+
// =========================================================================
|
|
571
|
+
|
|
572
|
+
describe('unicode escape decoding in user-facing fields', () => {
|
|
573
|
+
it('ask_user (text) → literal \\uXXXX in question/placeholder is decoded', async () => {
|
|
574
|
+
const harness = new TestHarness({
|
|
575
|
+
presets: [createTestPreset()],
|
|
576
|
+
llmProvider: MockLLMProvider.withSequence([
|
|
577
|
+
{
|
|
578
|
+
toolCalls: [{
|
|
579
|
+
id: ToolCallId('tc1'),
|
|
580
|
+
name: 'ask_user',
|
|
581
|
+
input: {
|
|
582
|
+
question: 'Pro\\u010d ne?',
|
|
583
|
+
inputType: 'text',
|
|
584
|
+
placeholder: 'Nap\\u0159. ano',
|
|
585
|
+
},
|
|
586
|
+
}],
|
|
587
|
+
},
|
|
588
|
+
{ content: 'Done', toolCalls: [] },
|
|
589
|
+
]),
|
|
590
|
+
})
|
|
591
|
+
|
|
592
|
+
const session = await harness.createSession('test')
|
|
593
|
+
await session.sendAndWaitForIdle('Hi')
|
|
594
|
+
|
|
595
|
+
const askNotifications = harness.notifications.getByType('user-chat', 'askUser')
|
|
596
|
+
expect(askNotifications[0].payload).toMatchObject({
|
|
597
|
+
question: 'Proč ne?',
|
|
598
|
+
inputType: { type: 'text', placeholder: 'Např. ano' },
|
|
599
|
+
})
|
|
600
|
+
|
|
601
|
+
await harness.shutdown()
|
|
602
|
+
})
|
|
603
|
+
|
|
604
|
+
it('ask_user (single_choice) → option labels decoded, values left intact', async () => {
|
|
605
|
+
const harness = new TestHarness({
|
|
606
|
+
presets: [createTestPreset()],
|
|
607
|
+
llmProvider: MockLLMProvider.withSequence([
|
|
608
|
+
{
|
|
609
|
+
toolCalls: [{
|
|
610
|
+
id: ToolCallId('tc1'),
|
|
611
|
+
name: 'ask_user',
|
|
612
|
+
input: {
|
|
613
|
+
question: 'Pick',
|
|
614
|
+
inputType: 'single_choice',
|
|
615
|
+
options: [
|
|
616
|
+
{ value: 'kun_ze_\\u0159adu', label: 'K\\u016f\\u0148 ze \\u0159adu' },
|
|
617
|
+
],
|
|
618
|
+
},
|
|
619
|
+
}],
|
|
620
|
+
},
|
|
621
|
+
{ content: 'Done', toolCalls: [] },
|
|
622
|
+
]),
|
|
623
|
+
})
|
|
624
|
+
|
|
625
|
+
const session = await harness.createSession('test')
|
|
626
|
+
await session.sendAndWaitForIdle('Hi')
|
|
627
|
+
|
|
628
|
+
const askNotifications = harness.notifications.getByType('user-chat', 'askUser')
|
|
629
|
+
expect(askNotifications[0].payload).toMatchObject({
|
|
630
|
+
inputType: {
|
|
631
|
+
type: 'single_choice',
|
|
632
|
+
// Value preserved verbatim so answer round-trip stays consistent
|
|
633
|
+
// with what the LLM emitted.
|
|
634
|
+
options: [{ value: 'kun_ze_\\u0159adu', label: 'Kůň ze řadu' }],
|
|
635
|
+
},
|
|
636
|
+
})
|
|
637
|
+
|
|
638
|
+
await harness.shutdown()
|
|
639
|
+
})
|
|
640
|
+
|
|
641
|
+
it('tell_user → literal \\uXXXX in message is decoded', async () => {
|
|
642
|
+
const harness = new TestHarness({
|
|
643
|
+
presets: [createTestPreset()],
|
|
644
|
+
llmProvider: MockLLMProvider.withSequence([
|
|
645
|
+
{
|
|
646
|
+
toolCalls: [{
|
|
647
|
+
id: ToolCallId('tc1'),
|
|
648
|
+
name: 'tell_user',
|
|
649
|
+
input: { message: 'Ahoj sv\\u011bte' },
|
|
650
|
+
}],
|
|
651
|
+
},
|
|
652
|
+
{ content: 'Done', toolCalls: [] },
|
|
653
|
+
]),
|
|
654
|
+
})
|
|
655
|
+
|
|
656
|
+
const session = await harness.createSession('test')
|
|
657
|
+
await session.sendAndWaitForIdle('Hi')
|
|
658
|
+
|
|
659
|
+
const msgs = harness.notifications.getByType('user-chat', 'agentMessage')
|
|
660
|
+
expect(msgs[0].payload).toMatchObject({ content: 'Ahoj světe' })
|
|
661
|
+
|
|
662
|
+
await harness.shutdown()
|
|
663
|
+
})
|
|
664
|
+
})
|
|
665
|
+
|
|
567
666
|
// =========================================================================
|
|
568
667
|
// XML mode
|
|
569
668
|
// =========================================================================
|