@roj-ai/sdk 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bootstrap.d.ts +1 -0
- package/dist/bootstrap.d.ts.map +1 -1
- package/dist/core/agents/agent.d.ts +25 -1
- package/dist/core/agents/agent.d.ts.map +1 -1
- package/dist/core/agents/agent.js +117 -21
- package/dist/core/agents/agent.js.map +1 -1
- package/dist/core/agents/config.d.ts +7 -0
- package/dist/core/agents/config.d.ts.map +1 -1
- package/dist/core/agents/context.d.ts +10 -0
- package/dist/core/agents/context.d.ts.map +1 -1
- package/dist/core/agents/state.d.ts +11 -3
- package/dist/core/agents/state.d.ts.map +1 -1
- package/dist/core/agents/state.js.map +1 -1
- package/dist/core/file-store/file-store.d.ts +5 -1
- package/dist/core/file-store/file-store.d.ts.map +1 -1
- package/dist/core/file-store/file-store.js +31 -21
- package/dist/core/file-store/file-store.js.map +1 -1
- package/dist/core/image/vips-resizer.test.js +26 -14
- package/dist/core/image/vips-resizer.test.js.map +1 -1
- package/dist/core/llm/anthropic.d.ts.map +1 -1
- package/dist/core/llm/anthropic.js +11 -8
- package/dist/core/llm/anthropic.js.map +1 -1
- package/dist/core/llm/cache-breakpoints.d.ts +5 -1
- package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
- package/dist/core/llm/cache-breakpoints.js +10 -5
- package/dist/core/llm/cache-breakpoints.js.map +1 -1
- package/dist/core/sessions/session.d.ts.map +1 -1
- package/dist/core/sessions/session.js +3 -0
- package/dist/core/sessions/session.js.map +1 -1
- package/dist/core/sessions/session.test.js +5 -0
- package/dist/core/sessions/session.test.js.map +1 -1
- package/dist/core/sessions/state.d.ts.map +1 -1
- package/dist/core/sessions/state.js +5 -1
- package/dist/core/sessions/state.js.map +1 -1
- package/dist/core/tools/executor.test.js +1 -0
- package/dist/core/tools/executor.test.js.map +1 -1
- package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
- package/dist/plugins/agent-status/plugin.js +18 -26
- package/dist/plugins/agent-status/plugin.js.map +1 -1
- package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
- package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
- package/dist/plugins/context-compact/compaction-live.test.js +177 -0
- package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
- package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
- package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
- package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
- package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
- package/dist/plugins/context-compact/context-compactor.js +60 -36
- package/dist/plugins/context-compact/context-compactor.js.map +1 -1
- package/dist/plugins/context-compact/context-compactor.test.js +69 -103
- package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
- package/dist/plugins/context-compact/plugin.d.ts +9 -2
- package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
- package/dist/plugins/context-compact/plugin.js +8 -4
- package/dist/plugins/context-compact/plugin.js.map +1 -1
- package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
- package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
- package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
- package/dist/plugins/filesystem/plugin.js +8 -6
- package/dist/plugins/filesystem/plugin.js.map +1 -1
- package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
- package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
- package/dist/plugins/resources/plugin.d.ts.map +1 -1
- package/dist/plugins/resources/plugin.js +4 -1
- package/dist/plugins/resources/plugin.js.map +1 -1
- package/dist/plugins/uploads/preprocessors/image-classifier.d.ts.map +1 -1
- package/dist/plugins/uploads/preprocessors/image-classifier.js +15 -2
- package/dist/plugins/uploads/preprocessors/image-classifier.js.map +1 -1
- package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.d.ts.map +1 -1
- package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js +72 -19
- package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js.map +1 -1
- package/dist/plugins/user-chat/plugin.d.ts +2 -0
- package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
- package/dist/plugins/user-chat/plugin.js +47 -3
- package/dist/plugins/user-chat/plugin.js.map +1 -1
- package/dist/plugins/user-chat/schema.d.ts +10 -0
- package/dist/plugins/user-chat/schema.d.ts.map +1 -1
- package/dist/plugins/user-chat/schema.js +1 -0
- package/dist/plugins/user-chat/schema.js.map +1 -1
- package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
- package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
- package/package.json +2 -2
- package/src/core/agents/agent.ts +134 -20
- package/src/core/agents/config.ts +7 -0
- package/src/core/agents/context.ts +11 -0
- package/src/core/agents/state.ts +11 -4
- package/src/core/file-store/file-store.ts +38 -18
- package/src/core/image/vips-resizer.test.ts +26 -15
- package/src/core/llm/anthropic.ts +19 -12
- package/src/core/llm/cache-breakpoints.ts +15 -6
- package/src/core/sessions/session.test.ts +6 -0
- package/src/core/sessions/session.ts +4 -0
- package/src/core/sessions/state.ts +5 -1
- package/src/core/tools/executor.test.ts +1 -0
- package/src/plugins/agent-status/plugin.ts +18 -25
- package/src/plugins/context-compact/compaction-live.test.ts +221 -0
- package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
- package/src/plugins/context-compact/context-compactor.test.ts +71 -110
- package/src/plugins/context-compact/context-compactor.ts +88 -43
- package/src/plugins/context-compact/plugin.ts +19 -10
- package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
- package/src/plugins/filesystem/plugin.ts +8 -6
- package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
- package/src/plugins/resources/plugin.ts +4 -1
- package/src/plugins/uploads/preprocessors/image-classifier.ts +15 -2
- package/src/plugins/uploads/preprocessors/markitdown-preprocessor.ts +89 -20
- package/src/plugins/user-chat/plugin.ts +60 -3
- package/src/plugins/user-chat/schema.ts +10 -1
- package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
|
@@ -85,10 +85,15 @@ interface AnthropicErrorResponse {
|
|
|
85
85
|
// Request body types
|
|
86
86
|
// ============================================================================
|
|
87
87
|
|
|
88
|
+
interface AnthropicCacheControl {
|
|
89
|
+
type: 'ephemeral'
|
|
90
|
+
ttl?: '5m' | '1h'
|
|
91
|
+
}
|
|
92
|
+
|
|
88
93
|
interface AnthropicTextBlockParam {
|
|
89
94
|
type: 'text'
|
|
90
95
|
text: string
|
|
91
|
-
cache_control?:
|
|
96
|
+
cache_control?: AnthropicCacheControl
|
|
92
97
|
}
|
|
93
98
|
|
|
94
99
|
interface AnthropicImageBlockParam {
|
|
@@ -96,7 +101,7 @@ interface AnthropicImageBlockParam {
|
|
|
96
101
|
source:
|
|
97
102
|
| { type: 'base64'; media_type: string; data: string }
|
|
98
103
|
| { type: 'url'; url: string }
|
|
99
|
-
cache_control?:
|
|
104
|
+
cache_control?: AnthropicCacheControl
|
|
100
105
|
}
|
|
101
106
|
|
|
102
107
|
interface AnthropicToolUseBlockParam {
|
|
@@ -104,7 +109,7 @@ interface AnthropicToolUseBlockParam {
|
|
|
104
109
|
id: string
|
|
105
110
|
name: string
|
|
106
111
|
input: unknown
|
|
107
|
-
cache_control?:
|
|
112
|
+
cache_control?: AnthropicCacheControl
|
|
108
113
|
}
|
|
109
114
|
|
|
110
115
|
interface AnthropicToolResultBlockParam {
|
|
@@ -112,7 +117,7 @@ interface AnthropicToolResultBlockParam {
|
|
|
112
117
|
tool_use_id: string
|
|
113
118
|
content: string | Array<AnthropicTextBlockParam | AnthropicImageBlockParam>
|
|
114
119
|
is_error?: boolean
|
|
115
|
-
cache_control?:
|
|
120
|
+
cache_control?: AnthropicCacheControl
|
|
116
121
|
}
|
|
117
122
|
|
|
118
123
|
type AnthropicContentBlockParam =
|
|
@@ -127,19 +132,19 @@ interface AnthropicMessageParam {
|
|
|
127
132
|
}
|
|
128
133
|
|
|
129
134
|
/**
|
|
130
|
-
* Add `cache_control
|
|
131
|
-
*
|
|
132
|
-
*
|
|
133
|
-
*
|
|
135
|
+
* Add `cache_control` to the LAST content block of an AnthropicMessageParam,
|
|
136
|
+
* regardless of block type. Converts string content to a single text block
|
|
137
|
+
* first so the mark has a place to live. Mutates in place so the cache
|
|
138
|
+
* breakpoint survives subsequent `mergeConsecutiveMessages`.
|
|
134
139
|
*/
|
|
135
|
-
function applyCacheControlToLastBlock(msg: AnthropicMessageParam): void {
|
|
140
|
+
function applyCacheControlToLastBlock(msg: AnthropicMessageParam, cacheControl: AnthropicCacheControl): void {
|
|
136
141
|
if (typeof msg.content === 'string') {
|
|
137
|
-
msg.content = [{ type: 'text', text: msg.content, cache_control:
|
|
142
|
+
msg.content = [{ type: 'text', text: msg.content, cache_control: cacheControl }]
|
|
138
143
|
return
|
|
139
144
|
}
|
|
140
145
|
if (msg.content.length === 0) return
|
|
141
146
|
const lastIdx = msg.content.length - 1
|
|
142
|
-
msg.content[lastIdx] = { ...msg.content[lastIdx], cache_control:
|
|
147
|
+
msg.content[lastIdx] = { ...msg.content[lastIdx], cache_control: cacheControl }
|
|
143
148
|
}
|
|
144
149
|
|
|
145
150
|
interface AnthropicToolParam {
|
|
@@ -366,7 +371,9 @@ export class AnthropicProvider implements RoutableLLMProvider {
|
|
|
366
371
|
private async mapMessage(msg: LLMMessage, context?: InferenceContext): Promise<AnthropicMessageParam> {
|
|
367
372
|
const mapped = await this.mapMessageContent(msg, context)
|
|
368
373
|
if (msg.cacheControl) {
|
|
369
|
-
|
|
374
|
+
const cc: AnthropicCacheControl = { type: 'ephemeral' }
|
|
375
|
+
if (msg.cacheControl.ttl) cc.ttl = msg.cacheControl.ttl
|
|
376
|
+
applyCacheControlToLastBlock(mapped, cc)
|
|
370
377
|
}
|
|
371
378
|
return mapped
|
|
372
379
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { LLMMessage } from '~/core/agents/state.js'
|
|
1
|
+
import type { LLMMessage, LLMMessageCacheControl } from '~/core/agents/state.js'
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Mark the prompt cache breakpoint on a message list.
|
|
@@ -13,25 +13,34 @@ import type { LLMMessage } from '~/core/agents/state.js'
|
|
|
13
13
|
* Target index is `messages.length - 1 - uncachedSuffixCount`. The suffix is
|
|
14
14
|
* the tail of messages that must remain fresh (e.g. ephemeral session context
|
|
15
15
|
* rebuilt each inference).
|
|
16
|
+
*
|
|
17
|
+
* `ttl` opts into Anthropic's 1-hour cache tier (write cost 2× input, read
|
|
18
|
+
* still 0.1×). Useful for long-lived agents where the default 5-minute TTL
|
|
19
|
+
* would expire between user turns. Omit for the default 5-minute tier.
|
|
16
20
|
*/
|
|
17
|
-
export function applyCacheBreakpoint(
|
|
21
|
+
export function applyCacheBreakpoint(
|
|
22
|
+
messages: LLMMessage[],
|
|
23
|
+
uncachedSuffixCount: number,
|
|
24
|
+
ttl?: '5m' | '1h',
|
|
25
|
+
): LLMMessage[] {
|
|
18
26
|
const idx = messages.length - 1 - uncachedSuffixCount
|
|
19
27
|
if (idx < 0) return messages
|
|
20
28
|
|
|
29
|
+
const cacheControl: LLMMessageCacheControl = ttl ? { type: 'ephemeral', ttl } : { type: 'ephemeral' }
|
|
21
30
|
const target = messages[idx]
|
|
22
31
|
const result = [...messages]
|
|
23
32
|
switch (target.role) {
|
|
24
33
|
case 'user':
|
|
25
|
-
result[idx] = { ...target, cacheControl
|
|
34
|
+
result[idx] = { ...target, cacheControl }
|
|
26
35
|
break
|
|
27
36
|
case 'assistant':
|
|
28
|
-
result[idx] = { ...target, cacheControl
|
|
37
|
+
result[idx] = { ...target, cacheControl }
|
|
29
38
|
break
|
|
30
39
|
case 'system':
|
|
31
|
-
result[idx] = { ...target, cacheControl
|
|
40
|
+
result[idx] = { ...target, cacheControl }
|
|
32
41
|
break
|
|
33
42
|
case 'tool':
|
|
34
|
-
result[idx] = { ...target, cacheControl
|
|
43
|
+
result[idx] = { ...target, cacheControl }
|
|
35
44
|
break
|
|
36
45
|
}
|
|
37
46
|
return result
|
|
@@ -712,6 +712,8 @@ describe('applyEvent', () => {
|
|
|
712
712
|
expect(session.agents.get(agentId)!.pendingMessages).toHaveLength(2)
|
|
713
713
|
expect(session.agents.get(agentId)!.status).toBe('inferring')
|
|
714
714
|
|
|
715
|
+
const historyLenBeforeFailure = session.agents.get(agentId)!.conversationHistory.length
|
|
716
|
+
|
|
715
717
|
// 6. Inference fails
|
|
716
718
|
session = applyEvent(
|
|
717
719
|
session,
|
|
@@ -735,6 +737,10 @@ describe('applyEvent', () => {
|
|
|
735
737
|
expect(getAgentMailbox(selectMailboxState(session), agentId)[0].consumed).toBe(false)
|
|
736
738
|
// status is errored
|
|
737
739
|
expect(agent.status).toBe('errored')
|
|
740
|
+
// conversationHistory NOT extended — pendingMessages are dropped, not promoted.
|
|
741
|
+
// Otherwise tool results would appear both in history and in pendingToolResults,
|
|
742
|
+
// duplicating them on the next inference (a Bedrock-style provider rejects that with a 400).
|
|
743
|
+
expect(agent.conversationHistory).toHaveLength(historyLenBeforeFailure)
|
|
738
744
|
})
|
|
739
745
|
})
|
|
740
746
|
|
|
@@ -494,6 +494,7 @@ export class Session {
|
|
|
494
494
|
}
|
|
495
495
|
|
|
496
496
|
const result = await methodDef.handler(ctx, parsed.data)
|
|
497
|
+
|
|
497
498
|
return result
|
|
498
499
|
}
|
|
499
500
|
|
|
@@ -795,6 +796,7 @@ export class Session {
|
|
|
795
796
|
checkIntervalMs: orch.checkIntervalMs,
|
|
796
797
|
input: orch.input,
|
|
797
798
|
plugins: withServicePluginConfig(orch),
|
|
799
|
+
cacheTtl: orch.cacheTtl,
|
|
798
800
|
}
|
|
799
801
|
}
|
|
800
802
|
|
|
@@ -810,6 +812,7 @@ export class Session {
|
|
|
810
812
|
checkIntervalMs: comm.checkIntervalMs,
|
|
811
813
|
input: comm.input,
|
|
812
814
|
plugins: withServicePluginConfig(comm),
|
|
815
|
+
cacheTtl: comm.cacheTtl,
|
|
813
816
|
}
|
|
814
817
|
}
|
|
815
818
|
|
|
@@ -828,6 +831,7 @@ export class Session {
|
|
|
828
831
|
checkIntervalMs: agentDef.checkIntervalMs,
|
|
829
832
|
input: agentDef.input,
|
|
830
833
|
plugins: withServicePluginConfig(agentDef),
|
|
834
|
+
cacheTtl: agentDef.cacheTtl,
|
|
831
835
|
}
|
|
832
836
|
}
|
|
833
837
|
}
|
|
@@ -207,15 +207,19 @@ export const coreReducer = createTypedReducer(
|
|
|
207
207
|
pendingToolCalls: toolCalls,
|
|
208
208
|
pendingMessages: [],
|
|
209
209
|
pendingToolResults: [],
|
|
210
|
+
lastInferenceMetrics: event.metrics,
|
|
210
211
|
}
|
|
211
212
|
})
|
|
212
213
|
}
|
|
213
214
|
|
|
214
215
|
case 'inference_failed':
|
|
216
|
+
// Failure is a clean rollback: pendingMessages are dropped (not promoted to history)
|
|
217
|
+
// and pendingToolResults / mailbox tokens stay intact so the next inference
|
|
218
|
+
// rebuilds the same turn. Runtime must skip markConsumed on failure to preserve
|
|
219
|
+
// mailbox tokens — see runInference().
|
|
215
220
|
return updateAgent(state, event.agentId, (agent) => ({
|
|
216
221
|
...agent,
|
|
217
222
|
status: 'errored',
|
|
218
|
-
conversationHistory: [...agent.conversationHistory, ...agent.pendingMessages],
|
|
219
223
|
pendingMessages: [],
|
|
220
224
|
}))
|
|
221
225
|
|
|
@@ -51,6 +51,7 @@ const createTestContext = (): ToolContext => {
|
|
|
51
51
|
agentConfig: { systemPrompt: 'test', model: ModelId('test'), spawnableAgents: [] },
|
|
52
52
|
input: undefined,
|
|
53
53
|
parentId: null,
|
|
54
|
+
runAuxiliaryInference: async () => Err({ type: 'invalid_request', message: 'not implemented in test' }),
|
|
54
55
|
}
|
|
55
56
|
}
|
|
56
57
|
|
|
@@ -38,40 +38,33 @@ export const agentStatusPlugin = definePlugin('agent-status')
|
|
|
38
38
|
sessionId: ctx.sessionId,
|
|
39
39
|
agentId: ctx.agentId,
|
|
40
40
|
status: 'thinking',
|
|
41
|
+
definitionName: ctx.agentState.definitionName,
|
|
41
42
|
timestamp: Date.now(),
|
|
42
43
|
})
|
|
43
44
|
}
|
|
44
45
|
return null
|
|
45
46
|
})
|
|
46
47
|
.hook('onComplete', async (ctx) => {
|
|
47
|
-
|
|
48
|
-
ctx.
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
})
|
|
55
|
-
} else {
|
|
56
|
-
ctx.notify('agentStatus', {
|
|
57
|
-
sessionId: ctx.sessionId,
|
|
58
|
-
agentId: ctx.agentId,
|
|
59
|
-
status: 'idle',
|
|
60
|
-
timestamp: Date.now(),
|
|
61
|
-
})
|
|
62
|
-
}
|
|
48
|
+
ctx.notify('agentStatus', {
|
|
49
|
+
sessionId: ctx.sessionId,
|
|
50
|
+
agentId: ctx.agentId,
|
|
51
|
+
status: 'idle',
|
|
52
|
+
definitionName: ctx.agentState.definitionName,
|
|
53
|
+
timestamp: Date.now(),
|
|
54
|
+
})
|
|
63
55
|
return null
|
|
64
56
|
})
|
|
65
57
|
.hook('onError', async (ctx) => {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
58
|
+
// Emit idle for both entry and sub-agents — without this the client's
|
|
59
|
+
// `activeAgents` map keeps the agent flagged thinking until reconnect,
|
|
60
|
+
// since the session-store no longer clears it on chat_message/ask_user.
|
|
61
|
+
ctx.notify('agentStatus', {
|
|
62
|
+
sessionId: ctx.sessionId,
|
|
63
|
+
agentId: ctx.agentId,
|
|
64
|
+
status: 'idle',
|
|
65
|
+
definitionName: ctx.agentState.definitionName,
|
|
66
|
+
timestamp: Date.now(),
|
|
67
|
+
})
|
|
75
68
|
return null
|
|
76
69
|
})
|
|
77
70
|
.build()
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Live compaction tests against the real Anthropic API.
|
|
3
|
+
*
|
|
4
|
+
* Opt-in: run with `LIVE_TESTS=1 ANTHROPIC_API_KEY=…`. Skipped otherwise so the
|
|
5
|
+
* default `bun test` run stays hermetic.
|
|
6
|
+
*
|
|
7
|
+
* These cover the two real-API claims of the context-compact rewrite (5c27ab7):
|
|
8
|
+
* 1. The auxiliary-inference call (used by `ContextCompactor`) reuses the
|
|
9
|
+
* agent's warm prompt cache — only the trailing instruction + output are
|
|
10
|
+
* paid for, not the whole conversation a second time.
|
|
11
|
+
* 2. `DEFAULT_SUMMARY_INSTRUCTION` actually elicits a plain-text summary from
|
|
12
|
+
* a real Sonnet/Haiku-class model (no tool calls, non-empty content), so
|
|
13
|
+
* the end-to-end `ContextCompactor.compact()` path produces a usable
|
|
14
|
+
* `[CONVERSATION SUMMARY]` block.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { describe, expect, test } from 'bun:test'
|
|
18
|
+
import { generateTestAgentId } from '~/core/agents/schema.js'
|
|
19
|
+
import type { ImageProcessor } from '~/core/image/types.js'
|
|
20
|
+
import type { LLMMessage } from '~/core/agents/state.js'
|
|
21
|
+
import { AnthropicProvider } from '~/core/llm/anthropic.js'
|
|
22
|
+
import { applyCacheBreakpoint } from '~/core/llm/cache-breakpoints.js'
|
|
23
|
+
import type { InferenceResponse, LLMError } from '~/core/llm/provider.js'
|
|
24
|
+
import { ModelId } from '~/core/llm/schema.js'
|
|
25
|
+
import { generateSessionId } from '~/core/sessions/schema.js'
|
|
26
|
+
import { silentLogger } from '~/lib/logger/logger.js'
|
|
27
|
+
import type { Result } from '~/lib/utils/result.js'
|
|
28
|
+
import {
|
|
29
|
+
ContextCompactor,
|
|
30
|
+
DEFAULT_SUMMARY_INSTRUCTION,
|
|
31
|
+
type RunInferenceFn,
|
|
32
|
+
} from './context-compactor.js'
|
|
33
|
+
|
|
34
|
+
const liveEnabled = process.env.LIVE_TESTS === '1'
|
|
35
|
+
const anthropicApiKey = liveEnabled ? process.env.ANTHROPIC_API_KEY : undefined
|
|
36
|
+
|
|
37
|
+
const describeLive = (name: string, apiKey: string | undefined, fn: () => void) => {
|
|
38
|
+
if (!apiKey) {
|
|
39
|
+
describe.skip(`${name} (skipped — API key missing)`, fn)
|
|
40
|
+
return
|
|
41
|
+
}
|
|
42
|
+
describe(name, fn)
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const noopImageProcessor: ImageProcessor = {
|
|
46
|
+
resolveContent: async (content) => content,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const MODEL = ModelId('claude-haiku-4-5-20251001')
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Padded system prompt so the cacheable prefix comfortably exceeds Anthropic's
|
|
53
|
+
* 1024-token minimum for Haiku. Deterministic content so identical calls reuse
|
|
54
|
+
* the exact same prefix.
|
|
55
|
+
*/
|
|
56
|
+
const LARGE_SYSTEM_PROMPT = [
|
|
57
|
+
'You are a meticulous assistant helping a developer review a long conversation.',
|
|
58
|
+
'Always respond concisely and accurately. Never speculate beyond the data.',
|
|
59
|
+
'Follow these rules strictly:',
|
|
60
|
+
...Array.from(
|
|
61
|
+
{ length: 120 },
|
|
62
|
+
(_, i) => `- Rule ${i + 1}: When asked about topic ${i + 1}, prefer factual sources and decline to speculate.`,
|
|
63
|
+
),
|
|
64
|
+
'End of instructions.',
|
|
65
|
+
].join('\n')
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Deterministic multi-turn history padded well past 1024 tokens so cache writes
|
|
69
|
+
* and reads are unambiguous. Includes both user and assistant messages, which
|
|
70
|
+
* is the shape an inline-compaction call sees in practice.
|
|
71
|
+
*/
|
|
72
|
+
function buildLongHistory(): LLMMessage[] {
|
|
73
|
+
const history: LLMMessage[] = []
|
|
74
|
+
for (let i = 0; i < 8; i++) {
|
|
75
|
+
history.push({
|
|
76
|
+
role: 'user',
|
|
77
|
+
content:
|
|
78
|
+
`Turn ${i + 1} user: please analyze topic ${i + 1}. `
|
|
79
|
+
+ 'context detail '.repeat(30),
|
|
80
|
+
})
|
|
81
|
+
history.push({
|
|
82
|
+
role: 'assistant',
|
|
83
|
+
content:
|
|
84
|
+
`Turn ${i + 1} assistant: here is my analysis of topic ${i + 1}. `
|
|
85
|
+
+ 'analysis detail '.repeat(30),
|
|
86
|
+
})
|
|
87
|
+
}
|
|
88
|
+
return history
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
describeLive('context-compact live: auxiliary inference cache reuse', anthropicApiKey, () => {
|
|
92
|
+
test('priming inference then auxiliary summary call hits the prompt cache', async () => {
|
|
93
|
+
const provider = new AnthropicProvider({
|
|
94
|
+
apiKey: anthropicApiKey!,
|
|
95
|
+
imageProcessor: noopImageProcessor,
|
|
96
|
+
defaultModel: 'claude-haiku-4-5-20251001',
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
const history = buildLongHistory()
|
|
100
|
+
|
|
101
|
+
// Call 1: regular inference, breakpoint on the last history message.
|
|
102
|
+
// Mirrors what an Agent.advance() turn does just before requesting a
|
|
103
|
+
// compaction summary.
|
|
104
|
+
const primeMessages = applyCacheBreakpoint(history, 0, '1h')
|
|
105
|
+
const prime = await provider.inference({
|
|
106
|
+
model: MODEL,
|
|
107
|
+
systemPrompt: LARGE_SYSTEM_PROMPT,
|
|
108
|
+
messages: primeMessages,
|
|
109
|
+
})
|
|
110
|
+
if (!prime.ok) {
|
|
111
|
+
if (prime.error.message?.includes('credit balance')) {
|
|
112
|
+
console.warn('⚠️ Live compaction cache test skipped: credit balance too low')
|
|
113
|
+
return
|
|
114
|
+
}
|
|
115
|
+
throw new Error(`prime call failed: ${JSON.stringify(prime.error)}`)
|
|
116
|
+
}
|
|
117
|
+
expect(prime.value.metrics.promptTokens).toBeGreaterThan(1024)
|
|
118
|
+
|
|
119
|
+
// Call 2: simulate Agent.runAuxiliaryInference — append the summary
|
|
120
|
+
// instruction as a trailing user message and place the breakpoint at
|
|
121
|
+
// the same position as the prime call (last history message), so the
|
|
122
|
+
// whole prefix lands as a cache read.
|
|
123
|
+
const summaryInstruction: LLMMessage = { role: 'user', content: DEFAULT_SUMMARY_INSTRUCTION }
|
|
124
|
+
const auxMessages = applyCacheBreakpoint([...history, summaryInstruction], 1, '1h')
|
|
125
|
+
const aux = await provider.inference({
|
|
126
|
+
model: MODEL,
|
|
127
|
+
systemPrompt: LARGE_SYSTEM_PROMPT,
|
|
128
|
+
messages: auxMessages,
|
|
129
|
+
})
|
|
130
|
+
if (!aux.ok) throw new Error(`auxiliary call failed: ${JSON.stringify(aux.error)}`)
|
|
131
|
+
|
|
132
|
+
// Core claim: the auxiliary call served the prefix from cache (otherwise
|
|
133
|
+
// we'd re-upload the entire conversation just to get a summary).
|
|
134
|
+
expect(aux.value.metrics.cachedTokens ?? 0).toBeGreaterThan(1024)
|
|
135
|
+
|
|
136
|
+
// Sonnet/Haiku reliably emit plain text under DEFAULT_SUMMARY_INSTRUCTION.
|
|
137
|
+
// If this regresses, the prompt is no longer fit for purpose.
|
|
138
|
+
expect(aux.value.toolCalls).toHaveLength(0)
|
|
139
|
+
expect(aux.value.finishReason).toBe('stop')
|
|
140
|
+
expect(aux.value.content ?? '').not.toBe('')
|
|
141
|
+
}, 60_000)
|
|
142
|
+
})
|
|
143
|
+
|
|
144
|
+
describeLive('context-compact live: end-to-end compactor', anthropicApiKey, () => {
|
|
145
|
+
test('ContextCompactor.compact() produces a real summary from a real model', async () => {
|
|
146
|
+
const provider = new AnthropicProvider({
|
|
147
|
+
apiKey: anthropicApiKey!,
|
|
148
|
+
imageProcessor: noopImageProcessor,
|
|
149
|
+
defaultModel: 'claude-haiku-4-5-20251001',
|
|
150
|
+
})
|
|
151
|
+
|
|
152
|
+
const history = buildLongHistory()
|
|
153
|
+
|
|
154
|
+
// Wraps the provider exactly the way AgentContext.runAuxiliaryInference
|
|
155
|
+
// does: full prefix + extraMessages, breakpoint pinned to the last
|
|
156
|
+
// pre-extraMessages position so the agent's cache is reused.
|
|
157
|
+
const auxCalls: InferenceResponse[] = []
|
|
158
|
+
const runInference: RunInferenceFn = async (
|
|
159
|
+
extraMessages,
|
|
160
|
+
): Promise<Result<InferenceResponse, LLMError>> => {
|
|
161
|
+
const messages = applyCacheBreakpoint(
|
|
162
|
+
[...history, ...extraMessages],
|
|
163
|
+
extraMessages.length,
|
|
164
|
+
'1h',
|
|
165
|
+
)
|
|
166
|
+
const result = await provider.inference({
|
|
167
|
+
model: MODEL,
|
|
168
|
+
systemPrompt: LARGE_SYSTEM_PROMPT,
|
|
169
|
+
messages,
|
|
170
|
+
})
|
|
171
|
+
if (result.ok) auxCalls.push(result.value)
|
|
172
|
+
return result
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
// Prime the cache with one regular inference first, otherwise the
|
|
176
|
+
// auxiliary call would pay full write cost — same as a real session
|
|
177
|
+
// where compaction always runs after at least one normal turn.
|
|
178
|
+
const prime = await provider.inference({
|
|
179
|
+
model: MODEL,
|
|
180
|
+
systemPrompt: LARGE_SYSTEM_PROMPT,
|
|
181
|
+
messages: applyCacheBreakpoint(history, 0, '1h'),
|
|
182
|
+
})
|
|
183
|
+
if (!prime.ok) {
|
|
184
|
+
if (prime.error.message?.includes('credit balance')) {
|
|
185
|
+
console.warn('⚠️ Live compaction end-to-end test skipped: credit balance too low')
|
|
186
|
+
return
|
|
187
|
+
}
|
|
188
|
+
throw new Error(`prime call failed: ${JSON.stringify(prime.error)}`)
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
const compactor = new ContextCompactor(silentLogger, {
|
|
192
|
+
// Force compaction regardless of estimator vs actual tokens.
|
|
193
|
+
maxTokens: 10,
|
|
194
|
+
keepRecentMessages: 2,
|
|
195
|
+
})
|
|
196
|
+
|
|
197
|
+
const result = await compactor.compact(
|
|
198
|
+
generateSessionId(),
|
|
199
|
+
generateTestAgentId(),
|
|
200
|
+
history,
|
|
201
|
+
runInference,
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
expect(result.ok).toBe(true)
|
|
205
|
+
if (!result.ok) return
|
|
206
|
+
|
|
207
|
+
expect(result.value.messagesRemoved).toBeGreaterThan(0)
|
|
208
|
+
expect(result.value.summary.trim().length).toBeGreaterThan(0)
|
|
209
|
+
// keepRecentMessages=2 → summary message + 2 kept = 3 total
|
|
210
|
+
expect(result.value.compactedMessages).toHaveLength(3)
|
|
211
|
+
expect(result.value.compactedMessages[0].role).toBe('user')
|
|
212
|
+
expect(result.value.compactedMessages[0].content as string).toContain(
|
|
213
|
+
'[CONVERSATION SUMMARY]',
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
// The auxiliary inference under the compactor should have served the
|
|
217
|
+
// prefix from cache (priming call wrote it).
|
|
218
|
+
expect(auxCalls).toHaveLength(1)
|
|
219
|
+
expect(auxCalls[0].metrics.cachedTokens ?? 0).toBeGreaterThan(1024)
|
|
220
|
+
}, 60_000)
|
|
221
|
+
})
|
|
@@ -1,10 +1,27 @@
|
|
|
1
1
|
import { describe, expect, it } from 'bun:test'
|
|
2
|
+
import z from 'zod/v4'
|
|
2
3
|
import { contextEvents } from '~/core/context/state.js'
|
|
3
4
|
import { MockLLMProvider } from '~/core/llm/mock.js'
|
|
5
|
+
import type { InferenceRequest } from '~/core/llm/provider.js'
|
|
4
6
|
import { ModelId } from '~/core/llm/schema.js'
|
|
7
|
+
import type { Preset } from '~/core/preset/index.js'
|
|
8
|
+
import { createTool } from '~/core/tools/definition.js'
|
|
9
|
+
import { ToolCallId } from '~/core/tools/schema.js'
|
|
5
10
|
import { createTestPreset, TestHarness } from '~/testing/index.js'
|
|
6
11
|
import { contextCompactPlugin } from './index.js'
|
|
7
12
|
|
|
13
|
+
/**
|
|
14
|
+
* Inline compaction sends the agent's regular systemPrompt and full conversation
|
|
15
|
+
* to the LLM, with a trailing user message containing the summarization
|
|
16
|
+
* instruction. We detect compaction calls by looking at that trailing message.
|
|
17
|
+
*/
|
|
18
|
+
function isSummarizationRequest(request: InferenceRequest): boolean {
|
|
19
|
+
const last = request.messages[request.messages.length - 1]
|
|
20
|
+
if (!last || last.role !== 'user') return false
|
|
21
|
+
const content = typeof last.content === 'string' ? last.content : JSON.stringify(last.content)
|
|
22
|
+
return content.includes('[CONTEXT COMPACTION REQUEST]')
|
|
23
|
+
}
|
|
24
|
+
|
|
8
25
|
// ============================================================================
|
|
9
26
|
// Helpers
|
|
10
27
|
// ============================================================================
|
|
@@ -50,7 +67,7 @@ describe('context-compact plugin', () => {
|
|
|
50
67
|
presets: [createCompactPreset(10)],
|
|
51
68
|
mockHandler: (request) => {
|
|
52
69
|
// Compaction requests end with a user message containing "[CONTEXT COMPACTION REQUEST]".
|
|
53
|
-
if (
|
|
70
|
+
if (isSummarizationRequest(request)) {
|
|
54
71
|
return {
|
|
55
72
|
content: 'Summary of conversation so far.',
|
|
56
73
|
toolCalls: [],
|
|
@@ -119,7 +136,7 @@ describe('context-compact plugin', () => {
|
|
|
119
136
|
inferenceCallCount++
|
|
120
137
|
|
|
121
138
|
// Summarization requests (from context-compact plugin)
|
|
122
|
-
if (
|
|
139
|
+
if (isSummarizationRequest(request)) {
|
|
123
140
|
return {
|
|
124
141
|
content: 'Conversation summary.',
|
|
125
142
|
toolCalls: [],
|
|
@@ -157,6 +174,121 @@ describe('context-compact plugin', () => {
|
|
|
157
174
|
})
|
|
158
175
|
})
|
|
159
176
|
|
|
177
|
+
// =========================================================================
|
|
178
|
+
// Pending tool results regression
|
|
179
|
+
// =========================================================================
|
|
180
|
+
|
|
181
|
+
describe('pending tool results', () => {
|
|
182
|
+
it('aux inference after a tool turn includes the tool_result before the summary instruction', async () => {
|
|
183
|
+
// Regression for the bug where context-compact's auxiliary inference call
|
|
184
|
+
// runs at a moment where `conversationHistory` ends with an assistant
|
|
185
|
+
// `tool_use` block but the corresponding tool_result is still in
|
|
186
|
+
// `pendingToolResults` (not yet committed to history). Sending
|
|
187
|
+
// `[..., assistant(tool_use), user(summary)]` to Anthropic 400s with
|
|
188
|
+
// "tool_use blocks must be followed by tool_result blocks".
|
|
189
|
+
|
|
190
|
+
const myTool = createTool({
|
|
191
|
+
name: 'my_tool',
|
|
192
|
+
description: 'returns a fixed value',
|
|
193
|
+
input: z.object({}),
|
|
194
|
+
execute: async () => ({ ok: true, value: 'tool result content' }),
|
|
195
|
+
})
|
|
196
|
+
|
|
197
|
+
const preset: Preset = {
|
|
198
|
+
id: 'test',
|
|
199
|
+
name: 'Tool Compaction Test',
|
|
200
|
+
orchestrator: {
|
|
201
|
+
system: 'You are a test agent.',
|
|
202
|
+
model: ModelId('mock'),
|
|
203
|
+
tools: [myTool],
|
|
204
|
+
agents: [],
|
|
205
|
+
debounceMs: 0,
|
|
206
|
+
},
|
|
207
|
+
agents: [],
|
|
208
|
+
plugins: [
|
|
209
|
+
contextCompactPlugin.configure({
|
|
210
|
+
compaction: {
|
|
211
|
+
model: ModelId('mock'),
|
|
212
|
+
maxTokens: 10,
|
|
213
|
+
// 1 so that after the tool turn, [user, assistant(tool_use)]
|
|
214
|
+
// splits into toCompact=[user], toKeep=[assistant(tool_use)] —
|
|
215
|
+
// the aux call actually runs and gets the buggy prefix.
|
|
216
|
+
keepRecentMessages: 1,
|
|
217
|
+
},
|
|
218
|
+
}),
|
|
219
|
+
],
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
let capturedAuxRequest: InferenceRequest | undefined
|
|
223
|
+
|
|
224
|
+
const harness = new TestHarness({
|
|
225
|
+
systemPlugins: [contextCompactPlugin],
|
|
226
|
+
presets: [preset],
|
|
227
|
+
mockHandler: (request) => {
|
|
228
|
+
if (isSummarizationRequest(request)) {
|
|
229
|
+
capturedAuxRequest = request
|
|
230
|
+
return {
|
|
231
|
+
content: 'Summary of conversation.',
|
|
232
|
+
toolCalls: [],
|
|
233
|
+
finishReason: 'stop',
|
|
234
|
+
metrics: MockLLMProvider.defaultMetrics(),
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
// First inference (no tool messages in history yet) → emit a tool call.
|
|
238
|
+
const hasToolMessages = request.messages.some((m) => m.role === 'tool')
|
|
239
|
+
if (!hasToolMessages) {
|
|
240
|
+
return {
|
|
241
|
+
content: '',
|
|
242
|
+
toolCalls: [{ id: ToolCallId('tc1'), name: 'my_tool', input: {} }],
|
|
243
|
+
finishReason: 'tool_calls',
|
|
244
|
+
metrics: MockLLMProvider.defaultMetrics(),
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
return {
|
|
248
|
+
content: 'Done.',
|
|
249
|
+
toolCalls: [],
|
|
250
|
+
finishReason: 'stop',
|
|
251
|
+
metrics: MockLLMProvider.defaultMetrics(),
|
|
252
|
+
}
|
|
253
|
+
},
|
|
254
|
+
})
|
|
255
|
+
|
|
256
|
+
const session = await harness.createSession('test')
|
|
257
|
+
await session.sendAndWaitForIdle('Please call my_tool')
|
|
258
|
+
|
|
259
|
+
expect(capturedAuxRequest).toBeDefined()
|
|
260
|
+
|
|
261
|
+
// Every assistant message with toolCalls must be followed by a contiguous
|
|
262
|
+
// run of tool messages covering each tool_use id before any further
|
|
263
|
+
// user/assistant message. This mirrors Anthropic's API contract.
|
|
264
|
+
const msgs = capturedAuxRequest!.messages
|
|
265
|
+
for (let i = 0; i < msgs.length; i++) {
|
|
266
|
+
const m = msgs[i]
|
|
267
|
+
if (m.role !== 'assistant' || !m.toolCalls?.length) continue
|
|
268
|
+
|
|
269
|
+
const expected = new Set(m.toolCalls.map((tc) => tc.id))
|
|
270
|
+
const seen = new Set<string>()
|
|
271
|
+
for (let j = i + 1; j < msgs.length; j++) {
|
|
272
|
+
const next = msgs[j]
|
|
273
|
+
if (next.role !== 'tool') break
|
|
274
|
+
seen.add(next.toolCallId)
|
|
275
|
+
}
|
|
276
|
+
for (const id of expected) {
|
|
277
|
+
expect(seen.has(id)).toBe(true)
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Compaction must have actually succeeded — pre-fix it would Err-out
|
|
282
|
+
// in production (mock accepts it but the assertion above already
|
|
283
|
+
// catches the malformed-prefix case).
|
|
284
|
+
const compactedEvents = await session.getEventsByType(contextEvents, 'context_compacted')
|
|
285
|
+
const actualCompactions = compactedEvents.filter((e) => e.messagesRemoved > 0)
|
|
286
|
+
expect(actualCompactions.length).toBeGreaterThanOrEqual(1)
|
|
287
|
+
|
|
288
|
+
await harness.shutdown()
|
|
289
|
+
})
|
|
290
|
+
})
|
|
291
|
+
|
|
160
292
|
// =========================================================================
|
|
161
293
|
// Compaction failure
|
|
162
294
|
// =========================================================================
|
|
@@ -170,7 +302,7 @@ describe('context-compact plugin', () => {
|
|
|
170
302
|
mockHandler: (request) => {
|
|
171
303
|
// Summarization requests — throw to simulate LLM failure.
|
|
172
304
|
// MockLLMProvider only returns Err() when the handler throws.
|
|
173
|
-
if (
|
|
305
|
+
if (isSummarizationRequest(request)) {
|
|
174
306
|
throw { type: 'server_error', message: 'LLM summarization failed' }
|
|
175
307
|
}
|
|
176
308
|
|