@roj-ai/sdk 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/dist/bootstrap.d.ts +13 -0
  2. package/dist/bootstrap.d.ts.map +1 -1
  3. package/dist/bootstrap.js +3 -1
  4. package/dist/bootstrap.js.map +1 -1
  5. package/dist/config.d.ts +2 -0
  6. package/dist/config.d.ts.map +1 -1
  7. package/dist/config.js +3 -0
  8. package/dist/config.js.map +1 -1
  9. package/dist/core/agents/agent.d.ts +25 -1
  10. package/dist/core/agents/agent.d.ts.map +1 -1
  11. package/dist/core/agents/agent.js +117 -21
  12. package/dist/core/agents/agent.js.map +1 -1
  13. package/dist/core/agents/config.d.ts +7 -0
  14. package/dist/core/agents/config.d.ts.map +1 -1
  15. package/dist/core/agents/context.d.ts +10 -0
  16. package/dist/core/agents/context.d.ts.map +1 -1
  17. package/dist/core/agents/state.d.ts +11 -3
  18. package/dist/core/agents/state.d.ts.map +1 -1
  19. package/dist/core/agents/state.js.map +1 -1
  20. package/dist/core/file-store/file-store.d.ts +5 -1
  21. package/dist/core/file-store/file-store.d.ts.map +1 -1
  22. package/dist/core/file-store/file-store.js +31 -21
  23. package/dist/core/file-store/file-store.js.map +1 -1
  24. package/dist/core/image/vips-resizer.test.js +26 -14
  25. package/dist/core/image/vips-resizer.test.js.map +1 -1
  26. package/dist/core/llm/anthropic.d.ts.map +1 -1
  27. package/dist/core/llm/anthropic.js +11 -8
  28. package/dist/core/llm/anthropic.js.map +1 -1
  29. package/dist/core/llm/cache-breakpoints.d.ts +5 -1
  30. package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
  31. package/dist/core/llm/cache-breakpoints.js +10 -5
  32. package/dist/core/llm/cache-breakpoints.js.map +1 -1
  33. package/dist/core/sessions/session.d.ts.map +1 -1
  34. package/dist/core/sessions/session.js +10 -0
  35. package/dist/core/sessions/session.js.map +1 -1
  36. package/dist/core/sessions/session.test.js +5 -0
  37. package/dist/core/sessions/session.test.js.map +1 -1
  38. package/dist/core/sessions/state.d.ts.map +1 -1
  39. package/dist/core/sessions/state.js +5 -1
  40. package/dist/core/sessions/state.js.map +1 -1
  41. package/dist/core/tools/executor.test.js +1 -0
  42. package/dist/core/tools/executor.test.js.map +1 -1
  43. package/dist/lib/utils/concurrency.d.ts +25 -0
  44. package/dist/lib/utils/concurrency.d.ts.map +1 -0
  45. package/dist/lib/utils/concurrency.js +69 -0
  46. package/dist/lib/utils/concurrency.js.map +1 -0
  47. package/dist/lib/utils/concurrency.test.d.ts +2 -0
  48. package/dist/lib/utils/concurrency.test.d.ts.map +1 -0
  49. package/dist/lib/utils/concurrency.test.js +135 -0
  50. package/dist/lib/utils/concurrency.test.js.map +1 -0
  51. package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
  52. package/dist/plugins/agent-status/plugin.js +18 -26
  53. package/dist/plugins/agent-status/plugin.js.map +1 -1
  54. package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
  55. package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
  56. package/dist/plugins/context-compact/compaction-live.test.js +177 -0
  57. package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
  58. package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
  59. package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
  60. package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
  61. package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
  62. package/dist/plugins/context-compact/context-compactor.js +60 -36
  63. package/dist/plugins/context-compact/context-compactor.js.map +1 -1
  64. package/dist/plugins/context-compact/context-compactor.test.js +69 -103
  65. package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
  66. package/dist/plugins/context-compact/plugin.d.ts +9 -2
  67. package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
  68. package/dist/plugins/context-compact/plugin.js +8 -4
  69. package/dist/plugins/context-compact/plugin.js.map +1 -1
  70. package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
  71. package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
  72. package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
  73. package/dist/plugins/filesystem/plugin.js +8 -6
  74. package/dist/plugins/filesystem/plugin.js.map +1 -1
  75. package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
  76. package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
  77. package/dist/plugins/resources/plugin.d.ts.map +1 -1
  78. package/dist/plugins/resources/plugin.js +4 -1
  79. package/dist/plugins/resources/plugin.js.map +1 -1
  80. package/dist/plugins/uploads/plugin.d.ts +12 -0
  81. package/dist/plugins/uploads/plugin.d.ts.map +1 -1
  82. package/dist/plugins/uploads/plugin.js +188 -44
  83. package/dist/plugins/uploads/plugin.js.map +1 -1
  84. package/dist/plugins/uploads/preprocessors/image-classifier.d.ts +9 -0
  85. package/dist/plugins/uploads/preprocessors/image-classifier.d.ts.map +1 -1
  86. package/dist/plugins/uploads/preprocessors/image-classifier.js +4 -1
  87. package/dist/plugins/uploads/preprocessors/image-classifier.js.map +1 -1
  88. package/dist/plugins/uploads/preprocessors/image-classifier.test.d.ts +2 -0
  89. package/dist/plugins/uploads/preprocessors/image-classifier.test.d.ts.map +1 -0
  90. package/dist/plugins/uploads/preprocessors/image-classifier.test.js +113 -0
  91. package/dist/plugins/uploads/preprocessors/image-classifier.test.js.map +1 -0
  92. package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.d.ts.map +1 -1
  93. package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js +8 -7
  94. package/dist/plugins/uploads/preprocessors/markitdown-preprocessor.js.map +1 -1
  95. package/dist/plugins/uploads/preprocessors/zip-preprocessor.d.ts.map +1 -1
  96. package/dist/plugins/uploads/preprocessors/zip-preprocessor.js +35 -15
  97. package/dist/plugins/uploads/preprocessors/zip-preprocessor.js.map +1 -1
  98. package/dist/plugins/uploads/state.d.ts +1 -0
  99. package/dist/plugins/uploads/state.d.ts.map +1 -1
  100. package/dist/plugins/uploads/state.js +1 -1
  101. package/dist/plugins/uploads/state.js.map +1 -1
  102. package/dist/plugins/uploads/uploads.integration.test.js +97 -0
  103. package/dist/plugins/uploads/uploads.integration.test.js.map +1 -1
  104. package/dist/plugins/user-chat/plugin.d.ts +2 -0
  105. package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
  106. package/dist/plugins/user-chat/plugin.js +47 -3
  107. package/dist/plugins/user-chat/plugin.js.map +1 -1
  108. package/dist/plugins/user-chat/schema.d.ts +10 -0
  109. package/dist/plugins/user-chat/schema.d.ts.map +1 -1
  110. package/dist/plugins/user-chat/schema.js +1 -0
  111. package/dist/plugins/user-chat/schema.js.map +1 -1
  112. package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
  113. package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
  114. package/dist/transport/http/routes/upload.d.ts.map +1 -1
  115. package/dist/transport/http/routes/upload.js +60 -0
  116. package/dist/transport/http/routes/upload.js.map +1 -1
  117. package/package.json +2 -2
  118. package/src/bootstrap.ts +3 -1
  119. package/src/config.ts +6 -0
  120. package/src/core/agents/agent.ts +134 -20
  121. package/src/core/agents/config.ts +7 -0
  122. package/src/core/agents/context.ts +11 -0
  123. package/src/core/agents/state.ts +11 -4
  124. package/src/core/file-store/file-store.ts +38 -18
  125. package/src/core/image/vips-resizer.test.ts +26 -15
  126. package/src/core/llm/anthropic.ts +19 -12
  127. package/src/core/llm/cache-breakpoints.ts +15 -6
  128. package/src/core/sessions/session.test.ts +6 -0
  129. package/src/core/sessions/session.ts +12 -0
  130. package/src/core/sessions/state.ts +5 -1
  131. package/src/core/tools/executor.test.ts +1 -0
  132. package/src/lib/utils/concurrency.test.ts +169 -0
  133. package/src/lib/utils/concurrency.ts +72 -0
  134. package/src/plugins/agent-status/plugin.ts +18 -25
  135. package/src/plugins/context-compact/compaction-live.test.ts +221 -0
  136. package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
  137. package/src/plugins/context-compact/context-compactor.test.ts +71 -110
  138. package/src/plugins/context-compact/context-compactor.ts +88 -43
  139. package/src/plugins/context-compact/plugin.ts +19 -10
  140. package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
  141. package/src/plugins/filesystem/plugin.ts +8 -6
  142. package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
  143. package/src/plugins/resources/plugin.ts +4 -1
  144. package/src/plugins/uploads/plugin.ts +212 -47
  145. package/src/plugins/uploads/preprocessors/image-classifier.test.ts +142 -0
  146. package/src/plugins/uploads/preprocessors/image-classifier.ts +13 -1
  147. package/src/plugins/uploads/preprocessors/markitdown-preprocessor.ts +8 -8
  148. package/src/plugins/uploads/preprocessors/zip-preprocessor.ts +37 -17
  149. package/src/plugins/uploads/state.ts +1 -1
  150. package/src/plugins/uploads/uploads.integration.test.ts +123 -0
  151. package/src/plugins/user-chat/plugin.ts +60 -3
  152. package/src/plugins/user-chat/schema.ts +10 -1
  153. package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
  154. package/src/transport/http/routes/upload.ts +87 -0
@@ -0,0 +1,169 @@
1
+ import { describe, expect, it } from 'bun:test'
2
+ import { mapWithConcurrency, Semaphore } from './concurrency.js'
3
+
4
/**
 * Create a promise together with the functions that settle it.
 *
 * Test helper: lets a test decide exactly when a pending task finishes
 * instead of relying on timers.
 *
 * @returns The pending `promise` plus its `resolve` and `reject` callbacks.
 */
function defer<T = void>(): { promise: Promise<T>; resolve: (v: T) => void; reject: (e: unknown) => void } {
  // The Promise executor runs synchronously, so both callbacks are assigned
  // before the object below is built.
  let fulfil!: (v: T) => void
  let fail!: (e: unknown) => void
  const promise = new Promise<T>((onFulfil, onFail) => {
    fulfil = onFulfil
    fail = onFail
  })
  return { promise, resolve: fulfil, reject: fail }
}
13
+
14
+ describe('mapWithConcurrency', () => {
15
+ it('preserves input order regardless of completion order', async () => {
16
+ const delays = [50, 10, 30, 5, 20]
17
+ const results = await mapWithConcurrency(delays, 3, async (ms, i) => {
18
+ await new Promise(r => setTimeout(r, ms))
19
+ return `${i}:${ms}`
20
+ })
21
+ expect(results).toEqual(['0:50', '1:10', '2:30', '3:5', '4:20'])
22
+ })
23
+
24
+ it('does not exceed the concurrency limit', async () => {
25
+ let active = 0
26
+ let peak = 0
27
+ const items = Array.from({ length: 20 }, (_, i) => i)
28
+
29
+ await mapWithConcurrency(items, 4, async () => {
30
+ active++
31
+ peak = Math.max(peak, active)
32
+ await new Promise(r => setTimeout(r, 5))
33
+ active--
34
+ })
35
+
36
+ expect(peak).toBeLessThanOrEqual(4)
37
+ expect(peak).toBe(4)
38
+ })
39
+
40
+ it('handles empty input without spawning workers', async () => {
41
+ let calls = 0
42
+ const results = await mapWithConcurrency([], 5, async () => {
43
+ calls++
44
+ return 1
45
+ })
46
+ expect(results).toEqual([])
47
+ expect(calls).toBe(0)
48
+ })
49
+
50
+ it('caps worker count at item count when concurrency > items', async () => {
51
+ let active = 0
52
+ let peak = 0
53
+ const items = [1, 2]
54
+
55
+ await mapWithConcurrency(items, 10, async () => {
56
+ active++
57
+ peak = Math.max(peak, active)
58
+ await new Promise(r => setTimeout(r, 5))
59
+ active--
60
+ })
61
+
62
+ expect(peak).toBe(2)
63
+ })
64
+
65
+ it('propagates errors thrown by the worker fn', async () => {
66
+ await expect(
67
+ mapWithConcurrency([1, 2, 3], 2, async (n) => {
68
+ if (n === 2) throw new Error('boom')
69
+ return n
70
+ }),
71
+ ).rejects.toThrow('boom')
72
+ })
73
+ })
74
+
75
+ describe('Semaphore', () => {
76
+ it('rejects invalid limits', () => {
77
+ expect(() => new Semaphore(0)).toThrow()
78
+ expect(() => new Semaphore(-1)).toThrow()
79
+ expect(() => new Semaphore(1.5)).toThrow()
80
+ })
81
+
82
+ it('caps concurrent executions at limit', async () => {
83
+ const gate = new Semaphore(3)
84
+ let active = 0
85
+ let peak = 0
86
+
87
+ const tasks = Array.from({ length: 12 }, () =>
88
+ gate.run(async () => {
89
+ active++
90
+ peak = Math.max(peak, active)
91
+ await new Promise(r => setTimeout(r, 10))
92
+ active--
93
+ }),
94
+ )
95
+
96
+ await Promise.all(tasks)
97
+ expect(peak).toBe(3)
98
+ expect(active).toBe(0)
99
+ })
100
+
101
+ it('admits waiters in FIFO order', async () => {
102
+ const gate = new Semaphore(1)
103
+ const order: number[] = []
104
+ const blocker = defer()
105
+
106
+ // Hold the only slot
107
+ const held = gate.run(async () => {
108
+ await blocker.promise
109
+ })
110
+
111
+ // Queue three waiters in known order, with their own blockers so the test
112
+ // can observe entry order without relying on real timers.
113
+ const gates = [defer(), defer(), defer()]
114
+ const queued = gates.map((g, i) =>
115
+ gate.run(async () => {
116
+ order.push(i)
117
+ await g.promise
118
+ }),
119
+ )
120
+
121
+ // Yield so all three are queued.
122
+ await new Promise(r => setTimeout(r, 0))
123
+ expect(order).toEqual([])
124
+
125
+ // Release the holder; waiter 0 should run first.
126
+ blocker.resolve()
127
+ await new Promise(r => setTimeout(r, 0))
128
+ expect(order).toEqual([0])
129
+
130
+ gates[0]!.resolve()
131
+ await new Promise(r => setTimeout(r, 0))
132
+ expect(order).toEqual([0, 1])
133
+
134
+ gates[1]!.resolve()
135
+ await new Promise(r => setTimeout(r, 0))
136
+ expect(order).toEqual([0, 1, 2])
137
+
138
+ gates[2]!.resolve()
139
+ await Promise.all([held, ...queued])
140
+ })
141
+
142
+ it('releases the slot when the body throws', async () => {
143
+ const gate = new Semaphore(1)
144
+
145
+ await expect(gate.run(async () => {
146
+ throw new Error('boom')
147
+ })).rejects.toThrow('boom')
148
+
149
+ // Slot must be free again — this would deadlock otherwise.
150
+ const result = await gate.run(async () => 42)
151
+ expect(result).toBe(42)
152
+ })
153
+
154
+ it('serializes work under limit=1', async () => {
155
+ const gate = new Semaphore(1)
156
+ const events: string[] = []
157
+
158
+ const tasks = [0, 1, 2].map(i =>
159
+ gate.run(async () => {
160
+ events.push(`start:${i}`)
161
+ await new Promise(r => setTimeout(r, 5))
162
+ events.push(`end:${i}`)
163
+ }),
164
+ )
165
+
166
+ await Promise.all(tasks)
167
+ expect(events).toEqual(['start:0', 'end:0', 'start:1', 'end:1', 'start:2', 'end:2'])
168
+ })
169
+ })
@@ -0,0 +1,72 @@
1
/**
 * Map `items` through an async `fn` while keeping at most `concurrency`
 * calls in flight.
 *
 * A pool of workers pulls indices from a shared cursor, so a slow item never
 * holds back the remaining ones. Each result is stored at the index of its
 * input, so the output order always matches the input order.
 *
 * @param items - Inputs to map; the array is not mutated.
 * @param concurrency - Maximum number of simultaneous `fn` calls. Values
 *   below 1 and `NaN` are treated as 1; fractional values are rounded down.
 * @param fn - Async mapper; receives the item and its index.
 * @returns The mapped results, in input order.
 * @throws Rejects with the first error raised by `fn`. Once a call has
 *   failed no further items are started; calls already in flight still
 *   settle, but their results are discarded.
 */
export async function mapWithConcurrency<T, R>(
  items: readonly T[],
  concurrency: number,
  fn: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length)
  if (items.length === 0) return results

  // `Math.max(1, NaN)` is NaN, which would size the pool at zero workers and
  // silently return an array of holes — normalize before sizing the pool.
  const requested = Number.isNaN(concurrency) ? 1 : Math.floor(concurrency)
  const workerCount = Math.max(1, Math.min(requested, items.length))

  let cursor = 0
  let failed = false

  const worker = async (): Promise<void> => {
    // Stop claiming new items as soon as any worker has failed: the caller
    // already sees a rejection, so further work would only be wasted.
    while (!failed) {
      const index = cursor++
      if (index >= items.length) return
      try {
        results[index] = await fn(items[index] as T, index)
      } catch (error) {
        failed = true
        throw error
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, worker))
  return results
}
25
+
26
/**
 * FIFO counting semaphore.
 *
 * `run(fn)` waits for a free slot, invokes `fn`, and gives the slot back once
 * `fn` settles (fulfilled or rejected). Callers that cannot get a slot
 * immediately are queued and admitted in arrival order. Useful for capping
 * contention on a shared resource (e.g. concurrent LLM calls) no matter how
 * many call sites compete for it.
 */
export class Semaphore {
  /** Number of slots currently held. */
  private active = 0
  /** Wake-up callbacks of callers waiting for a slot, oldest first. */
  private readonly waiters: Array<() => void> = []

  /**
   * @param limit - Maximum number of concurrently running bodies.
   * @throws Error when `limit` is not an integer >= 1.
   */
  constructor(private readonly limit: number) {
    const isValid = Number.isInteger(limit) && limit >= 1
    if (!isValid) {
      throw new Error(`Semaphore limit must be a positive integer, got ${limit}`)
    }
  }

  /**
   * Execute `fn` while holding one slot.
   *
   * @returns Whatever `fn` resolves to; rejections from `fn` propagate after
   *   the slot has been released.
   */
  async run<T>(fn: () => Promise<T>): Promise<T> {
    await this.acquire()
    try {
      const outcome = await fn()
      return outcome
    } finally {
      this.release()
    }
  }

  /** Take a slot right away when one is free, otherwise join the queue. */
  private acquire(): Promise<void> {
    if (this.active >= this.limit) {
      return new Promise<void>(wake => {
        this.waiters.push(wake)
      })
    }
    this.active += 1
    return Promise.resolve()
  }

  /** Pass the slot to the oldest waiter, or free it when nobody is waiting. */
  private release(): void {
    const handoff = this.waiters.shift()
    if (handoff === undefined) {
      this.active -= 1
      return
    }
    // The slot moves straight to the waiter, so `active` is left untouched.
    handoff()
  }
}
@@ -38,40 +38,33 @@ export const agentStatusPlugin = definePlugin('agent-status')
38
38
  sessionId: ctx.sessionId,
39
39
  agentId: ctx.agentId,
40
40
  status: 'thinking',
41
+ definitionName: ctx.agentState.definitionName,
41
42
  timestamp: Date.now(),
42
43
  })
43
44
  }
44
45
  return null
45
46
  })
46
47
  .hook('onComplete', async (ctx) => {
47
- if (ctx.parentId !== null) {
48
- ctx.notify('agentStatus', {
49
- sessionId: ctx.sessionId,
50
- agentId: ctx.agentId,
51
- status: 'idle',
52
- definitionName: ctx.agentState.definitionName,
53
- timestamp: Date.now(),
54
- })
55
- } else {
56
- ctx.notify('agentStatus', {
57
- sessionId: ctx.sessionId,
58
- agentId: ctx.agentId,
59
- status: 'idle',
60
- timestamp: Date.now(),
61
- })
62
- }
48
+ ctx.notify('agentStatus', {
49
+ sessionId: ctx.sessionId,
50
+ agentId: ctx.agentId,
51
+ status: 'idle',
52
+ definitionName: ctx.agentState.definitionName,
53
+ timestamp: Date.now(),
54
+ })
63
55
  return null
64
56
  })
65
57
  .hook('onError', async (ctx) => {
66
- if (ctx.parentId !== null) {
67
- ctx.notify('agentStatus', {
68
- sessionId: ctx.sessionId,
69
- agentId: ctx.agentId,
70
- status: 'idle',
71
- definitionName: ctx.agentState.definitionName,
72
- timestamp: Date.now(),
73
- })
74
- }
58
+ // Emit idle for both entry and sub-agents — without this the client's
59
+ // `activeAgents` map keeps the agent flagged thinking until reconnect,
60
+ // since the session-store no longer clears it on chat_message/ask_user.
61
+ ctx.notify('agentStatus', {
62
+ sessionId: ctx.sessionId,
63
+ agentId: ctx.agentId,
64
+ status: 'idle',
65
+ definitionName: ctx.agentState.definitionName,
66
+ timestamp: Date.now(),
67
+ })
75
68
  return null
76
69
  })
77
70
  .build()
@@ -0,0 +1,221 @@
1
+ /**
2
+ * Live compaction tests against the real Anthropic API.
3
+ *
4
+ * Opt-in: run with `LIVE_TESTS=1 ANTHROPIC_API_KEY=…`. Skipped otherwise so the
5
+ * default `bun test` run stays hermetic.
6
+ *
7
+ * These cover the two real-API claims of the context-compact rewrite (5c27ab7):
8
+ * 1. The auxiliary-inference call (used by `ContextCompactor`) reuses the
9
+ * agent's warm prompt cache — only the trailing instruction + output are
10
+ * paid for, not the whole conversation a second time.
11
+ * 2. `DEFAULT_SUMMARY_INSTRUCTION` actually elicits a plain-text summary from
12
+ * a real Sonnet/Haiku-class model (no tool calls, non-empty content), so
13
+ * the end-to-end `ContextCompactor.compact()` path produces a usable
14
+ * `[CONVERSATION SUMMARY]` block.
15
+ */
16
+
17
+ import { describe, expect, test } from 'bun:test'
18
+ import { generateTestAgentId } from '~/core/agents/schema.js'
19
+ import type { ImageProcessor } from '~/core/image/types.js'
20
+ import type { LLMMessage } from '~/core/agents/state.js'
21
+ import { AnthropicProvider } from '~/core/llm/anthropic.js'
22
+ import { applyCacheBreakpoint } from '~/core/llm/cache-breakpoints.js'
23
+ import type { InferenceResponse, LLMError } from '~/core/llm/provider.js'
24
+ import { ModelId } from '~/core/llm/schema.js'
25
+ import { generateSessionId } from '~/core/sessions/schema.js'
26
+ import { silentLogger } from '~/lib/logger/logger.js'
27
+ import type { Result } from '~/lib/utils/result.js'
28
+ import {
29
+ ContextCompactor,
30
+ DEFAULT_SUMMARY_INSTRUCTION,
31
+ type RunInferenceFn,
32
+ } from './context-compactor.js'
33
+
34
+ const liveEnabled = process.env.LIVE_TESTS === '1'
35
+ const anthropicApiKey = liveEnabled ? process.env.ANTHROPIC_API_KEY : undefined
36
+
37
/**
 * Register a live-API suite, or register it as skipped when no key is usable.
 *
 * The key passed in is already gated on `LIVE_TESTS=1` by the module-level
 * setup, so a falsy `apiKey` can mean either "live tests not enabled" or
 * "key not set". The skip label therefore names both requirements instead of
 * blaming a missing key only.
 *
 * @param name - Suite title.
 * @param apiKey - API key for the live provider; falsy means skip.
 * @param fn - Suite body passed through to `describe` / `describe.skip`.
 */
const describeLive = (name: string, apiKey: string | undefined, fn: () => void) => {
  if (!apiKey) {
    describe.skip(`${name} (skipped — requires LIVE_TESTS=1 and an API key)`, fn)
    return
  }
  describe(name, fn)
}
44
+
45
+ const noopImageProcessor: ImageProcessor = {
46
+ resolveContent: async (content) => content,
47
+ }
48
+
49
+ const MODEL = ModelId('claude-haiku-4-5-20251001')
50
+
51
+ /**
52
+ * Padded system prompt so the cacheable prefix comfortably exceeds Anthropic's
53
+ * 1024-token minimum for Haiku. Deterministic content so identical calls reuse
54
+ * the exact same prefix.
55
+ */
56
+ const LARGE_SYSTEM_PROMPT = [
57
+ 'You are a meticulous assistant helping a developer review a long conversation.',
58
+ 'Always respond concisely and accurately. Never speculate beyond the data.',
59
+ 'Follow these rules strictly:',
60
+ ...Array.from(
61
+ { length: 120 },
62
+ (_, i) => `- Rule ${i + 1}: When asked about topic ${i + 1}, prefer factual sources and decline to speculate.`,
63
+ ),
64
+ 'End of instructions.',
65
+ ].join('\n')
66
+
67
+ /**
68
+ * Deterministic multi-turn history padded well past 1024 tokens so cache writes
69
+ * and reads are unambiguous. Includes both user and assistant messages, which
70
+ * is the shape an inline-compaction call sees in practice.
71
+ */
72
/**
 * Build a fixed 8-turn conversation (16 messages, alternating user then
 * assistant) in which every message carries 30 repetitions of filler text.
 * The output is identical on every call.
 */
function buildLongHistory(): LLMMessage[] {
  const TURNS = 8
  const userPadding = 'context detail '.repeat(30)
  const assistantPadding = 'analysis detail '.repeat(30)

  const messages: LLMMessage[] = []
  for (let turn = 1; turn <= TURNS; turn += 1) {
    const user: LLMMessage = {
      role: 'user',
      content: `Turn ${turn} user: please analyze topic ${turn}. ${userPadding}`,
    }
    const assistant: LLMMessage = {
      role: 'assistant',
      content: `Turn ${turn} assistant: here is my analysis of topic ${turn}. ${assistantPadding}`,
    }
    messages.push(user, assistant)
  }
  return messages
}
90
+
91
+ describeLive('context-compact live: auxiliary inference cache reuse', anthropicApiKey, () => {
92
+ test('priming inference then auxiliary summary call hits the prompt cache', async () => {
93
+ const provider = new AnthropicProvider({
94
+ apiKey: anthropicApiKey!,
95
+ imageProcessor: noopImageProcessor,
96
+ defaultModel: 'claude-haiku-4-5-20251001',
97
+ })
98
+
99
+ const history = buildLongHistory()
100
+
101
+ // Call 1: regular inference, breakpoint on the last history message.
102
+ // Mirrors what an Agent.advance() turn does just before requesting a
103
+ // compaction summary.
104
+ const primeMessages = applyCacheBreakpoint(history, 0, '1h')
105
+ const prime = await provider.inference({
106
+ model: MODEL,
107
+ systemPrompt: LARGE_SYSTEM_PROMPT,
108
+ messages: primeMessages,
109
+ })
110
+ if (!prime.ok) {
111
+ if (prime.error.message?.includes('credit balance')) {
112
+ console.warn('⚠️ Live compaction cache test skipped: credit balance too low')
113
+ return
114
+ }
115
+ throw new Error(`prime call failed: ${JSON.stringify(prime.error)}`)
116
+ }
117
+ expect(prime.value.metrics.promptTokens).toBeGreaterThan(1024)
118
+
119
+ // Call 2: simulate Agent.runAuxiliaryInference — append the summary
120
+ // instruction as a trailing user message and place the breakpoint at
121
+ // the same position as the prime call (last history message), so the
122
+ // whole prefix lands as a cache read.
123
+ const summaryInstruction: LLMMessage = { role: 'user', content: DEFAULT_SUMMARY_INSTRUCTION }
124
+ const auxMessages = applyCacheBreakpoint([...history, summaryInstruction], 1, '1h')
125
+ const aux = await provider.inference({
126
+ model: MODEL,
127
+ systemPrompt: LARGE_SYSTEM_PROMPT,
128
+ messages: auxMessages,
129
+ })
130
+ if (!aux.ok) throw new Error(`auxiliary call failed: ${JSON.stringify(aux.error)}`)
131
+
132
+ // Core claim: the auxiliary call served the prefix from cache (otherwise
133
+ // we'd re-upload the entire conversation just to get a summary).
134
+ expect(aux.value.metrics.cachedTokens ?? 0).toBeGreaterThan(1024)
135
+
136
+ // Sonnet/Haiku reliably emit plain text under DEFAULT_SUMMARY_INSTRUCTION.
137
+ // If this regresses, the prompt is no longer fit for purpose.
138
+ expect(aux.value.toolCalls).toHaveLength(0)
139
+ expect(aux.value.finishReason).toBe('stop')
140
+ expect(aux.value.content ?? '').not.toBe('')
141
+ }, 60_000)
142
+ })
143
+
144
+ describeLive('context-compact live: end-to-end compactor', anthropicApiKey, () => {
145
+ test('ContextCompactor.compact() produces a real summary from a real model', async () => {
146
+ const provider = new AnthropicProvider({
147
+ apiKey: anthropicApiKey!,
148
+ imageProcessor: noopImageProcessor,
149
+ defaultModel: 'claude-haiku-4-5-20251001',
150
+ })
151
+
152
+ const history = buildLongHistory()
153
+
154
+ // Wraps the provider exactly the way AgentContext.runAuxiliaryInference
155
+ // does: full prefix + extraMessages, breakpoint pinned to the last
156
+ // pre-extraMessages position so the agent's cache is reused.
157
+ const auxCalls: InferenceResponse[] = []
158
+ const runInference: RunInferenceFn = async (
159
+ extraMessages,
160
+ ): Promise<Result<InferenceResponse, LLMError>> => {
161
+ const messages = applyCacheBreakpoint(
162
+ [...history, ...extraMessages],
163
+ extraMessages.length,
164
+ '1h',
165
+ )
166
+ const result = await provider.inference({
167
+ model: MODEL,
168
+ systemPrompt: LARGE_SYSTEM_PROMPT,
169
+ messages,
170
+ })
171
+ if (result.ok) auxCalls.push(result.value)
172
+ return result
173
+ }
174
+
175
+ // Prime the cache with one regular inference first, otherwise the
176
+ // auxiliary call would pay full write cost — same as a real session
177
+ // where compaction always runs after at least one normal turn.
178
+ const prime = await provider.inference({
179
+ model: MODEL,
180
+ systemPrompt: LARGE_SYSTEM_PROMPT,
181
+ messages: applyCacheBreakpoint(history, 0, '1h'),
182
+ })
183
+ if (!prime.ok) {
184
+ if (prime.error.message?.includes('credit balance')) {
185
+ console.warn('⚠️ Live compaction end-to-end test skipped: credit balance too low')
186
+ return
187
+ }
188
+ throw new Error(`prime call failed: ${JSON.stringify(prime.error)}`)
189
+ }
190
+
191
+ const compactor = new ContextCompactor(silentLogger, {
192
+ // Force compaction regardless of estimator vs actual tokens.
193
+ maxTokens: 10,
194
+ keepRecentMessages: 2,
195
+ })
196
+
197
+ const result = await compactor.compact(
198
+ generateSessionId(),
199
+ generateTestAgentId(),
200
+ history,
201
+ runInference,
202
+ )
203
+
204
+ expect(result.ok).toBe(true)
205
+ if (!result.ok) return
206
+
207
+ expect(result.value.messagesRemoved).toBeGreaterThan(0)
208
+ expect(result.value.summary.trim().length).toBeGreaterThan(0)
209
+ // keepRecentMessages=2 → summary message + 2 kept = 3 total
210
+ expect(result.value.compactedMessages).toHaveLength(3)
211
+ expect(result.value.compactedMessages[0].role).toBe('user')
212
+ expect(result.value.compactedMessages[0].content as string).toContain(
213
+ '[CONVERSATION SUMMARY]',
214
+ )
215
+
216
+ // The auxiliary inference under the compactor should have served the
217
+ // prefix from cache (priming call wrote it).
218
+ expect(auxCalls).toHaveLength(1)
219
+ expect(auxCalls[0].metrics.cachedTokens ?? 0).toBeGreaterThan(1024)
220
+ }, 60_000)
221
+ })
@@ -1,10 +1,27 @@
1
1
  import { describe, expect, it } from 'bun:test'
2
+ import z from 'zod/v4'
2
3
  import { contextEvents } from '~/core/context/state.js'
3
4
  import { MockLLMProvider } from '~/core/llm/mock.js'
5
+ import type { InferenceRequest } from '~/core/llm/provider.js'
4
6
  import { ModelId } from '~/core/llm/schema.js'
7
+ import type { Preset } from '~/core/preset/index.js'
8
+ import { createTool } from '~/core/tools/definition.js'
9
+ import { ToolCallId } from '~/core/tools/schema.js'
5
10
  import { createTestPreset, TestHarness } from '~/testing/index.js'
6
11
  import { contextCompactPlugin } from './index.js'
7
12
 
13
/**
 * Tell compaction (summarization) calls apart from regular inference calls.
 *
 * Inline compaction reuses the agent's normal systemPrompt and conversation
 * and only appends a trailing user message carrying the summarization
 * instruction, so that last message is what gets inspected here.
 *
 * @returns true when the final message is a user message whose content
 *   (stringified when it is not already a string) contains the
 *   `[CONTEXT COMPACTION REQUEST]` marker.
 */
function isSummarizationRequest(request: InferenceRequest): boolean {
  const { messages } = request
  const trailing = messages[messages.length - 1]
  if (!trailing) return false
  if (trailing.role !== 'user') return false

  const text = typeof trailing.content === 'string'
    ? trailing.content
    : JSON.stringify(trailing.content)
  return text.includes('[CONTEXT COMPACTION REQUEST]')
}
24
+
8
25
  // ============================================================================
9
26
  // Helpers
10
27
  // ============================================================================
@@ -50,7 +67,7 @@ describe('context-compact plugin', () => {
50
67
  presets: [createCompactPreset(10)],
51
68
  mockHandler: (request) => {
52
69
  // Compaction requests are identified by their trailing '[CONTEXT COMPACTION REQUEST]' user message.
53
- if (request.systemPrompt.includes('summary') || request.systemPrompt.includes('Summarize')) {
70
+ if (isSummarizationRequest(request)) {
54
71
  return {
55
72
  content: 'Summary of conversation so far.',
56
73
  toolCalls: [],
@@ -119,7 +136,7 @@ describe('context-compact plugin', () => {
119
136
  inferenceCallCount++
120
137
 
121
138
  // Summarization requests (from context-compact plugin)
122
- if (request.systemPrompt.includes('summary') || request.systemPrompt.includes('Summarize')) {
139
+ if (isSummarizationRequest(request)) {
123
140
  return {
124
141
  content: 'Conversation summary.',
125
142
  toolCalls: [],
@@ -157,6 +174,121 @@ describe('context-compact plugin', () => {
157
174
  })
158
175
  })
159
176
 
177
+ // =========================================================================
178
+ // Pending tool results regression
179
+ // =========================================================================
180
+
181
+ describe('pending tool results', () => {
182
+ it('aux inference after a tool turn includes the tool_result before the summary instruction', async () => {
183
+ // Regression for the bug where context-compact's auxiliary inference call
184
+ // runs at a moment where `conversationHistory` ends with an assistant
185
+ // `tool_use` block but the corresponding tool_result is still in
186
+ // `pendingToolResults` (not yet committed to history). Sending
187
+ // `[..., assistant(tool_use), user(summary)]` to Anthropic 400s with
188
+ // "tool_use blocks must be followed by tool_result blocks".
189
+
190
+ const myTool = createTool({
191
+ name: 'my_tool',
192
+ description: 'returns a fixed value',
193
+ input: z.object({}),
194
+ execute: async () => ({ ok: true, value: 'tool result content' }),
195
+ })
196
+
197
+ const preset: Preset = {
198
+ id: 'test',
199
+ name: 'Tool Compaction Test',
200
+ orchestrator: {
201
+ system: 'You are a test agent.',
202
+ model: ModelId('mock'),
203
+ tools: [myTool],
204
+ agents: [],
205
+ debounceMs: 0,
206
+ },
207
+ agents: [],
208
+ plugins: [
209
+ contextCompactPlugin.configure({
210
+ compaction: {
211
+ model: ModelId('mock'),
212
+ maxTokens: 10,
213
+ // 1 so that after the tool turn, [user, assistant(tool_use)]
214
+ // splits into toCompact=[user], toKeep=[assistant(tool_use)] —
215
+ // the aux call actually runs and gets the buggy prefix.
216
+ keepRecentMessages: 1,
217
+ },
218
+ }),
219
+ ],
220
+ }
221
+
222
+ let capturedAuxRequest: InferenceRequest | undefined
223
+
224
+ const harness = new TestHarness({
225
+ systemPlugins: [contextCompactPlugin],
226
+ presets: [preset],
227
+ mockHandler: (request) => {
228
+ if (isSummarizationRequest(request)) {
229
+ capturedAuxRequest = request
230
+ return {
231
+ content: 'Summary of conversation.',
232
+ toolCalls: [],
233
+ finishReason: 'stop',
234
+ metrics: MockLLMProvider.defaultMetrics(),
235
+ }
236
+ }
237
+ // First inference (no tool messages in history yet) → emit a tool call.
238
+ const hasToolMessages = request.messages.some((m) => m.role === 'tool')
239
+ if (!hasToolMessages) {
240
+ return {
241
+ content: '',
242
+ toolCalls: [{ id: ToolCallId('tc1'), name: 'my_tool', input: {} }],
243
+ finishReason: 'tool_calls',
244
+ metrics: MockLLMProvider.defaultMetrics(),
245
+ }
246
+ }
247
+ return {
248
+ content: 'Done.',
249
+ toolCalls: [],
250
+ finishReason: 'stop',
251
+ metrics: MockLLMProvider.defaultMetrics(),
252
+ }
253
+ },
254
+ })
255
+
256
+ const session = await harness.createSession('test')
257
+ await session.sendAndWaitForIdle('Please call my_tool')
258
+
259
+ expect(capturedAuxRequest).toBeDefined()
260
+
261
+ // Every assistant message with toolCalls must be followed by a contiguous
262
+ // run of tool messages covering each tool_use id before any further
263
+ // user/assistant message. This mirrors Anthropic's API contract.
264
+ const msgs = capturedAuxRequest!.messages
265
+ for (let i = 0; i < msgs.length; i++) {
266
+ const m = msgs[i]
267
+ if (m.role !== 'assistant' || !m.toolCalls?.length) continue
268
+
269
+ const expected = new Set(m.toolCalls.map((tc) => tc.id))
270
+ const seen = new Set<string>()
271
+ for (let j = i + 1; j < msgs.length; j++) {
272
+ const next = msgs[j]
273
+ if (next.role !== 'tool') break
274
+ seen.add(next.toolCallId)
275
+ }
276
+ for (const id of expected) {
277
+ expect(seen.has(id)).toBe(true)
278
+ }
279
+ }
280
+
281
+ // Compaction must have actually succeeded — pre-fix it would Err-out
282
+ // in production (mock accepts it but the assertion above already
283
+ // catches the malformed-prefix case).
284
+ const compactedEvents = await session.getEventsByType(contextEvents, 'context_compacted')
285
+ const actualCompactions = compactedEvents.filter((e) => e.messagesRemoved > 0)
286
+ expect(actualCompactions.length).toBeGreaterThanOrEqual(1)
287
+
288
+ await harness.shutdown()
289
+ })
290
+ })
291
+
160
292
  // =========================================================================
161
293
  // Compaction failure
162
294
  // =========================================================================
@@ -170,7 +302,7 @@ describe('context-compact plugin', () => {
170
302
  mockHandler: (request) => {
171
303
  // Summarization requests — throw to simulate LLM failure.
172
304
  // MockLLMProvider only returns Err() when the handler throws.
173
- if (request.systemPrompt.includes('summary') || request.systemPrompt.includes('Summarize')) {
305
+ if (isSummarizationRequest(request)) {
174
306
  throw { type: 'server_error', message: 'LLM summarization failed' }
175
307
  }
176
308