@roj-ai/sdk 0.1.14 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/dist/bootstrap.d.ts +1 -0
  2. package/dist/bootstrap.d.ts.map +1 -1
  3. package/dist/core/agents/agent.d.ts +25 -1
  4. package/dist/core/agents/agent.d.ts.map +1 -1
  5. package/dist/core/agents/agent.js +117 -21
  6. package/dist/core/agents/agent.js.map +1 -1
  7. package/dist/core/agents/config.d.ts +7 -0
  8. package/dist/core/agents/config.d.ts.map +1 -1
  9. package/dist/core/agents/context.d.ts +10 -0
  10. package/dist/core/agents/context.d.ts.map +1 -1
  11. package/dist/core/agents/state.d.ts +11 -3
  12. package/dist/core/agents/state.d.ts.map +1 -1
  13. package/dist/core/agents/state.js.map +1 -1
  14. package/dist/core/file-store/file-store.d.ts +5 -1
  15. package/dist/core/file-store/file-store.d.ts.map +1 -1
  16. package/dist/core/file-store/file-store.js +31 -21
  17. package/dist/core/file-store/file-store.js.map +1 -1
  18. package/dist/core/image/vips-resizer.test.js +26 -14
  19. package/dist/core/image/vips-resizer.test.js.map +1 -1
  20. package/dist/core/llm/anthropic.d.ts.map +1 -1
  21. package/dist/core/llm/anthropic.js +11 -8
  22. package/dist/core/llm/anthropic.js.map +1 -1
  23. package/dist/core/llm/cache-breakpoints.d.ts +5 -1
  24. package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
  25. package/dist/core/llm/cache-breakpoints.js +10 -5
  26. package/dist/core/llm/cache-breakpoints.js.map +1 -1
  27. package/dist/core/sessions/session.d.ts.map +1 -1
  28. package/dist/core/sessions/session.js +10 -0
  29. package/dist/core/sessions/session.js.map +1 -1
  30. package/dist/core/sessions/session.test.js +5 -0
  31. package/dist/core/sessions/session.test.js.map +1 -1
  32. package/dist/core/sessions/state.d.ts.map +1 -1
  33. package/dist/core/sessions/state.js +5 -1
  34. package/dist/core/sessions/state.js.map +1 -1
  35. package/dist/core/tools/executor.test.js +1 -0
  36. package/dist/core/tools/executor.test.js.map +1 -1
  37. package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
  38. package/dist/plugins/agent-status/plugin.js +18 -26
  39. package/dist/plugins/agent-status/plugin.js.map +1 -1
  40. package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
  41. package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
  42. package/dist/plugins/context-compact/compaction-live.test.js +177 -0
  43. package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
  44. package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
  45. package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
  46. package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
  47. package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
  48. package/dist/plugins/context-compact/context-compactor.js +60 -36
  49. package/dist/plugins/context-compact/context-compactor.js.map +1 -1
  50. package/dist/plugins/context-compact/context-compactor.test.js +69 -103
  51. package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
  52. package/dist/plugins/context-compact/plugin.d.ts +9 -2
  53. package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
  54. package/dist/plugins/context-compact/plugin.js +8 -4
  55. package/dist/plugins/context-compact/plugin.js.map +1 -1
  56. package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
  57. package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
  58. package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
  59. package/dist/plugins/filesystem/plugin.js +8 -6
  60. package/dist/plugins/filesystem/plugin.js.map +1 -1
  61. package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
  62. package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
  63. package/dist/plugins/resources/plugin.d.ts.map +1 -1
  64. package/dist/plugins/resources/plugin.js +4 -1
  65. package/dist/plugins/resources/plugin.js.map +1 -1
  66. package/dist/plugins/user-chat/plugin.d.ts +2 -0
  67. package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
  68. package/dist/plugins/user-chat/plugin.js +47 -3
  69. package/dist/plugins/user-chat/plugin.js.map +1 -1
  70. package/dist/plugins/user-chat/schema.d.ts +10 -0
  71. package/dist/plugins/user-chat/schema.d.ts.map +1 -1
  72. package/dist/plugins/user-chat/schema.js +1 -0
  73. package/dist/plugins/user-chat/schema.js.map +1 -1
  74. package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
  75. package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
  76. package/package.json +2 -2
  77. package/src/core/agents/agent.ts +134 -20
  78. package/src/core/agents/config.ts +7 -0
  79. package/src/core/agents/context.ts +11 -0
  80. package/src/core/agents/state.ts +11 -4
  81. package/src/core/file-store/file-store.ts +38 -18
  82. package/src/core/image/vips-resizer.test.ts +26 -15
  83. package/src/core/llm/anthropic.ts +19 -12
  84. package/src/core/llm/cache-breakpoints.ts +15 -6
  85. package/src/core/sessions/session.test.ts +6 -0
  86. package/src/core/sessions/session.ts +12 -0
  87. package/src/core/sessions/state.ts +5 -1
  88. package/src/core/tools/executor.test.ts +1 -0
  89. package/src/plugins/agent-status/plugin.ts +18 -25
  90. package/src/plugins/context-compact/compaction-live.test.ts +221 -0
  91. package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
  92. package/src/plugins/context-compact/context-compactor.test.ts +71 -110
  93. package/src/plugins/context-compact/context-compactor.ts +88 -43
  94. package/src/plugins/context-compact/plugin.ts +19 -10
  95. package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
  96. package/src/plugins/filesystem/plugin.ts +8 -6
  97. package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
  98. package/src/plugins/resources/plugin.ts +4 -1
  99. package/src/plugins/user-chat/plugin.ts +60 -3
  100. package/src/plugins/user-chat/schema.ts +10 -1
  101. package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
@@ -46,7 +46,7 @@ import { toolEvents } from '~/core/tools/state.js'
46
46
  import { getAgentUnconsumedMailbox, selectMailboxState } from '~/plugins/mailbox/query.js'
47
47
  import { AGENT_BASE_BRIEFING } from '~/prompts/base.js'
48
48
  import { buildEnvironmentSection } from '~/prompts/builder.js'
49
- import { Err, type Result } from '~/lib/utils/result.js'
49
+ import { Err, Ok, type Result } from '~/lib/utils/result.js'
50
50
  import type { Logger } from '../../lib/logger/logger.js'
51
51
  import type { SessionContext } from '../sessions/context.js'
52
52
  import type { SessionStore } from '../sessions/session-store.js'
@@ -54,7 +54,7 @@ import type { SessionState } from '../sessions/state.js'
54
54
  import type { SessionEnvironment, ToolExecutor } from '../tools/index.js'
55
55
  import type { AgentContext } from './context.js'
56
56
  import { sanitizeLLMResponse } from './response-sanitizer.js'
57
- import { withLLMRetry } from './retry.js'
57
+ import { isRetryableLLMError, withLLMRetry } from './retry.js'
58
58
 
59
59
  // ============================================================================
60
60
  // Types
@@ -81,6 +81,14 @@ export interface AgentConfig<TInput = unknown> {
81
81
  input?: z.ZodType<TInput>
82
82
  /** Per-plugin agent-level configs */
83
83
  plugins?: AgentPluginConfig[]
84
+ /**
85
+ * Prompt cache TTL for breakpoints emitted by this agent's inference calls.
86
+ * '1h' opts into Anthropic's extended cache tier (write 2× input, read 0.1×)
87
+ * — useful for long-lived agents (e.g. an orchestrator that waits minutes
88
+ * between user turns) where the default 5-minute TTL would expire and force
89
+ * full re-uploads. Omit (or '5m') for the standard tier.
90
+ */
91
+ cacheTtl?: '5m' | '1h'
84
92
  }
85
93
 
86
94
  /**
@@ -378,6 +386,58 @@ export class Agent {
378
386
  return this.scheduled
379
387
  }
380
388
 
389
+ /**
390
+ * Run a one-off LLM call using the agent's current system prompt, tools, and
391
+ * conversation prefix, with extra trailing messages appended. Does not emit
392
+ * agent inference events and does not mutate conversation history; the call
393
+ * is logged via the LLM provider's normal logging pipeline.
394
+ *
395
+ * Intended for plugins that need a constrained side-channel inference
396
+ * leveraging the agent's already-warm prompt cache — e.g. context-compact
397
+ * uses this to ask the same model for a summary, paying only the trailing
398
+ * uncached portion plus output tokens.
399
+ *
400
+ * The cache breakpoint is placed so that everything up to (but excluding)
401
+ * `extraMessages` is cacheable, matching the previous regular inference.
402
+ */
403
+ async runAuxiliaryInference(extraMessages: LLMMessage[]): Promise<Result<InferenceResponse, LLMError>> {
404
+ const agentState = this.state
405
+ if (!agentState) {
406
+ return Err({ type: 'invalid_request', message: `Agent ${this.id} has no state` })
407
+ }
408
+
409
+ // pendingToolResults aren't in conversationHistory yet (they get committed
410
+ // by the next inference_completed), but the assistant tool_use that
411
+ // demands them IS at the tail of history. Without these inlined, an
412
+ // aux call placed mid-tool-turn (e.g. by the context-compact plugin's
413
+ // beforeInference hook) lands as `[…, assistant(tool_use), extraMessages]`
414
+ // and Anthropic rejects with "tool_use blocks must be followed by
415
+ // tool_result blocks".
416
+ const pendingToolResultMessages = this.buildPendingMessages(agentState)
417
+ const baseMessages: LLMMessage[] = [
418
+ ...agentState.preamble,
419
+ ...agentState.conversationHistory,
420
+ ...pendingToolResultMessages,
421
+ ]
422
+ const messages = [...baseMessages, ...extraMessages]
423
+ const cachedMessages = applyCacheBreakpoint(messages, extraMessages.length, this.config.cacheTtl)
424
+
425
+ const request: InferenceRequest = {
426
+ model: this.config.model,
427
+ systemPrompt: this.buildSystemPrompt(),
428
+ messages: cachedMessages,
429
+ tools: this.tools.size > 0 ? [...this.tools.values()] : undefined,
430
+ }
431
+
432
+ return this.llmProvider.inference(request, {
433
+ sessionId: this.store.sessionId,
434
+ agentId: this.id,
435
+ signal: this.abortController.signal,
436
+ fileStore: this.fileStore,
437
+ providers: this.llmProviders,
438
+ })
439
+ }
440
+
381
441
  // ============================================================================
382
442
  // Private methods - Processing
383
443
  // ============================================================================
@@ -485,7 +545,11 @@ export class Agent {
485
545
 
486
546
  // Mark cache breakpoint — ephemeral session-context suffix is excluded
487
547
  // so it doesn't invalidate the cache on every inference.
488
- const cachedMessages = applyCacheBreakpoint(messages, ephemeralParts.length > 0 ? 1 : 0)
548
+ const cachedMessages = applyCacheBreakpoint(
549
+ messages,
550
+ ephemeralParts.length > 0 ? 1 : 0,
551
+ this.config.cacheTtl,
552
+ )
489
553
 
490
554
  // 5. LLM inference (with retry)
491
555
  const request: InferenceRequest = {
@@ -513,6 +577,7 @@ export class Agent {
513
577
  // can notify the parent.
514
578
  let llmResponse: Result<InferenceResponse, LLMError>
515
579
  let emptyAttempts = 0
580
+ let nudgeInjected = false
516
581
  while (true) {
517
582
  llmResponse = await withLLMRetry(
518
583
  () =>
@@ -538,13 +603,20 @@ export class Agent {
538
603
  if (!isEmptyStop) break
539
604
 
540
605
  if (emptyAttempts >= Agent.MAX_EMPTY_RESPONSE_RETRIES) {
541
- this.logger.error('LLM returned empty stop response after retries', undefined, {
606
+ this.logger.warn('LLM returned empty stop response after retries, coalescing to WAITING', {
542
607
  agentId: this.id,
543
608
  attempts: emptyAttempts + 1,
544
609
  })
545
- llmResponse = Err({
546
- type: 'server_error',
547
- message: `LLM returned empty response (no content, no tool calls) after ${emptyAttempts + 1} attempts`,
610
+ // Coalesce to WAITING instead of hard-erroring. The LLM accepted the
611
+ // message but couldn't produce output 3× in a row — treating this as
612
+ // a terminal failure was too aggressive (errored state, error message
613
+ // to parent, mailbox tokens stuck unconsumed). Synthetic WAITING goes
614
+ // through the normal success path: mailbox tokens get consumed,
615
+ // limits-guard skips it in dedup (existing WAITING exception), and
616
+ // the agent quietly transitions to pending → complete.
617
+ llmResponse = Ok({
618
+ ...llmResponse.value,
619
+ content: 'WAITING',
548
620
  })
549
621
  break
550
622
  }
@@ -553,24 +625,51 @@ export class Agent {
553
625
  agentId: this.id,
554
626
  attempt: emptyAttempts,
555
627
  })
556
- }
557
628
 
558
- // Mark plugin messages as consumed (regardless of inference outcome
559
- // messages are already appended to conversationHistory via inference_started)
560
- {
561
- const currentAgentState = this.state
562
- if (currentAgentState) {
563
- const ctx = this.buildAgentContext(currentAgentState)
564
- for (const dequeued of pluginDequeued) {
565
- if (!dequeued.plugin.dequeue) continue
566
- const pluginCtx = this.buildPluginHookContext(dequeued.plugin, ctx)
567
- await dequeued.plugin.dequeue.markConsumed(pluginCtx, dequeued.token)
568
- }
629
+ // Inject a one-shot nudge after the first empty response. Appended after the
630
+ // existing cache breakpoint, so the cached prefix still hits — only the new
631
+ // tail is uncached. Uses the canonical "WAITING" literal so the response is
632
+ // recognized by the sanitizer and limits-guard plugin.
633
+ if (!nudgeInjected) {
634
+ request.messages.push({
635
+ role: 'user',
636
+ content:
637
+ '<system-nudge>Your previous response was empty (no text and no tool calls). '
638
+ + 'Either produce a meaningful response — text or tool calls — or, if you '
639
+ + 'have nothing to do, output only the word WAITING on its own line per the '
640
+ + 'waiting protocol.</system-nudge>',
641
+ })
642
+ nudgeInjected = true
569
643
  }
570
644
  }
571
645
 
572
646
  if (!llmResponse.ok) {
573
- // 4a. Inference failed
647
+ // Aborted (shutdown / interruption): bail silently. Emitting inference_failed
648
+ // would leave the agent in 'errored' with unconsumed plugin tokens — decide()
649
+ // would then loop resume_from_error ↔ infer forever (each retry re-aborts).
650
+ if (llmResponse.error.type === 'aborted') return
651
+
652
+ // Non-retryable failures (invalid_request, context_length) will fail the same
653
+ // way on every retry. Mark plugin tokens consumed before emitting
654
+ // inference_failed so decide()'s resume_from_error path doesn't re-feed the
655
+ // same message into a doomed retry loop. Retryable errors (rate_limit,
656
+ // server_error, network_error, timeout) keep the preserve-for-retry semantics.
657
+ if (!isRetryableLLMError(llmResponse.error)) {
658
+ const errorAgentState = this.state
659
+ if (errorAgentState) {
660
+ const errorCtx = this.buildAgentContext(errorAgentState)
661
+ for (const dequeued of pluginDequeued) {
662
+ if (!dequeued.plugin.dequeue) continue
663
+ const pluginCtx = this.buildPluginHookContext(dequeued.plugin, errorCtx)
664
+ await dequeued.plugin.dequeue.markConsumed(pluginCtx, dequeued.token)
665
+ }
666
+ }
667
+ }
668
+
669
+ // 4a. Inference failed — emit inference_failed. Retryable errors reach this point
670
+ // with plugin messages still unconsumed (only non-retryable errors were marked
671
+ // consumed above). The reducer leaves pendingToolResults / mailbox tokens intact so the
672
+ // next inference rebuilds the same turn; consuming them here would drop the user message from the retry.
574
673
  await this.store.emit(withSessionId(
575
674
  this.store.sessionId,
576
675
  llmEvents.create('inference_failed', {
@@ -587,6 +686,20 @@ export class Agent {
587
686
  return
588
687
  }
589
688
 
689
+ // Mark plugin messages as consumed only after successful inference. They've been
690
+ // appended to conversationHistory via the inference_completed reducer below.
691
+ {
692
+ const currentAgentState = this.state
693
+ if (currentAgentState) {
694
+ const ctx = this.buildAgentContext(currentAgentState)
695
+ for (const dequeued of pluginDequeued) {
696
+ if (!dequeued.plugin.dequeue) continue
697
+ const pluginCtx = this.buildPluginHookContext(dequeued.plugin, ctx)
698
+ await dequeued.plugin.dequeue.markConsumed(pluginCtx, dequeued.token)
699
+ }
700
+ }
701
+ }
702
+
590
703
  // 4c. Sanitize response to prevent hallucination
591
704
  const sanitized = sanitizeLLMResponse(llmResponse.value.content)
592
705
 
@@ -866,6 +979,7 @@ export class Agent {
866
979
  agentConfig: this.config,
867
980
  input: agentState.typedInput,
868
981
  parentId: agentState.parentId,
982
+ runAuxiliaryInference: (extraMessages) => this.runAuxiliaryInference(extraMessages),
869
983
  }
870
984
  }
871
985
 
@@ -52,6 +52,13 @@ export interface BaseAgentConfig<TInput = unknown> {
52
52
  services?: ServiceConfig[]
53
53
  /** LLM middleware chain applied per-agent (runs after preset-level middleware) */
54
54
  llmMiddleware?: LLMMiddleware[]
55
+ /**
56
+ * Prompt cache TTL for this agent's inference breakpoints.
57
+ * '1h' opts into Anthropic's extended cache tier for long-lived agents
58
+ * (e.g. an orchestrator that waits minutes between user turns). Defaults
59
+ * to the standard 5-minute tier.
60
+ */
61
+ cacheTtl?: '5m' | '1h'
55
62
  }
56
63
 
57
64
  /**
@@ -1,3 +1,5 @@
1
+ import type { InferenceResponse, LLMError, LLMMessage } from '~/core/llm/provider.js'
2
+ import type { Result } from '~/lib/utils/result.js'
1
3
  import { SessionContext } from '../sessions/context.js'
2
4
  import { AgentConfig } from './agent.js'
3
5
  import { AgentId } from './schema.js'
@@ -16,4 +18,13 @@ export type AgentContext<TInput = unknown> =
16
18
  input: TInput
17
19
  /** The parent agent ID (null for root agents) */
18
20
  parentId: AgentId | null
21
+
22
+ /**
23
+ * Run a one-off LLM call reusing the agent's current system prompt, tools,
24
+ * and conversation prefix, with extra trailing messages appended. Lets
25
+ * plugins do side-channel inferences (e.g. summarization) while sharing
26
+ * the agent's warm prompt cache. See Agent.runAuxiliaryInference for the
27
+ * full contract.
28
+ */
29
+ runAuxiliaryInference: (extraMessages: LLMMessage[]) => Promise<Result<InferenceResponse, LLMError>>
19
30
  }
@@ -8,6 +8,7 @@ import { agentIdSchema } from '~/core/agents/schema.js'
8
8
  import { createEventsFactory } from '~/core/events/types'
9
9
  import type { ToolResultContent } from '~/core/llm/llm-log-types.js'
10
10
  import type { ChatMessageContentItem } from '~/core/llm/llm-log-types.js'
11
+ import type { LLMMetrics } from '~/core/llm/state.js'
11
12
  import type { PendingToolResult, ToolCallId } from '~/core/tools/schema.js'
12
13
  import { MessageId } from '../../plugins/mailbox/schema.js'
13
14
 
@@ -107,11 +108,15 @@ export type AgentPauseReason = 'limit' | 'handler' | 'manual'
107
108
 
108
109
  /**
109
110
  * Prompt cache breakpoint marker.
110
- * When set on an LLMMessage, providers place `cache_control: { type: 'ephemeral' }`
111
- * on the LAST content block of the mapped message (regardless of block type),
112
- * marking it as a prompt cache checkpoint.
111
+ * When set on an LLMMessage, providers place `cache_control` on the LAST
112
+ * content block of the mapped message (regardless of block type), marking it
113
+ * as a prompt cache checkpoint.
114
+ *
115
+ * `ttl: '1h'` opts into Anthropic's 1-hour cache tier (write cost 2× input,
116
+ * read still 0.1×). Useful for long-lived agents whose prompt cache would
117
+ * otherwise expire between user turns. Omit for the default 5-minute tier.
113
118
  */
114
- export type LLMMessageCacheControl = { type: 'ephemeral' }
119
+ export type LLMMessageCacheControl = { type: 'ephemeral'; ttl?: '5m' | '1h' }
115
120
 
116
121
  /**
117
122
  * User message - from mailbox or direct input.
@@ -204,6 +209,8 @@ export interface AgentState {
204
209
  pauseReason?: AgentPauseReason
205
210
  /** Human-readable pause message */
206
211
  pauseMessage?: string
212
+ /** Metrics from the most recent completed inference — used by plugins (e.g. context-compact) to size context against the provider-reported truth. */
213
+ lastInferenceMetrics?: LLMMetrics
207
214
  }
208
215
 
209
216
  // ============================================================================
@@ -92,35 +92,55 @@ export class SessionFileStore implements FileStore {
92
92
  }
93
93
  }
94
94
 
95
- async list(path: string): Promise<Result<FileEntry[], string>> {
95
+ async list(
96
+ path: string,
97
+ options?: { maxDepth?: number; gitIgnore?: boolean },
98
+ ): Promise<Result<FileEntry[], string>> {
96
99
  const resolved = this.resolvePath(path)
97
100
  if (!resolved.ok) return resolved
98
101
 
102
+ const maxDepth = options?.maxDepth ?? 1
103
+ if (maxDepth < 1) return Ok([])
104
+
99
105
  try {
100
- const items = await this.fs.readdir(resolved.value, { withFileTypes: true })
101
106
  const entries: FileEntry[] = []
102
- for (const item of items) {
103
- let type: FileEntry['type']
104
- let size: number | undefined
105
- if (item.isFile()) {
106
- type = 'file'
107
- const s = await this.fs.stat(join(resolved.value, item.name))
108
- size = s.size
109
- } else if (item.isDirectory()) {
110
- type = 'directory'
111
- } else if (item.isSymbolicLink()) {
112
- type = 'symlink'
113
- } else {
114
- type = 'other'
115
- }
116
- entries.push({ name: item.name, type, size })
117
- }
107
+ await this.walkInto(resolved.value, '', maxDepth, entries)
118
108
  return Ok(entries)
119
109
  } catch {
120
110
  return Err(`Directory not found: ${path}`)
121
111
  }
122
112
  }
123
113
 
114
+ private async walkInto(
115
+ absDir: string,
116
+ relPrefix: string,
117
+ remainingDepth: number,
118
+ out: FileEntry[],
119
+ ): Promise<void> {
120
+ const items = await this.fs.readdir(absDir, { withFileTypes: true })
121
+ for (const item of items) {
122
+ const relName = relPrefix ? `${relPrefix}/${item.name}` : item.name
123
+ let type: FileEntry['type']
124
+ let size: number | undefined
125
+ if (item.isFile()) {
126
+ type = 'file'
127
+ const s = await this.fs.stat(join(absDir, item.name))
128
+ size = s.size
129
+ } else if (item.isDirectory()) {
130
+ type = 'directory'
131
+ } else if (item.isSymbolicLink()) {
132
+ type = 'symlink'
133
+ } else {
134
+ type = 'other'
135
+ }
136
+ out.push({ name: relName, type, size })
137
+
138
+ if (type === 'directory' && remainingDepth > 1) {
139
+ await this.walkInto(join(absDir, item.name), relName, remainingDepth - 1, out)
140
+ }
141
+ }
142
+ }
143
+
124
144
  async remove(path: string): Promise<Result<void, string>> {
125
145
  const resolved = this.resolvePath(path)
126
146
  if (!resolved.ok) return resolved
@@ -1,25 +1,36 @@
1
- import { afterEach, describe, expect, it, mock, spyOn } from 'bun:test'
2
- import * as childProcess from 'node:child_process'
1
+ import { afterEach, describe, expect, it, spyOn } from 'bun:test'
2
+ import { tmpdir } from 'node:os'
3
+ import { VipsImageResizer } from './vips-resizer.js'
4
+ import { createNodeFileSystem } from '~/testing/node-platform.js'
5
+ import type { ExecFileResult, ProcessRunner } from '~/platform/process.js'
3
6
 
4
7
  type ExecFileCallback = (error: Error | null, stdout: string, stderr: string) => void
5
8
  let execFileImpl: (cmd: string, args: string[], opts: unknown, cb: ExecFileCallback) => void = () => {}
6
9
 
7
- mock.module('node:child_process', () => ({
8
- ...childProcess,
9
- execFile: (cmd: string, args: string[], opts: unknown, cb: ExecFileCallback) => execFileImpl(cmd, args, opts, cb),
10
- }))
11
-
12
- const { VipsImageResizer } = await import('./vips-resizer.js')
13
- const { createNodePlatform } = await import('~/testing/node-platform.js')
10
+ // Fake ProcessRunner — wires execFile calls through the test-controlled
11
+ // `execFileImpl`. Avoids `mock.module('node:child_process')` because
12
+ // node-platform.ts promisifies execFile at module-load and other test files
13
+ // may load it first, freezing the binding to the real implementation.
14
+ function createFakeProcessRunner(): ProcessRunner {
15
+ return {
16
+ execFile: (file, args, options) =>
17
+ new Promise<ExecFileResult>((resolve, reject) => {
18
+ execFileImpl(file, args, options ?? {}, (err, stdout, stderr) => {
19
+ if (err) reject(err)
20
+ else resolve({ stdout: stdout ?? '', stderr: stderr ?? '' })
21
+ })
22
+ }),
23
+ spawn: () => {
24
+ throw new Error('spawn not implemented in test fake')
25
+ },
26
+ }
27
+ }
14
28
 
15
- // Test-scoped helper — routes through createNodePlatform so the module-level
16
- // node:child_process mock still intercepts execFile calls made by ProcessRunner.
17
29
  function createResizer(maxDimension?: number): InstanceType<typeof VipsImageResizer> {
18
- const platform = createNodePlatform()
19
30
  return new VipsImageResizer({
20
- fs: platform.fs,
21
- process: platform.process,
22
- tmpDir: platform.tmpDir,
31
+ fs: createNodeFileSystem(),
32
+ process: createFakeProcessRunner(),
33
+ tmpDir: tmpdir(),
23
34
  maxDimension,
24
35
  })
25
36
  }
@@ -85,10 +85,15 @@ interface AnthropicErrorResponse {
85
85
  // Request body types
86
86
  // ============================================================================
87
87
 
88
+ interface AnthropicCacheControl {
89
+ type: 'ephemeral'
90
+ ttl?: '5m' | '1h'
91
+ }
92
+
88
93
  interface AnthropicTextBlockParam {
89
94
  type: 'text'
90
95
  text: string
91
- cache_control?: { type: 'ephemeral' }
96
+ cache_control?: AnthropicCacheControl
92
97
  }
93
98
 
94
99
  interface AnthropicImageBlockParam {
@@ -96,7 +101,7 @@ interface AnthropicImageBlockParam {
96
101
  source:
97
102
  | { type: 'base64'; media_type: string; data: string }
98
103
  | { type: 'url'; url: string }
99
- cache_control?: { type: 'ephemeral' }
104
+ cache_control?: AnthropicCacheControl
100
105
  }
101
106
 
102
107
  interface AnthropicToolUseBlockParam {
@@ -104,7 +109,7 @@ interface AnthropicToolUseBlockParam {
104
109
  id: string
105
110
  name: string
106
111
  input: unknown
107
- cache_control?: { type: 'ephemeral' }
112
+ cache_control?: AnthropicCacheControl
108
113
  }
109
114
 
110
115
  interface AnthropicToolResultBlockParam {
@@ -112,7 +117,7 @@ interface AnthropicToolResultBlockParam {
112
117
  tool_use_id: string
113
118
  content: string | Array<AnthropicTextBlockParam | AnthropicImageBlockParam>
114
119
  is_error?: boolean
115
- cache_control?: { type: 'ephemeral' }
120
+ cache_control?: AnthropicCacheControl
116
121
  }
117
122
 
118
123
  type AnthropicContentBlockParam =
@@ -127,19 +132,19 @@ interface AnthropicMessageParam {
127
132
  }
128
133
 
129
134
  /**
130
- * Add `cache_control: { type: 'ephemeral' }` to the LAST content block of an
131
- * AnthropicMessageParam, regardless of block type. Converts string content to
132
- * a single text block first so the mark has a place to live. Mutates in place
133
- * so the cache breakpoint survives subsequent `mergeConsecutiveMessages`.
135
+ * Add `cache_control` to the LAST content block of an AnthropicMessageParam,
136
+ * regardless of block type. Converts string content to a single text block
137
+ * first so the mark has a place to live. Mutates in place so the cache
138
+ * breakpoint survives subsequent `mergeConsecutiveMessages`.
134
139
  */
135
- function applyCacheControlToLastBlock(msg: AnthropicMessageParam): void {
140
+ function applyCacheControlToLastBlock(msg: AnthropicMessageParam, cacheControl: AnthropicCacheControl): void {
136
141
  if (typeof msg.content === 'string') {
137
- msg.content = [{ type: 'text', text: msg.content, cache_control: { type: 'ephemeral' } }]
142
+ msg.content = [{ type: 'text', text: msg.content, cache_control: cacheControl }]
138
143
  return
139
144
  }
140
145
  if (msg.content.length === 0) return
141
146
  const lastIdx = msg.content.length - 1
142
- msg.content[lastIdx] = { ...msg.content[lastIdx], cache_control: { type: 'ephemeral' } }
147
+ msg.content[lastIdx] = { ...msg.content[lastIdx], cache_control: cacheControl }
143
148
  }
144
149
 
145
150
  interface AnthropicToolParam {
@@ -366,7 +371,9 @@ export class AnthropicProvider implements RoutableLLMProvider {
366
371
  private async mapMessage(msg: LLMMessage, context?: InferenceContext): Promise<AnthropicMessageParam> {
367
372
  const mapped = await this.mapMessageContent(msg, context)
368
373
  if (msg.cacheControl) {
369
- applyCacheControlToLastBlock(mapped)
374
+ const cc: AnthropicCacheControl = { type: 'ephemeral' }
375
+ if (msg.cacheControl.ttl) cc.ttl = msg.cacheControl.ttl
376
+ applyCacheControlToLastBlock(mapped, cc)
370
377
  }
371
378
  return mapped
372
379
  }
@@ -1,4 +1,4 @@
1
- import type { LLMMessage } from '~/core/agents/state.js'
1
+ import type { LLMMessage, LLMMessageCacheControl } from '~/core/agents/state.js'
2
2
 
3
3
  /**
4
4
  * Mark the prompt cache breakpoint on a message list.
@@ -13,25 +13,34 @@ import type { LLMMessage } from '~/core/agents/state.js'
13
13
  * Target index is `messages.length - 1 - uncachedSuffixCount`. The suffix is
14
14
  * the tail of messages that must remain fresh (e.g. ephemeral session context
15
15
  * rebuilt each inference).
16
+ *
17
+ * `ttl` opts into Anthropic's 1-hour cache tier (write cost 2× input, read
18
+ * still 0.1×). Useful for long-lived agents where the default 5-minute TTL
19
+ * would expire between user turns. Omit for the default 5-minute tier.
16
20
  */
17
- export function applyCacheBreakpoint(messages: LLMMessage[], uncachedSuffixCount: number): LLMMessage[] {
21
+ export function applyCacheBreakpoint(
22
+ messages: LLMMessage[],
23
+ uncachedSuffixCount: number,
24
+ ttl?: '5m' | '1h',
25
+ ): LLMMessage[] {
18
26
  const idx = messages.length - 1 - uncachedSuffixCount
19
27
  if (idx < 0) return messages
20
28
 
29
+ const cacheControl: LLMMessageCacheControl = ttl ? { type: 'ephemeral', ttl } : { type: 'ephemeral' }
21
30
  const target = messages[idx]
22
31
  const result = [...messages]
23
32
  switch (target.role) {
24
33
  case 'user':
25
- result[idx] = { ...target, cacheControl: { type: 'ephemeral' } }
34
+ result[idx] = { ...target, cacheControl }
26
35
  break
27
36
  case 'assistant':
28
- result[idx] = { ...target, cacheControl: { type: 'ephemeral' } }
37
+ result[idx] = { ...target, cacheControl }
29
38
  break
30
39
  case 'system':
31
- result[idx] = { ...target, cacheControl: { type: 'ephemeral' } }
40
+ result[idx] = { ...target, cacheControl }
32
41
  break
33
42
  case 'tool':
34
- result[idx] = { ...target, cacheControl: { type: 'ephemeral' } }
43
+ result[idx] = { ...target, cacheControl }
35
44
  break
36
45
  }
37
46
  return result
@@ -712,6 +712,8 @@ describe('applyEvent', () => {
712
712
  expect(session.agents.get(agentId)!.pendingMessages).toHaveLength(2)
713
713
  expect(session.agents.get(agentId)!.status).toBe('inferring')
714
714
 
715
+ const historyLenBeforeFailure = session.agents.get(agentId)!.conversationHistory.length
716
+
715
717
  // 6. Inference fails
716
718
  session = applyEvent(
717
719
  session,
@@ -735,6 +737,10 @@ describe('applyEvent', () => {
735
737
  expect(getAgentMailbox(selectMailboxState(session), agentId)[0].consumed).toBe(false)
736
738
  // status is errored
737
739
  expect(agent.status).toBe('errored')
740
+ // conversationHistory NOT extended — pendingMessages are dropped, not promoted.
741
+ // Otherwise tool results would appear both in history and in pendingToolResults,
742
+ // duplicating them on the next inference (a Bedrock-style provider rejects that with a 400).
743
+ expect(agent.conversationHistory).toHaveLength(historyLenBeforeFailure)
738
744
  })
739
745
  })
740
746
 
@@ -494,6 +494,15 @@ export class Session {
494
494
  }
495
495
 
496
496
  const result = await methodDef.handler(ctx, parsed.data)
497
+
498
+ // Plugin methods can mutate dequeue state (uploads.upload adds to pending,
499
+ // resources.inject can too) without explicitly calling ctx.scheduleAgent.
500
+ // Schedule every agent — scheduleProcessing is idempotent + debounced, and
501
+ // decide() short-circuits to idle/complete when no work is actually pending.
502
+ for (const agent of this.agents.values()) {
503
+ agent.scheduleProcessing()
504
+ }
505
+
497
506
  return result
498
507
  }
499
508
 
@@ -795,6 +804,7 @@ export class Session {
795
804
  checkIntervalMs: orch.checkIntervalMs,
796
805
  input: orch.input,
797
806
  plugins: withServicePluginConfig(orch),
807
+ cacheTtl: orch.cacheTtl,
798
808
  }
799
809
  }
800
810
 
@@ -810,6 +820,7 @@ export class Session {
810
820
  checkIntervalMs: comm.checkIntervalMs,
811
821
  input: comm.input,
812
822
  plugins: withServicePluginConfig(comm),
823
+ cacheTtl: comm.cacheTtl,
813
824
  }
814
825
  }
815
826
 
@@ -828,6 +839,7 @@ export class Session {
828
839
  checkIntervalMs: agentDef.checkIntervalMs,
829
840
  input: agentDef.input,
830
841
  plugins: withServicePluginConfig(agentDef),
842
+ cacheTtl: agentDef.cacheTtl,
831
843
  }
832
844
  }
833
845
  }
@@ -207,15 +207,19 @@ export const coreReducer = createTypedReducer(
207
207
  pendingToolCalls: toolCalls,
208
208
  pendingMessages: [],
209
209
  pendingToolResults: [],
210
+ lastInferenceMetrics: event.metrics,
210
211
  }
211
212
  })
212
213
  }
213
214
 
214
215
  case 'inference_failed':
216
+ // Failure is a clean rollback: pendingMessages are dropped (not promoted to history)
217
+ // and pendingToolResults / mailbox tokens stay intact so the next inference
218
+ // rebuilds the same turn. Runtime must skip markConsumed on retryable failures to
219
+ // preserve mailbox tokens (non-retryable failures are consumed on purpose) — see runInference().
215
220
  return updateAgent(state, event.agentId, (agent) => ({
216
221
  ...agent,
217
222
  status: 'errored',
218
- conversationHistory: [...agent.conversationHistory, ...agent.pendingMessages],
219
223
  pendingMessages: [],
220
224
  }))
221
225
 
@@ -51,6 +51,7 @@ const createTestContext = (): ToolContext => {
51
51
  agentConfig: { systemPrompt: 'test', model: ModelId('test'), spawnableAgents: [] },
52
52
  input: undefined,
53
53
  parentId: null,
54
+ runAuxiliaryInference: async () => Err({ type: 'invalid_request', message: 'not implemented in test' }),
54
55
  }
55
56
  }
56
57