@roj-ai/sdk 0.1.14 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bootstrap.d.ts +1 -0
- package/dist/bootstrap.d.ts.map +1 -1
- package/dist/core/agents/agent.d.ts +25 -1
- package/dist/core/agents/agent.d.ts.map +1 -1
- package/dist/core/agents/agent.js +117 -21
- package/dist/core/agents/agent.js.map +1 -1
- package/dist/core/agents/config.d.ts +7 -0
- package/dist/core/agents/config.d.ts.map +1 -1
- package/dist/core/agents/context.d.ts +10 -0
- package/dist/core/agents/context.d.ts.map +1 -1
- package/dist/core/agents/state.d.ts +11 -3
- package/dist/core/agents/state.d.ts.map +1 -1
- package/dist/core/agents/state.js.map +1 -1
- package/dist/core/file-store/file-store.d.ts +5 -1
- package/dist/core/file-store/file-store.d.ts.map +1 -1
- package/dist/core/file-store/file-store.js +31 -21
- package/dist/core/file-store/file-store.js.map +1 -1
- package/dist/core/image/vips-resizer.test.js +26 -14
- package/dist/core/image/vips-resizer.test.js.map +1 -1
- package/dist/core/llm/anthropic.d.ts.map +1 -1
- package/dist/core/llm/anthropic.js +11 -8
- package/dist/core/llm/anthropic.js.map +1 -1
- package/dist/core/llm/cache-breakpoints.d.ts +5 -1
- package/dist/core/llm/cache-breakpoints.d.ts.map +1 -1
- package/dist/core/llm/cache-breakpoints.js +10 -5
- package/dist/core/llm/cache-breakpoints.js.map +1 -1
- package/dist/core/sessions/session.d.ts.map +1 -1
- package/dist/core/sessions/session.js +10 -0
- package/dist/core/sessions/session.js.map +1 -1
- package/dist/core/sessions/session.test.js +5 -0
- package/dist/core/sessions/session.test.js.map +1 -1
- package/dist/core/sessions/state.d.ts.map +1 -1
- package/dist/core/sessions/state.js +5 -1
- package/dist/core/sessions/state.js.map +1 -1
- package/dist/core/tools/executor.test.js +1 -0
- package/dist/core/tools/executor.test.js.map +1 -1
- package/dist/plugins/agent-status/plugin.d.ts.map +1 -1
- package/dist/plugins/agent-status/plugin.js +18 -26
- package/dist/plugins/agent-status/plugin.js.map +1 -1
- package/dist/plugins/context-compact/compaction-live.test.d.ts +17 -0
- package/dist/plugins/context-compact/compaction-live.test.d.ts.map +1 -0
- package/dist/plugins/context-compact/compaction-live.test.js +177 -0
- package/dist/plugins/context-compact/compaction-live.test.js.map +1 -0
- package/dist/plugins/context-compact/context-compact.integration.test.js +123 -3
- package/dist/plugins/context-compact/context-compact.integration.test.js.map +1 -1
- package/dist/plugins/context-compact/context-compactor.d.ts +47 -17
- package/dist/plugins/context-compact/context-compactor.d.ts.map +1 -1
- package/dist/plugins/context-compact/context-compactor.js +60 -36
- package/dist/plugins/context-compact/context-compactor.js.map +1 -1
- package/dist/plugins/context-compact/context-compactor.test.js +69 -103
- package/dist/plugins/context-compact/context-compactor.test.js.map +1 -1
- package/dist/plugins/context-compact/plugin.d.ts +9 -2
- package/dist/plugins/context-compact/plugin.d.ts.map +1 -1
- package/dist/plugins/context-compact/plugin.js +8 -4
- package/dist/plugins/context-compact/plugin.js.map +1 -1
- package/dist/plugins/filesystem/filesystem.integration.test.js +36 -0
- package/dist/plugins/filesystem/filesystem.integration.test.js.map +1 -1
- package/dist/plugins/filesystem/plugin.d.ts.map +1 -1
- package/dist/plugins/filesystem/plugin.js +8 -6
- package/dist/plugins/filesystem/plugin.js.map +1 -1
- package/dist/plugins/mailbox/mailbox.integration.test.js +9 -16
- package/dist/plugins/mailbox/mailbox.integration.test.js.map +1 -1
- package/dist/plugins/resources/plugin.d.ts.map +1 -1
- package/dist/plugins/resources/plugin.js +4 -1
- package/dist/plugins/resources/plugin.js.map +1 -1
- package/dist/plugins/user-chat/plugin.d.ts +2 -0
- package/dist/plugins/user-chat/plugin.d.ts.map +1 -1
- package/dist/plugins/user-chat/plugin.js +47 -3
- package/dist/plugins/user-chat/plugin.js.map +1 -1
- package/dist/plugins/user-chat/schema.d.ts +10 -0
- package/dist/plugins/user-chat/schema.d.ts.map +1 -1
- package/dist/plugins/user-chat/schema.js +1 -0
- package/dist/plugins/user-chat/schema.js.map +1 -1
- package/dist/plugins/user-chat/user-chat.integration.test.js +86 -0
- package/dist/plugins/user-chat/user-chat.integration.test.js.map +1 -1
- package/package.json +2 -2
- package/src/core/agents/agent.ts +134 -20
- package/src/core/agents/config.ts +7 -0
- package/src/core/agents/context.ts +11 -0
- package/src/core/agents/state.ts +11 -4
- package/src/core/file-store/file-store.ts +38 -18
- package/src/core/image/vips-resizer.test.ts +26 -15
- package/src/core/llm/anthropic.ts +19 -12
- package/src/core/llm/cache-breakpoints.ts +15 -6
- package/src/core/sessions/session.test.ts +6 -0
- package/src/core/sessions/session.ts +12 -0
- package/src/core/sessions/state.ts +5 -1
- package/src/core/tools/executor.test.ts +1 -0
- package/src/plugins/agent-status/plugin.ts +18 -25
- package/src/plugins/context-compact/compaction-live.test.ts +221 -0
- package/src/plugins/context-compact/context-compact.integration.test.ts +135 -3
- package/src/plugins/context-compact/context-compactor.test.ts +71 -110
- package/src/plugins/context-compact/context-compactor.ts +88 -43
- package/src/plugins/context-compact/plugin.ts +19 -10
- package/src/plugins/filesystem/filesystem.integration.test.ts +44 -0
- package/src/plugins/filesystem/plugin.ts +8 -6
- package/src/plugins/mailbox/mailbox.integration.test.ts +12 -18
- package/src/plugins/resources/plugin.ts +4 -1
- package/src/plugins/user-chat/plugin.ts +60 -3
- package/src/plugins/user-chat/schema.ts +10 -1
- package/src/plugins/user-chat/user-chat.integration.test.ts +99 -0
package/src/core/agents/agent.ts
CHANGED
|
@@ -46,7 +46,7 @@ import { toolEvents } from '~/core/tools/state.js'
|
|
|
46
46
|
import { getAgentUnconsumedMailbox, selectMailboxState } from '~/plugins/mailbox/query.js'
|
|
47
47
|
import { AGENT_BASE_BRIEFING } from '~/prompts/base.js'
|
|
48
48
|
import { buildEnvironmentSection } from '~/prompts/builder.js'
|
|
49
|
-
import { Err, type Result } from '~/lib/utils/result.js'
|
|
49
|
+
import { Err, Ok, type Result } from '~/lib/utils/result.js'
|
|
50
50
|
import type { Logger } from '../../lib/logger/logger.js'
|
|
51
51
|
import type { SessionContext } from '../sessions/context.js'
|
|
52
52
|
import type { SessionStore } from '../sessions/session-store.js'
|
|
@@ -54,7 +54,7 @@ import type { SessionState } from '../sessions/state.js'
|
|
|
54
54
|
import type { SessionEnvironment, ToolExecutor } from '../tools/index.js'
|
|
55
55
|
import type { AgentContext } from './context.js'
|
|
56
56
|
import { sanitizeLLMResponse } from './response-sanitizer.js'
|
|
57
|
-
import { withLLMRetry } from './retry.js'
|
|
57
|
+
import { isRetryableLLMError, withLLMRetry } from './retry.js'
|
|
58
58
|
|
|
59
59
|
// ============================================================================
|
|
60
60
|
// Types
|
|
@@ -81,6 +81,14 @@ export interface AgentConfig<TInput = unknown> {
|
|
|
81
81
|
input?: z.ZodType<TInput>
|
|
82
82
|
/** Per-plugin agent-level configs */
|
|
83
83
|
plugins?: AgentPluginConfig[]
|
|
84
|
+
/**
|
|
85
|
+
* Prompt cache TTL for breakpoints emitted by this agent's inference calls.
|
|
86
|
+
* '1h' opts into Anthropic's extended cache tier (write 2× input, read 0.1×)
|
|
87
|
+
* — useful for long-lived agents (e.g. an orchestrator that waits minutes
|
|
88
|
+
* between user turns) where the default 5-minute TTL would expire and force
|
|
89
|
+
* full re-uploads. Omit (or '5m') for the standard tier.
|
|
90
|
+
*/
|
|
91
|
+
cacheTtl?: '5m' | '1h'
|
|
84
92
|
}
|
|
85
93
|
|
|
86
94
|
/**
|
|
@@ -378,6 +386,58 @@ export class Agent {
|
|
|
378
386
|
return this.scheduled
|
|
379
387
|
}
|
|
380
388
|
|
|
389
|
+
/**
|
|
390
|
+
* Run a one-off LLM call using the agent's current system prompt, tools, and
|
|
391
|
+
* conversation prefix, with extra trailing messages appended. Does not emit
|
|
392
|
+
* agent inference events and does not mutate conversation history; the call
|
|
393
|
+
* is logged via the LLM provider's normal logging pipeline.
|
|
394
|
+
*
|
|
395
|
+
* Intended for plugins that need a constrained side-channel inference
|
|
396
|
+
* leveraging the agent's already-warm prompt cache — e.g. context-compact
|
|
397
|
+
* uses this to ask the same model for a summary, paying only the trailing
|
|
398
|
+
* uncached portion plus output tokens.
|
|
399
|
+
*
|
|
400
|
+
* The cache breakpoint is placed so that everything up to (but excluding)
|
|
401
|
+
* `extraMessages` is cacheable, matching the previous regular inference.
|
|
402
|
+
*/
|
|
403
|
+
async runAuxiliaryInference(extraMessages: LLMMessage[]): Promise<Result<InferenceResponse, LLMError>> {
|
|
404
|
+
const agentState = this.state
|
|
405
|
+
if (!agentState) {
|
|
406
|
+
return Err({ type: 'invalid_request', message: `Agent ${this.id} has no state` })
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
// pendingToolResults aren't in conversationHistory yet (they get committed
|
|
410
|
+
// by the next inference_completed), but the assistant tool_use that
|
|
411
|
+
// demands them IS at the tail of history. Without these inlined, an
|
|
412
|
+
// aux call placed mid-tool-turn (e.g. by the context-compact plugin's
|
|
413
|
+
// beforeInference hook) lands as `[…, assistant(tool_use), extraMessages]`
|
|
414
|
+
// and Anthropic rejects with "tool_use blocks must be followed by
|
|
415
|
+
// tool_result blocks".
|
|
416
|
+
const pendingToolResultMessages = this.buildPendingMessages(agentState)
|
|
417
|
+
const baseMessages: LLMMessage[] = [
|
|
418
|
+
...agentState.preamble,
|
|
419
|
+
...agentState.conversationHistory,
|
|
420
|
+
...pendingToolResultMessages,
|
|
421
|
+
]
|
|
422
|
+
const messages = [...baseMessages, ...extraMessages]
|
|
423
|
+
const cachedMessages = applyCacheBreakpoint(messages, extraMessages.length, this.config.cacheTtl)
|
|
424
|
+
|
|
425
|
+
const request: InferenceRequest = {
|
|
426
|
+
model: this.config.model,
|
|
427
|
+
systemPrompt: this.buildSystemPrompt(),
|
|
428
|
+
messages: cachedMessages,
|
|
429
|
+
tools: this.tools.size > 0 ? [...this.tools.values()] : undefined,
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
return this.llmProvider.inference(request, {
|
|
433
|
+
sessionId: this.store.sessionId,
|
|
434
|
+
agentId: this.id,
|
|
435
|
+
signal: this.abortController.signal,
|
|
436
|
+
fileStore: this.fileStore,
|
|
437
|
+
providers: this.llmProviders,
|
|
438
|
+
})
|
|
439
|
+
}
|
|
440
|
+
|
|
381
441
|
// ============================================================================
|
|
382
442
|
// Private methods - Processing
|
|
383
443
|
// ============================================================================
|
|
@@ -485,7 +545,11 @@ export class Agent {
|
|
|
485
545
|
|
|
486
546
|
// Mark cache breakpoint — ephemeral session-context suffix is excluded
|
|
487
547
|
// so it doesn't invalidate the cache on every inference.
|
|
488
|
-
const cachedMessages = applyCacheBreakpoint(
|
|
548
|
+
const cachedMessages = applyCacheBreakpoint(
|
|
549
|
+
messages,
|
|
550
|
+
ephemeralParts.length > 0 ? 1 : 0,
|
|
551
|
+
this.config.cacheTtl,
|
|
552
|
+
)
|
|
489
553
|
|
|
490
554
|
// 5. LLM inference (with retry)
|
|
491
555
|
const request: InferenceRequest = {
|
|
@@ -513,6 +577,7 @@ export class Agent {
|
|
|
513
577
|
// can notify the parent.
|
|
514
578
|
let llmResponse: Result<InferenceResponse, LLMError>
|
|
515
579
|
let emptyAttempts = 0
|
|
580
|
+
let nudgeInjected = false
|
|
516
581
|
while (true) {
|
|
517
582
|
llmResponse = await withLLMRetry(
|
|
518
583
|
() =>
|
|
@@ -538,13 +603,20 @@ export class Agent {
|
|
|
538
603
|
if (!isEmptyStop) break
|
|
539
604
|
|
|
540
605
|
if (emptyAttempts >= Agent.MAX_EMPTY_RESPONSE_RETRIES) {
|
|
541
|
-
this.logger.
|
|
606
|
+
this.logger.warn('LLM returned empty stop response after retries, coalescing to WAITING', {
|
|
542
607
|
agentId: this.id,
|
|
543
608
|
attempts: emptyAttempts + 1,
|
|
544
609
|
})
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
610
|
+
// Coalesce to WAITING instead of hard-erroring. The LLM accepted the
|
|
611
|
+
// message but couldn't produce output 3× in a row — treating this as
|
|
612
|
+
// a terminal failure was too aggressive (errored state, error message
|
|
613
|
+
// to parent, mailbox tokens stuck unconsumed). Synthetic WAITING goes
|
|
614
|
+
// through the normal success path: mailbox tokens get consumed,
|
|
615
|
+
// limits-guard skips it in dedup (existing WAITING exception), and
|
|
616
|
+
// the agent quietly transitions to pending → complete.
|
|
617
|
+
llmResponse = Ok({
|
|
618
|
+
...llmResponse.value,
|
|
619
|
+
content: 'WAITING',
|
|
548
620
|
})
|
|
549
621
|
break
|
|
550
622
|
}
|
|
@@ -553,24 +625,51 @@ export class Agent {
|
|
|
553
625
|
agentId: this.id,
|
|
554
626
|
attempt: emptyAttempts,
|
|
555
627
|
})
|
|
556
|
-
}
|
|
557
628
|
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
if (
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
629
|
+
// Inject a one-shot nudge after the first empty response. Appended after the
|
|
630
|
+
// existing cache breakpoint, so the cached prefix still hits — only the new
|
|
631
|
+
// tail is uncached. Uses the canonical "WAITING" literal so the response is
|
|
632
|
+
// recognized by the sanitizer and limits-guard plugin.
|
|
633
|
+
if (!nudgeInjected) {
|
|
634
|
+
request.messages.push({
|
|
635
|
+
role: 'user',
|
|
636
|
+
content:
|
|
637
|
+
'<system-nudge>Your previous response was empty (no text and no tool calls). '
|
|
638
|
+
+ 'Either produce a meaningful response — text or tool calls — or, if you '
|
|
639
|
+
+ 'have nothing to do, output only the word WAITING on its own line per the '
|
|
640
|
+
+ 'waiting protocol.</system-nudge>',
|
|
641
|
+
})
|
|
642
|
+
nudgeInjected = true
|
|
569
643
|
}
|
|
570
644
|
}
|
|
571
645
|
|
|
572
646
|
if (!llmResponse.ok) {
|
|
573
|
-
//
|
|
647
|
+
// Aborted (shutdown / interruption): bail silently. Emitting inference_failed
|
|
648
|
+
// would leave the agent in 'errored' with unconsumed plugin tokens — decide()
|
|
649
|
+
// would then loop resume_from_error ↔ infer forever (each retry re-aborts).
|
|
650
|
+
if (llmResponse.error.type === 'aborted') return
|
|
651
|
+
|
|
652
|
+
// Non-retryable failures (invalid_request, context_length) will fail the same
|
|
653
|
+
// way on every retry. Mark plugin tokens consumed before emitting
|
|
654
|
+
// inference_failed so decide()'s resume_from_error path doesn't re-feed the
|
|
655
|
+
// same message into a doomed retry loop. Retryable errors (rate_limit,
|
|
656
|
+
// server_error, network_error, timeout) keep the preserve-for-retry semantics.
|
|
657
|
+
if (!isRetryableLLMError(llmResponse.error)) {
|
|
658
|
+
const errorAgentState = this.state
|
|
659
|
+
if (errorAgentState) {
|
|
660
|
+
const errorCtx = this.buildAgentContext(errorAgentState)
|
|
661
|
+
for (const dequeued of pluginDequeued) {
|
|
662
|
+
if (!dequeued.plugin.dequeue) continue
|
|
663
|
+
const pluginCtx = this.buildPluginHookContext(dequeued.plugin, errorCtx)
|
|
664
|
+
await dequeued.plugin.dequeue.markConsumed(pluginCtx, dequeued.token)
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
// 4a. Inference failed — emit inference_failed without marking plugin messages
|
|
670
|
+
// consumed. The reducer leaves pendingToolResults / mailbox tokens intact so the
|
|
671
|
+
// next inference rebuilds the same turn; marking consumed here would drop the
|
|
672
|
+
// mailbox tokens and the retry would be missing the user message.
|
|
574
673
|
await this.store.emit(withSessionId(
|
|
575
674
|
this.store.sessionId,
|
|
576
675
|
llmEvents.create('inference_failed', {
|
|
@@ -587,6 +686,20 @@ export class Agent {
|
|
|
587
686
|
return
|
|
588
687
|
}
|
|
589
688
|
|
|
689
|
+
// Mark plugin messages as consumed only after successful inference. They've been
|
|
690
|
+
// appended to conversationHistory via the inference_completed reducer below.
|
|
691
|
+
{
|
|
692
|
+
const currentAgentState = this.state
|
|
693
|
+
if (currentAgentState) {
|
|
694
|
+
const ctx = this.buildAgentContext(currentAgentState)
|
|
695
|
+
for (const dequeued of pluginDequeued) {
|
|
696
|
+
if (!dequeued.plugin.dequeue) continue
|
|
697
|
+
const pluginCtx = this.buildPluginHookContext(dequeued.plugin, ctx)
|
|
698
|
+
await dequeued.plugin.dequeue.markConsumed(pluginCtx, dequeued.token)
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
|
|
590
703
|
// 4c. Sanitize response to prevent hallucination
|
|
591
704
|
const sanitized = sanitizeLLMResponse(llmResponse.value.content)
|
|
592
705
|
|
|
@@ -866,6 +979,7 @@ export class Agent {
|
|
|
866
979
|
agentConfig: this.config,
|
|
867
980
|
input: agentState.typedInput,
|
|
868
981
|
parentId: agentState.parentId,
|
|
982
|
+
runAuxiliaryInference: (extraMessages) => this.runAuxiliaryInference(extraMessages),
|
|
869
983
|
}
|
|
870
984
|
}
|
|
871
985
|
|
|
@@ -52,6 +52,13 @@ export interface BaseAgentConfig<TInput = unknown> {
|
|
|
52
52
|
services?: ServiceConfig[]
|
|
53
53
|
/** LLM middleware chain applied per-agent (runs after preset-level middleware) */
|
|
54
54
|
llmMiddleware?: LLMMiddleware[]
|
|
55
|
+
/**
|
|
56
|
+
* Prompt cache TTL for this agent's inference breakpoints.
|
|
57
|
+
* '1h' opts into Anthropic's extended cache tier for long-lived agents
|
|
58
|
+
* (e.g. an orchestrator that waits minutes between user turns). Defaults
|
|
59
|
+
* to the standard 5-minute tier.
|
|
60
|
+
*/
|
|
61
|
+
cacheTtl?: '5m' | '1h'
|
|
55
62
|
}
|
|
56
63
|
|
|
57
64
|
/**
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import type { InferenceResponse, LLMError, LLMMessage } from '~/core/llm/provider.js'
|
|
2
|
+
import type { Result } from '~/lib/utils/result.js'
|
|
1
3
|
import { SessionContext } from '../sessions/context.js'
|
|
2
4
|
import { AgentConfig } from './agent.js'
|
|
3
5
|
import { AgentId } from './schema.js'
|
|
@@ -16,4 +18,13 @@ export type AgentContext<TInput = unknown> =
|
|
|
16
18
|
input: TInput
|
|
17
19
|
/** The parent agent ID (null for root agents) */
|
|
18
20
|
parentId: AgentId | null
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Run a one-off LLM call reusing the agent's current system prompt, tools,
|
|
24
|
+
* and conversation prefix, with extra trailing messages appended. Lets
|
|
25
|
+
* plugins do side-channel inferences (e.g. summarization) while sharing
|
|
26
|
+
* the agent's warm prompt cache. See Agent.runAuxiliaryInference for the
|
|
27
|
+
* full contract.
|
|
28
|
+
*/
|
|
29
|
+
runAuxiliaryInference: (extraMessages: LLMMessage[]) => Promise<Result<InferenceResponse, LLMError>>
|
|
19
30
|
}
|
package/src/core/agents/state.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { agentIdSchema } from '~/core/agents/schema.js'
|
|
|
8
8
|
import { createEventsFactory } from '~/core/events/types'
|
|
9
9
|
import type { ToolResultContent } from '~/core/llm/llm-log-types.js'
|
|
10
10
|
import type { ChatMessageContentItem } from '~/core/llm/llm-log-types.js'
|
|
11
|
+
import type { LLMMetrics } from '~/core/llm/state.js'
|
|
11
12
|
import type { PendingToolResult, ToolCallId } from '~/core/tools/schema.js'
|
|
12
13
|
import { MessageId } from '../../plugins/mailbox/schema.js'
|
|
13
14
|
|
|
@@ -107,11 +108,15 @@ export type AgentPauseReason = 'limit' | 'handler' | 'manual'
|
|
|
107
108
|
|
|
108
109
|
/**
|
|
109
110
|
* Prompt cache breakpoint marker.
|
|
110
|
-
* When set on an LLMMessage, providers place `cache_control
|
|
111
|
-
*
|
|
112
|
-
*
|
|
111
|
+
* When set on an LLMMessage, providers place `cache_control` on the LAST
|
|
112
|
+
* content block of the mapped message (regardless of block type), marking it
|
|
113
|
+
* as a prompt cache checkpoint.
|
|
114
|
+
*
|
|
115
|
+
* `ttl: '1h'` opts into Anthropic's 1-hour cache tier (write cost 2× input,
|
|
116
|
+
* read still 0.1×). Useful for long-lived agents whose prompt cache would
|
|
117
|
+
* otherwise expire between user turns. Omit for the default 5-minute tier.
|
|
113
118
|
*/
|
|
114
|
-
export type LLMMessageCacheControl = { type: 'ephemeral' }
|
|
119
|
+
export type LLMMessageCacheControl = { type: 'ephemeral'; ttl?: '5m' | '1h' }
|
|
115
120
|
|
|
116
121
|
/**
|
|
117
122
|
* User message - from mailbox or direct input.
|
|
@@ -204,6 +209,8 @@ export interface AgentState {
|
|
|
204
209
|
pauseReason?: AgentPauseReason
|
|
205
210
|
/** Human-readable pause message */
|
|
206
211
|
pauseMessage?: string
|
|
212
|
+
/** Metrics from the most recent completed inference — used by plugins (e.g. context-compact) to size context against the provider-reported truth. */
|
|
213
|
+
lastInferenceMetrics?: LLMMetrics
|
|
207
214
|
}
|
|
208
215
|
|
|
209
216
|
// ============================================================================
|
|
@@ -92,35 +92,55 @@ export class SessionFileStore implements FileStore {
|
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
|
|
95
|
-
async list(
|
|
95
|
+
async list(
|
|
96
|
+
path: string,
|
|
97
|
+
options?: { maxDepth?: number; gitIgnore?: boolean },
|
|
98
|
+
): Promise<Result<FileEntry[], string>> {
|
|
96
99
|
const resolved = this.resolvePath(path)
|
|
97
100
|
if (!resolved.ok) return resolved
|
|
98
101
|
|
|
102
|
+
const maxDepth = options?.maxDepth ?? 1
|
|
103
|
+
if (maxDepth < 1) return Ok([])
|
|
104
|
+
|
|
99
105
|
try {
|
|
100
|
-
const items = await this.fs.readdir(resolved.value, { withFileTypes: true })
|
|
101
106
|
const entries: FileEntry[] = []
|
|
102
|
-
|
|
103
|
-
let type: FileEntry['type']
|
|
104
|
-
let size: number | undefined
|
|
105
|
-
if (item.isFile()) {
|
|
106
|
-
type = 'file'
|
|
107
|
-
const s = await this.fs.stat(join(resolved.value, item.name))
|
|
108
|
-
size = s.size
|
|
109
|
-
} else if (item.isDirectory()) {
|
|
110
|
-
type = 'directory'
|
|
111
|
-
} else if (item.isSymbolicLink()) {
|
|
112
|
-
type = 'symlink'
|
|
113
|
-
} else {
|
|
114
|
-
type = 'other'
|
|
115
|
-
}
|
|
116
|
-
entries.push({ name: item.name, type, size })
|
|
117
|
-
}
|
|
107
|
+
await this.walkInto(resolved.value, '', maxDepth, entries)
|
|
118
108
|
return Ok(entries)
|
|
119
109
|
} catch {
|
|
120
110
|
return Err(`Directory not found: ${path}`)
|
|
121
111
|
}
|
|
122
112
|
}
|
|
123
113
|
|
|
114
|
+
private async walkInto(
|
|
115
|
+
absDir: string,
|
|
116
|
+
relPrefix: string,
|
|
117
|
+
remainingDepth: number,
|
|
118
|
+
out: FileEntry[],
|
|
119
|
+
): Promise<void> {
|
|
120
|
+
const items = await this.fs.readdir(absDir, { withFileTypes: true })
|
|
121
|
+
for (const item of items) {
|
|
122
|
+
const relName = relPrefix ? `${relPrefix}/${item.name}` : item.name
|
|
123
|
+
let type: FileEntry['type']
|
|
124
|
+
let size: number | undefined
|
|
125
|
+
if (item.isFile()) {
|
|
126
|
+
type = 'file'
|
|
127
|
+
const s = await this.fs.stat(join(absDir, item.name))
|
|
128
|
+
size = s.size
|
|
129
|
+
} else if (item.isDirectory()) {
|
|
130
|
+
type = 'directory'
|
|
131
|
+
} else if (item.isSymbolicLink()) {
|
|
132
|
+
type = 'symlink'
|
|
133
|
+
} else {
|
|
134
|
+
type = 'other'
|
|
135
|
+
}
|
|
136
|
+
out.push({ name: relName, type, size })
|
|
137
|
+
|
|
138
|
+
if (type === 'directory' && remainingDepth > 1) {
|
|
139
|
+
await this.walkInto(join(absDir, item.name), relName, remainingDepth - 1, out)
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
124
144
|
async remove(path: string): Promise<Result<void, string>> {
|
|
125
145
|
const resolved = this.resolvePath(path)
|
|
126
146
|
if (!resolved.ok) return resolved
|
|
@@ -1,25 +1,36 @@
|
|
|
1
|
-
import { afterEach, describe, expect, it,
|
|
2
|
-
import
|
|
1
|
+
import { afterEach, describe, expect, it, spyOn } from 'bun:test'
|
|
2
|
+
import { tmpdir } from 'node:os'
|
|
3
|
+
import { VipsImageResizer } from './vips-resizer.js'
|
|
4
|
+
import { createNodeFileSystem } from '~/testing/node-platform.js'
|
|
5
|
+
import type { ExecFileResult, ProcessRunner } from '~/platform/process.js'
|
|
3
6
|
|
|
4
7
|
type ExecFileCallback = (error: Error | null, stdout: string, stderr: string) => void
|
|
5
8
|
let execFileImpl: (cmd: string, args: string[], opts: unknown, cb: ExecFileCallback) => void = () => {}
|
|
6
9
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
// Fake ProcessRunner — wires execFile calls through the test-controlled
|
|
11
|
+
// `execFileImpl`. Avoids `mock.module('node:child_process')` because
|
|
12
|
+
// node-platform.ts promisifies execFile at module-load and other test files
|
|
13
|
+
// may load it first, freezing the binding to the real implementation.
|
|
14
|
+
function createFakeProcessRunner(): ProcessRunner {
|
|
15
|
+
return {
|
|
16
|
+
execFile: (file, args, options) =>
|
|
17
|
+
new Promise<ExecFileResult>((resolve, reject) => {
|
|
18
|
+
execFileImpl(file, args, options ?? {}, (err, stdout, stderr) => {
|
|
19
|
+
if (err) reject(err)
|
|
20
|
+
else resolve({ stdout: stdout ?? '', stderr: stderr ?? '' })
|
|
21
|
+
})
|
|
22
|
+
}),
|
|
23
|
+
spawn: () => {
|
|
24
|
+
throw new Error('spawn not implemented in test fake')
|
|
25
|
+
},
|
|
26
|
+
}
|
|
27
|
+
}
|
|
14
28
|
|
|
15
|
-
// Test-scoped helper — routes through createNodePlatform so the module-level
|
|
16
|
-
// node:child_process mock still intercepts execFile calls made by ProcessRunner.
|
|
17
29
|
function createResizer(maxDimension?: number): InstanceType<typeof VipsImageResizer> {
|
|
18
|
-
const platform = createNodePlatform()
|
|
19
30
|
return new VipsImageResizer({
|
|
20
|
-
fs:
|
|
21
|
-
process:
|
|
22
|
-
tmpDir:
|
|
31
|
+
fs: createNodeFileSystem(),
|
|
32
|
+
process: createFakeProcessRunner(),
|
|
33
|
+
tmpDir: tmpdir(),
|
|
23
34
|
maxDimension,
|
|
24
35
|
})
|
|
25
36
|
}
|
|
@@ -85,10 +85,15 @@ interface AnthropicErrorResponse {
|
|
|
85
85
|
// Request body types
|
|
86
86
|
// ============================================================================
|
|
87
87
|
|
|
88
|
+
interface AnthropicCacheControl {
|
|
89
|
+
type: 'ephemeral'
|
|
90
|
+
ttl?: '5m' | '1h'
|
|
91
|
+
}
|
|
92
|
+
|
|
88
93
|
interface AnthropicTextBlockParam {
|
|
89
94
|
type: 'text'
|
|
90
95
|
text: string
|
|
91
|
-
cache_control?:
|
|
96
|
+
cache_control?: AnthropicCacheControl
|
|
92
97
|
}
|
|
93
98
|
|
|
94
99
|
interface AnthropicImageBlockParam {
|
|
@@ -96,7 +101,7 @@ interface AnthropicImageBlockParam {
|
|
|
96
101
|
source:
|
|
97
102
|
| { type: 'base64'; media_type: string; data: string }
|
|
98
103
|
| { type: 'url'; url: string }
|
|
99
|
-
cache_control?:
|
|
104
|
+
cache_control?: AnthropicCacheControl
|
|
100
105
|
}
|
|
101
106
|
|
|
102
107
|
interface AnthropicToolUseBlockParam {
|
|
@@ -104,7 +109,7 @@ interface AnthropicToolUseBlockParam {
|
|
|
104
109
|
id: string
|
|
105
110
|
name: string
|
|
106
111
|
input: unknown
|
|
107
|
-
cache_control?:
|
|
112
|
+
cache_control?: AnthropicCacheControl
|
|
108
113
|
}
|
|
109
114
|
|
|
110
115
|
interface AnthropicToolResultBlockParam {
|
|
@@ -112,7 +117,7 @@ interface AnthropicToolResultBlockParam {
|
|
|
112
117
|
tool_use_id: string
|
|
113
118
|
content: string | Array<AnthropicTextBlockParam | AnthropicImageBlockParam>
|
|
114
119
|
is_error?: boolean
|
|
115
|
-
cache_control?:
|
|
120
|
+
cache_control?: AnthropicCacheControl
|
|
116
121
|
}
|
|
117
122
|
|
|
118
123
|
type AnthropicContentBlockParam =
|
|
@@ -127,19 +132,19 @@ interface AnthropicMessageParam {
|
|
|
127
132
|
}
|
|
128
133
|
|
|
129
134
|
/**
|
|
130
|
-
* Add `cache_control
|
|
131
|
-
*
|
|
132
|
-
*
|
|
133
|
-
*
|
|
135
|
+
* Add `cache_control` to the LAST content block of an AnthropicMessageParam,
|
|
136
|
+
* regardless of block type. Converts string content to a single text block
|
|
137
|
+
* first so the mark has a place to live. Mutates in place so the cache
|
|
138
|
+
* breakpoint survives subsequent `mergeConsecutiveMessages`.
|
|
134
139
|
*/
|
|
135
|
-
function applyCacheControlToLastBlock(msg: AnthropicMessageParam): void {
|
|
140
|
+
function applyCacheControlToLastBlock(msg: AnthropicMessageParam, cacheControl: AnthropicCacheControl): void {
|
|
136
141
|
if (typeof msg.content === 'string') {
|
|
137
|
-
msg.content = [{ type: 'text', text: msg.content, cache_control:
|
|
142
|
+
msg.content = [{ type: 'text', text: msg.content, cache_control: cacheControl }]
|
|
138
143
|
return
|
|
139
144
|
}
|
|
140
145
|
if (msg.content.length === 0) return
|
|
141
146
|
const lastIdx = msg.content.length - 1
|
|
142
|
-
msg.content[lastIdx] = { ...msg.content[lastIdx], cache_control:
|
|
147
|
+
msg.content[lastIdx] = { ...msg.content[lastIdx], cache_control: cacheControl }
|
|
143
148
|
}
|
|
144
149
|
|
|
145
150
|
interface AnthropicToolParam {
|
|
@@ -366,7 +371,9 @@ export class AnthropicProvider implements RoutableLLMProvider {
|
|
|
366
371
|
private async mapMessage(msg: LLMMessage, context?: InferenceContext): Promise<AnthropicMessageParam> {
|
|
367
372
|
const mapped = await this.mapMessageContent(msg, context)
|
|
368
373
|
if (msg.cacheControl) {
|
|
369
|
-
|
|
374
|
+
const cc: AnthropicCacheControl = { type: 'ephemeral' }
|
|
375
|
+
if (msg.cacheControl.ttl) cc.ttl = msg.cacheControl.ttl
|
|
376
|
+
applyCacheControlToLastBlock(mapped, cc)
|
|
370
377
|
}
|
|
371
378
|
return mapped
|
|
372
379
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { LLMMessage } from '~/core/agents/state.js'
|
|
1
|
+
import type { LLMMessage, LLMMessageCacheControl } from '~/core/agents/state.js'
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Mark the prompt cache breakpoint on a message list.
|
|
@@ -13,25 +13,34 @@ import type { LLMMessage } from '~/core/agents/state.js'
|
|
|
13
13
|
* Target index is `messages.length - 1 - uncachedSuffixCount`. The suffix is
|
|
14
14
|
* the tail of messages that must remain fresh (e.g. ephemeral session context
|
|
15
15
|
* rebuilt each inference).
|
|
16
|
+
*
|
|
17
|
+
* `ttl` opts into Anthropic's 1-hour cache tier (write cost 2× input, read
|
|
18
|
+
* still 0.1×). Useful for long-lived agents where the default 5-minute TTL
|
|
19
|
+
* would expire between user turns. Omit for the default 5-minute tier.
|
|
16
20
|
*/
|
|
17
|
-
export function applyCacheBreakpoint(
|
|
21
|
+
export function applyCacheBreakpoint(
|
|
22
|
+
messages: LLMMessage[],
|
|
23
|
+
uncachedSuffixCount: number,
|
|
24
|
+
ttl?: '5m' | '1h',
|
|
25
|
+
): LLMMessage[] {
|
|
18
26
|
const idx = messages.length - 1 - uncachedSuffixCount
|
|
19
27
|
if (idx < 0) return messages
|
|
20
28
|
|
|
29
|
+
const cacheControl: LLMMessageCacheControl = ttl ? { type: 'ephemeral', ttl } : { type: 'ephemeral' }
|
|
21
30
|
const target = messages[idx]
|
|
22
31
|
const result = [...messages]
|
|
23
32
|
switch (target.role) {
|
|
24
33
|
case 'user':
|
|
25
|
-
result[idx] = { ...target, cacheControl
|
|
34
|
+
result[idx] = { ...target, cacheControl }
|
|
26
35
|
break
|
|
27
36
|
case 'assistant':
|
|
28
|
-
result[idx] = { ...target, cacheControl
|
|
37
|
+
result[idx] = { ...target, cacheControl }
|
|
29
38
|
break
|
|
30
39
|
case 'system':
|
|
31
|
-
result[idx] = { ...target, cacheControl
|
|
40
|
+
result[idx] = { ...target, cacheControl }
|
|
32
41
|
break
|
|
33
42
|
case 'tool':
|
|
34
|
-
result[idx] = { ...target, cacheControl
|
|
43
|
+
result[idx] = { ...target, cacheControl }
|
|
35
44
|
break
|
|
36
45
|
}
|
|
37
46
|
return result
|
|
@@ -712,6 +712,8 @@ describe('applyEvent', () => {
|
|
|
712
712
|
expect(session.agents.get(agentId)!.pendingMessages).toHaveLength(2)
|
|
713
713
|
expect(session.agents.get(agentId)!.status).toBe('inferring')
|
|
714
714
|
|
|
715
|
+
const historyLenBeforeFailure = session.agents.get(agentId)!.conversationHistory.length
|
|
716
|
+
|
|
715
717
|
// 6. Inference fails
|
|
716
718
|
session = applyEvent(
|
|
717
719
|
session,
|
|
@@ -735,6 +737,10 @@ describe('applyEvent', () => {
|
|
|
735
737
|
expect(getAgentMailbox(selectMailboxState(session), agentId)[0].consumed).toBe(false)
|
|
736
738
|
// status is errored
|
|
737
739
|
expect(agent.status).toBe('errored')
|
|
740
|
+
// conversationHistory NOT extended — pendingMessages are dropped, not promoted.
|
|
741
|
+
// Otherwise tool results would appear both in history and in pendingToolResults,
|
|
742
|
+
// duplicating them on the next inference (Bedrock-style provider rejects 400).
|
|
743
|
+
expect(agent.conversationHistory).toHaveLength(historyLenBeforeFailure)
|
|
738
744
|
})
|
|
739
745
|
})
|
|
740
746
|
|
|
@@ -494,6 +494,15 @@ export class Session {
|
|
|
494
494
|
}
|
|
495
495
|
|
|
496
496
|
const result = await methodDef.handler(ctx, parsed.data)
|
|
497
|
+
|
|
498
|
+
// Plugin methods can mutate dequeue state (uploads.upload adds to pending,
|
|
499
|
+
// resources.inject can too) without explicitly calling ctx.scheduleAgent.
|
|
500
|
+
// Schedule every agent — scheduleProcessing is idempotent + debounced, and
|
|
501
|
+
// decide() shortcircuits to idle/complete when no work is actually pending.
|
|
502
|
+
for (const agent of this.agents.values()) {
|
|
503
|
+
agent.scheduleProcessing()
|
|
504
|
+
}
|
|
505
|
+
|
|
497
506
|
return result
|
|
498
507
|
}
|
|
499
508
|
|
|
@@ -795,6 +804,7 @@ export class Session {
|
|
|
795
804
|
checkIntervalMs: orch.checkIntervalMs,
|
|
796
805
|
input: orch.input,
|
|
797
806
|
plugins: withServicePluginConfig(orch),
|
|
807
|
+
cacheTtl: orch.cacheTtl,
|
|
798
808
|
}
|
|
799
809
|
}
|
|
800
810
|
|
|
@@ -810,6 +820,7 @@ export class Session {
|
|
|
810
820
|
checkIntervalMs: comm.checkIntervalMs,
|
|
811
821
|
input: comm.input,
|
|
812
822
|
plugins: withServicePluginConfig(comm),
|
|
823
|
+
cacheTtl: comm.cacheTtl,
|
|
813
824
|
}
|
|
814
825
|
}
|
|
815
826
|
|
|
@@ -828,6 +839,7 @@ export class Session {
|
|
|
828
839
|
checkIntervalMs: agentDef.checkIntervalMs,
|
|
829
840
|
input: agentDef.input,
|
|
830
841
|
plugins: withServicePluginConfig(agentDef),
|
|
842
|
+
cacheTtl: agentDef.cacheTtl,
|
|
831
843
|
}
|
|
832
844
|
}
|
|
833
845
|
}
|
|
@@ -207,15 +207,19 @@ export const coreReducer = createTypedReducer(
|
|
|
207
207
|
pendingToolCalls: toolCalls,
|
|
208
208
|
pendingMessages: [],
|
|
209
209
|
pendingToolResults: [],
|
|
210
|
+
lastInferenceMetrics: event.metrics,
|
|
210
211
|
}
|
|
211
212
|
})
|
|
212
213
|
}
|
|
213
214
|
|
|
214
215
|
case 'inference_failed':
|
|
216
|
+
// Failure is a clean rollback: pendingMessages are dropped (not promoted to history)
|
|
217
|
+
// and pendingToolResults / mailbox tokens stay intact so the next inference
|
|
218
|
+
// rebuilds the same turn. Runtime must skip markConsumed on failure to preserve
|
|
219
|
+
// mailbox tokens — see runInference().
|
|
215
220
|
return updateAgent(state, event.agentId, (agent) => ({
|
|
216
221
|
...agent,
|
|
217
222
|
status: 'errored',
|
|
218
|
-
conversationHistory: [...agent.conversationHistory, ...agent.pendingMessages],
|
|
219
223
|
pendingMessages: [],
|
|
220
224
|
}))
|
|
221
225
|
|
|
@@ -51,6 +51,7 @@ const createTestContext = (): ToolContext => {
|
|
|
51
51
|
agentConfig: { systemPrompt: 'test', model: ModelId('test'), spawnableAgents: [] },
|
|
52
52
|
input: undefined,
|
|
53
53
|
parentId: null,
|
|
54
|
+
runAuxiliaryInference: async () => Err({ type: 'invalid_request', message: 'not implemented in test' }),
|
|
54
55
|
}
|
|
55
56
|
}
|
|
56
57
|
|