@librechat/agents 3.1.73 → 3.1.75-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -0
- package/dist/cjs/agents/AgentContext.cjs +146 -57
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +13 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/index.cjs +145 -52
- package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +25 -15
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +84 -70
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +1 -1
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +213 -3
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +2 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/utils/common.cjs +5 -4
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +468 -647
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/utils/index.cjs +1 -448
- package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +57 -175
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +5 -3
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +1 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +39 -4
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +7 -6
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +7 -6
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/langchain.cjs +26 -0
- package/dist/cjs/messages/langchain.cjs.map +1 -0
- package/dist/cjs/messages/prune.cjs +7 -6
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/tools/BashExecutor.cjs +21 -11
- package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
- package/dist/cjs/tools/CodeExecutor.cjs +37 -10
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs +16 -11
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +5 -1
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +147 -58
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +13 -3
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/llm/anthropic/index.mjs +146 -54
- package/dist/esm/llm/anthropic/index.mjs.map +1 -1
- package/dist/esm/llm/anthropic/types.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +25 -15
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs +84 -71
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +1 -1
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +214 -4
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +2 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/utils/common.mjs +5 -4
- package/dist/esm/llm/google/utils/common.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +469 -648
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openai/utils/index.mjs +4 -449
- package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +57 -175
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +5 -3
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +1 -1
- package/dist/esm/messages/cache.mjs +39 -4
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +7 -6
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +7 -6
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/langchain.mjs +23 -0
- package/dist/esm/messages/langchain.mjs.map +1 -0
- package/dist/esm/messages/prune.mjs +7 -6
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/tools/BashExecutor.mjs +22 -12
- package/dist/esm/tools/BashExecutor.mjs.map +1 -1
- package/dist/esm/tools/CodeExecutor.mjs +37 -11
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/tools/ProgrammaticToolCalling.mjs +17 -12
- package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +5 -1
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +29 -4
- package/dist/types/agents/__tests__/promptCacheLiveHelpers.d.ts +46 -0
- package/dist/types/llm/anthropic/index.d.ts +22 -9
- package/dist/types/llm/anthropic/types.d.ts +5 -1
- package/dist/types/llm/anthropic/utils/message_outputs.d.ts +13 -6
- package/dist/types/llm/anthropic/utils/output_parsers.d.ts +1 -1
- package/dist/types/llm/openai/index.d.ts +21 -24
- package/dist/types/llm/openrouter/index.d.ts +11 -9
- package/dist/types/llm/vertexai/index.d.ts +1 -0
- package/dist/types/messages/cache.d.ts +4 -1
- package/dist/types/messages/langchain.d.ts +27 -0
- package/dist/types/tools/CodeExecutor.d.ts +6 -0
- package/dist/types/types/graph.d.ts +26 -38
- package/dist/types/types/llm.d.ts +3 -3
- package/dist/types/types/run.d.ts +2 -0
- package/dist/types/types/stream.d.ts +1 -1
- package/dist/types/types/tools.d.ts +9 -0
- package/package.json +17 -16
- package/src/agents/AgentContext.ts +189 -71
- package/src/agents/__tests__/AgentContext.anthropic.live.test.ts +116 -0
- package/src/agents/__tests__/AgentContext.bedrock.live.test.ts +149 -0
- package/src/agents/__tests__/AgentContext.test.ts +333 -2
- package/src/agents/__tests__/promptCacheLiveHelpers.ts +165 -0
- package/src/graphs/Graph.ts +24 -4
- package/src/graphs/__tests__/composition.smoke.test.ts +188 -0
- package/src/llm/anthropic/index.ts +252 -84
- package/src/llm/anthropic/llm.spec.ts +751 -102
- package/src/llm/anthropic/types.ts +9 -1
- package/src/llm/anthropic/utils/message_inputs.ts +43 -20
- package/src/llm/anthropic/utils/message_outputs.ts +119 -101
- package/src/llm/anthropic/utils/server-tool-inputs.test.ts +77 -0
- package/src/llm/bedrock/index.ts +2 -2
- package/src/llm/bedrock/llm.spec.ts +341 -0
- package/src/llm/bedrock/utils/message_inputs.ts +303 -4
- package/src/llm/bedrock/utils/message_outputs.ts +2 -1
- package/src/llm/custom-chat-models.smoke.test.ts +662 -0
- package/src/llm/google/llm.spec.ts +339 -57
- package/src/llm/google/utils/common.ts +53 -48
- package/src/llm/openai/contentBlocks.test.ts +346 -0
- package/src/llm/openai/index.ts +736 -837
- package/src/llm/openai/utils/index.ts +84 -64
- package/src/llm/openrouter/index.ts +124 -247
- package/src/llm/openrouter/reasoning.test.ts +8 -1
- package/src/llm/vertexai/index.ts +11 -5
- package/src/llm/vertexai/llm.spec.ts +28 -1
- package/src/messages/cache.test.ts +106 -4
- package/src/messages/cache.ts +57 -5
- package/src/messages/core.ts +16 -9
- package/src/messages/format.ts +9 -6
- package/src/messages/langchain.ts +39 -0
- package/src/messages/prune.ts +12 -8
- package/src/scripts/caching.ts +2 -3
- package/src/specs/anthropic.simple.test.ts +61 -0
- package/src/specs/summarization.test.ts +58 -61
- package/src/tools/BashExecutor.ts +37 -13
- package/src/tools/CodeExecutor.ts +55 -11
- package/src/tools/ProgrammaticToolCalling.ts +29 -14
- package/src/tools/ToolNode.ts +5 -1
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +60 -0
- package/src/types/graph.ts +35 -88
- package/src/types/llm.ts +3 -3
- package/src/types/run.ts +2 -0
- package/src/types/stream.ts +1 -1
- package/src/types/tools.ts +9 -0
- package/src/utils/llmConfig.ts +1 -6
|
@@ -20,6 +20,16 @@ import { addCacheControl } from '@/messages/cache';
|
|
|
20
20
|
import { DEFAULT_RESERVE_RATIO } from '@/messages';
|
|
21
21
|
import { toJsonSchema } from '@/utils/schema';
|
|
22
22
|
|
|
23
|
+
type AgentSystemTextBlock = {
|
|
24
|
+
type: 'text';
|
|
25
|
+
text: string;
|
|
26
|
+
cache_control?: { type: 'ephemeral' };
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
type AgentSystemContentBlock =
|
|
30
|
+
| AgentSystemTextBlock
|
|
31
|
+
| { cachePoint: { type: 'default' } };
|
|
32
|
+
|
|
23
33
|
/**
|
|
24
34
|
* Encapsulates agent-specific state that can vary between agents in a multi-agent system
|
|
25
35
|
*/
|
|
@@ -249,7 +259,7 @@ export class AgentContext {
|
|
|
249
259
|
private summaryTokenCount: number = 0;
|
|
250
260
|
/**
|
|
251
261
|
* Where the summary should be injected:
|
|
252
|
-
* - `'system_prompt'`: cross-run summary, included in
|
|
262
|
+
* - `'system_prompt'`: cross-run summary, included in the dynamic system tail
|
|
253
263
|
* - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
|
|
254
264
|
* - `'none'`: no summary present
|
|
255
265
|
*/
|
|
@@ -417,7 +427,8 @@ export class AgentContext {
|
|
|
417
427
|
|
|
418
428
|
/**
|
|
419
429
|
* Gets the system runnable, creating it lazily if needed.
|
|
420
|
-
* Includes instructions, additional instructions, and
|
|
430
|
+
* Includes stable instructions, dynamic additional instructions, and
|
|
431
|
+
* programmatic-only tools documentation.
|
|
421
432
|
* Only rebuilds when marked stale (via markToolsAsDiscovered).
|
|
422
433
|
*/
|
|
423
434
|
get systemRunnable():
|
|
@@ -431,8 +442,10 @@ export class AgentContext {
|
|
|
431
442
|
return this.cachedSystemRunnable;
|
|
432
443
|
}
|
|
433
444
|
|
|
434
|
-
|
|
435
|
-
|
|
445
|
+
this.cachedSystemRunnable = this.buildSystemRunnable({
|
|
446
|
+
stableInstructions: this.buildStableInstructionsString(),
|
|
447
|
+
dynamicInstructions: this.buildDynamicInstructionsString(),
|
|
448
|
+
});
|
|
436
449
|
this.systemRunnableStale = false;
|
|
437
450
|
return this.cachedSystemRunnable;
|
|
438
451
|
}
|
|
@@ -443,17 +456,19 @@ export class AgentContext {
|
|
|
443
456
|
*/
|
|
444
457
|
initializeSystemRunnable(): void {
|
|
445
458
|
if (this.systemRunnableStale || this.cachedSystemRunnable === undefined) {
|
|
446
|
-
|
|
447
|
-
|
|
459
|
+
this.cachedSystemRunnable = this.buildSystemRunnable({
|
|
460
|
+
stableInstructions: this.buildStableInstructionsString(),
|
|
461
|
+
dynamicInstructions: this.buildDynamicInstructionsString(),
|
|
462
|
+
});
|
|
448
463
|
this.systemRunnableStale = false;
|
|
449
464
|
}
|
|
450
465
|
}
|
|
451
466
|
|
|
452
467
|
/**
|
|
453
|
-
* Builds the
|
|
468
|
+
* Builds the cacheable instructions string (without creating SystemMessage).
|
|
454
469
|
* Includes agent identity preamble and handoff context when available.
|
|
455
470
|
*/
|
|
456
|
-
private
|
|
471
|
+
private buildStableInstructionsString(): string {
|
|
457
472
|
const parts: string[] = [];
|
|
458
473
|
|
|
459
474
|
const identityPreamble = this.buildIdentityPreamble();
|
|
@@ -465,6 +480,22 @@ export class AgentContext {
|
|
|
465
480
|
parts.push(this.instructions);
|
|
466
481
|
}
|
|
467
482
|
|
|
483
|
+
const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
|
|
484
|
+
if (programmaticToolsDoc) {
|
|
485
|
+
parts.push(programmaticToolsDoc);
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
return parts.join('\n\n');
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
/**
|
|
492
|
+
* Builds the dynamic system-tail string (without creating SystemMessage).
|
|
493
|
+
* Keep this out of prompt-cache-marked content so volatile context does not
|
|
494
|
+
* invalidate the stable prefix.
|
|
495
|
+
*/
|
|
496
|
+
private buildDynamicInstructionsString(): string {
|
|
497
|
+
const parts: string[] = [];
|
|
498
|
+
|
|
468
499
|
if (
|
|
469
500
|
this.additionalInstructions != null &&
|
|
470
501
|
this.additionalInstructions !== ''
|
|
@@ -472,14 +503,10 @@ export class AgentContext {
|
|
|
472
503
|
parts.push(this.additionalInstructions);
|
|
473
504
|
}
|
|
474
505
|
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
// Cross-run summary: include in system prompt so the model has context
|
|
481
|
-
// from the prior run. Mid-run summaries are injected as a HumanMessage
|
|
482
|
-
// on the post-compaction clean slate instead (see buildSystemRunnable).
|
|
506
|
+
// Cross-run summary: include in the system tail so the model has context
|
|
507
|
+
// from the prior run without invalidating the cacheable prefix. Mid-run
|
|
508
|
+
// summaries are injected as a HumanMessage on the post-compaction clean
|
|
509
|
+
// slate instead (see buildSystemRunnable).
|
|
483
510
|
if (
|
|
484
511
|
this._summaryLocation === 'system_prompt' &&
|
|
485
512
|
this.summaryText != null &&
|
|
@@ -523,9 +550,13 @@ export class AgentContext {
|
|
|
523
550
|
* Build system runnable from pre-built instructions string.
|
|
524
551
|
* Only called when content has actually changed.
|
|
525
552
|
*/
|
|
526
|
-
private buildSystemRunnable(
|
|
527
|
-
|
|
528
|
-
|
|
553
|
+
private buildSystemRunnable({
|
|
554
|
+
stableInstructions,
|
|
555
|
+
dynamicInstructions,
|
|
556
|
+
}: {
|
|
557
|
+
stableInstructions: string;
|
|
558
|
+
dynamicInstructions: string;
|
|
559
|
+
}):
|
|
529
560
|
| Runnable<
|
|
530
561
|
BaseMessage[],
|
|
531
562
|
(BaseMessage | SystemMessage)[],
|
|
@@ -537,35 +568,17 @@ export class AgentContext {
|
|
|
537
568
|
this.summaryText != null &&
|
|
538
569
|
this.summaryText !== '';
|
|
539
570
|
|
|
540
|
-
if (!
|
|
571
|
+
if (!stableInstructions && !dynamicInstructions && !hasMidRunSummary) {
|
|
541
572
|
this.systemMessageTokens = 0;
|
|
542
573
|
return undefined;
|
|
543
574
|
}
|
|
544
575
|
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
| undefined;
|
|
552
|
-
if (anthropicOptions?.promptCache === true) {
|
|
553
|
-
usePromptCache = true;
|
|
554
|
-
finalInstructions = {
|
|
555
|
-
content: [
|
|
556
|
-
{
|
|
557
|
-
type: 'text',
|
|
558
|
-
text: instructionsString,
|
|
559
|
-
cache_control: { type: 'ephemeral' },
|
|
560
|
-
},
|
|
561
|
-
],
|
|
562
|
-
};
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
|
-
|
|
566
|
-
const systemMessage = instructionsString
|
|
567
|
-
? new SystemMessage(finalInstructions)
|
|
568
|
-
: undefined;
|
|
576
|
+
const usePromptCache = this.hasAnthropicPromptCache();
|
|
577
|
+
const systemMessage = this.buildSystemMessage({
|
|
578
|
+
stableInstructions,
|
|
579
|
+
dynamicInstructions,
|
|
580
|
+
usePromptCache,
|
|
581
|
+
});
|
|
569
582
|
|
|
570
583
|
if (this.tokenCounter) {
|
|
571
584
|
this.systemMessageTokens = systemMessage
|
|
@@ -615,6 +628,72 @@ export class AgentContext {
|
|
|
615
628
|
}).withConfig({ runName: 'prompt' });
|
|
616
629
|
}
|
|
617
630
|
|
|
631
|
+
private hasAnthropicPromptCache(): boolean {
|
|
632
|
+
if (this.provider !== Providers.ANTHROPIC) {
|
|
633
|
+
return false;
|
|
634
|
+
}
|
|
635
|
+
const anthropicOptions = this.clientOptions as
|
|
636
|
+
| t.AnthropicClientOptions
|
|
637
|
+
| undefined;
|
|
638
|
+
return anthropicOptions?.promptCache === true;
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
private hasBedrockPromptCache(): boolean {
|
|
642
|
+
if (this.provider !== Providers.BEDROCK) {
|
|
643
|
+
return false;
|
|
644
|
+
}
|
|
645
|
+
const bedrockOptions = this.clientOptions as
|
|
646
|
+
| t.BedrockAnthropicClientOptions
|
|
647
|
+
| undefined;
|
|
648
|
+
return bedrockOptions?.promptCache === true;
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
private buildSystemMessage({
|
|
652
|
+
stableInstructions,
|
|
653
|
+
dynamicInstructions,
|
|
654
|
+
usePromptCache,
|
|
655
|
+
}: {
|
|
656
|
+
stableInstructions: string;
|
|
657
|
+
dynamicInstructions: string;
|
|
658
|
+
usePromptCache: boolean;
|
|
659
|
+
}): SystemMessage | undefined {
|
|
660
|
+
if (!stableInstructions && !dynamicInstructions) {
|
|
661
|
+
return undefined;
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
if (usePromptCache) {
|
|
665
|
+
const content: AgentSystemContentBlock[] = [];
|
|
666
|
+
if (stableInstructions) {
|
|
667
|
+
content.push({
|
|
668
|
+
type: 'text',
|
|
669
|
+
text: stableInstructions,
|
|
670
|
+
cache_control: { type: 'ephemeral' },
|
|
671
|
+
});
|
|
672
|
+
}
|
|
673
|
+
if (dynamicInstructions) {
|
|
674
|
+
content.push({ type: 'text', text: dynamicInstructions });
|
|
675
|
+
}
|
|
676
|
+
return new SystemMessage({ content } as BaseMessageFields);
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
if (this.hasBedrockPromptCache() && stableInstructions) {
|
|
680
|
+
const content: AgentSystemContentBlock[] = [
|
|
681
|
+
{ type: 'text', text: stableInstructions },
|
|
682
|
+
{ cachePoint: { type: 'default' } },
|
|
683
|
+
];
|
|
684
|
+
if (dynamicInstructions) {
|
|
685
|
+
content.push({ type: 'text', text: dynamicInstructions });
|
|
686
|
+
}
|
|
687
|
+
return new SystemMessage({ content } as BaseMessageFields);
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
return new SystemMessage(
|
|
691
|
+
[stableInstructions, dynamicInstructions]
|
|
692
|
+
.filter((part) => part !== '')
|
|
693
|
+
.join('\n\n')
|
|
694
|
+
);
|
|
695
|
+
}
|
|
696
|
+
|
|
618
697
|
/**
|
|
619
698
|
* Reset context for a new run
|
|
620
699
|
*/
|
|
@@ -681,10 +760,47 @@ export class AgentContext {
|
|
|
681
760
|
if (!this.toolDefinitions) {
|
|
682
761
|
return [];
|
|
683
762
|
}
|
|
684
|
-
|
|
685
|
-
|
|
763
|
+
/**
|
|
764
|
+
* Mirror `getEventDrivenToolsForBinding`'s gate: a definition is only
|
|
765
|
+
* bound to the model when its `allowed_callers` include `'direct'` and
|
|
766
|
+
* (if deferred) it has been discovered. Filtering by `defer_loading`
|
|
767
|
+
* alone left programmatic-only definitions counted in
|
|
768
|
+
* `toolSchemaTokens` even though they were never bound.
|
|
769
|
+
*/
|
|
770
|
+
return this.toolDefinitions.filter((def) => {
|
|
771
|
+
const allowedCallers = def.allowed_callers ?? ['direct'];
|
|
772
|
+
if (!allowedCallers.includes('direct')) {
|
|
773
|
+
return false;
|
|
774
|
+
}
|
|
775
|
+
return (
|
|
686
776
|
def.defer_loading !== true || this.discoveredToolNames.has(def.name)
|
|
687
|
-
|
|
777
|
+
);
|
|
778
|
+
});
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
/**
|
|
782
|
+
* Single source of truth for "which entries of `this.tools` should be
|
|
783
|
+
* treated as actually bound". Callers:
|
|
784
|
+
* - `getToolsForBinding` (non-event-driven branch)
|
|
785
|
+
* - `getEventDrivenToolsForBinding` (appends instance tools alongside
|
|
786
|
+
* schema-only definitions)
|
|
787
|
+
* - `calculateInstructionTokens` (counts schema bytes for accounting)
|
|
788
|
+
*
|
|
789
|
+
* In event-driven mode (`toolDefinitions` present) instance tools are
|
|
790
|
+
* appended unfiltered; outside event-driven mode they pass through
|
|
791
|
+
* `filterToolsForBinding`. Centralizing the decision here prevents the
|
|
792
|
+
* accounting/binding paths from drifting apart, which was the root
|
|
793
|
+
* cause of the original miscount.
|
|
794
|
+
*/
|
|
795
|
+
private getEffectiveInstanceTools(): t.GraphTools | undefined {
|
|
796
|
+
if (!this.tools) {
|
|
797
|
+
return undefined;
|
|
798
|
+
}
|
|
799
|
+
const isEventDriven = (this.toolDefinitions?.length ?? 0) > 0;
|
|
800
|
+
if (isEventDriven || !this.toolRegistry) {
|
|
801
|
+
return this.tools;
|
|
802
|
+
}
|
|
803
|
+
return this.filterToolsForBinding(this.tools);
|
|
688
804
|
}
|
|
689
805
|
|
|
690
806
|
/**
|
|
@@ -703,9 +819,17 @@ export class AgentContext {
|
|
|
703
819
|
* populated after `fromConfig()` kicks off the initial calculation, so
|
|
704
820
|
* callers that mutate `graphTools` must re-trigger this method to
|
|
705
821
|
* refresh `toolSchemaTokens`.
|
|
822
|
+
*
|
|
823
|
+
* Use `getEffectiveInstanceTools()` so accounting reflects exactly the
|
|
824
|
+
* subset that `getToolsForBinding` would emit — preventing the
|
|
825
|
+
* worst-case-ceiling miscount that triggered spurious `empty_messages`
|
|
826
|
+
* preflight rejections at low `maxContextTokens`. Deferred and
|
|
827
|
+
* non-`'direct'` `toolDefinitions` are excluded by
|
|
828
|
+
* `getActiveToolDefinitions()` below.
|
|
706
829
|
*/
|
|
707
830
|
const instanceTools: t.GraphTools = [
|
|
708
|
-
...((this.
|
|
831
|
+
...((this.getEffectiveInstanceTools() as t.GenericTool[] | undefined) ??
|
|
832
|
+
[]),
|
|
709
833
|
...((this.graphTools as t.GenericTool[] | undefined) ?? []),
|
|
710
834
|
];
|
|
711
835
|
|
|
@@ -900,8 +1024,16 @@ export class AgentContext {
|
|
|
900
1024
|
*/
|
|
901
1025
|
getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
|
|
902
1026
|
const maxContextTokens = this.maxContextTokens ?? 0;
|
|
903
|
-
|
|
904
|
-
|
|
1027
|
+
/**
|
|
1028
|
+
* Derive `toolCount` from `getToolsForBinding()` so the diagnostic stays
|
|
1029
|
+
* aligned with what is actually bound to the model — and with what
|
|
1030
|
+
* `calculateInstructionTokens` counts into `toolSchemaTokens`. Using raw
|
|
1031
|
+
* `this.tools.length` would inflate the count whenever the registry
|
|
1032
|
+
* marks instance tools as deferred-undiscovered or non-`'direct'`,
|
|
1033
|
+
* producing the same misleading "N tools" diagnostic this fix is meant
|
|
1034
|
+
* to eliminate.
|
|
1035
|
+
*/
|
|
1036
|
+
const toolCount = this.getToolsForBinding()?.length ?? 0;
|
|
905
1037
|
const messageCount = messages?.length ?? 0;
|
|
906
1038
|
|
|
907
1039
|
let messageTokens = 0;
|
|
@@ -1014,10 +1146,7 @@ export class AgentContext {
|
|
|
1014
1146
|
return this.getEventDrivenToolsForBinding();
|
|
1015
1147
|
}
|
|
1016
1148
|
|
|
1017
|
-
const filtered =
|
|
1018
|
-
!this.tools || !this.toolRegistry
|
|
1019
|
-
? this.tools
|
|
1020
|
-
: this.filterToolsForBinding(this.tools);
|
|
1149
|
+
const filtered = this.getEffectiveInstanceTools();
|
|
1021
1150
|
|
|
1022
1151
|
if (this.graphTools && this.graphTools.length > 0) {
|
|
1023
1152
|
return [...(filtered ?? []), ...this.graphTools];
|
|
@@ -1032,21 +1161,9 @@ export class AgentContext {
|
|
|
1032
1161
|
return this.graphTools ?? [];
|
|
1033
1162
|
}
|
|
1034
1163
|
|
|
1035
|
-
const
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
return false;
|
|
1039
|
-
}
|
|
1040
|
-
if (
|
|
1041
|
-
def.defer_loading === true &&
|
|
1042
|
-
!this.discoveredToolNames.has(def.name)
|
|
1043
|
-
) {
|
|
1044
|
-
return false;
|
|
1045
|
-
}
|
|
1046
|
-
return true;
|
|
1047
|
-
});
|
|
1048
|
-
|
|
1049
|
-
const schemaTools = createSchemaOnlyTools(defsToInclude) as t.GraphTools;
|
|
1164
|
+
const schemaTools = createSchemaOnlyTools(
|
|
1165
|
+
this.getActiveToolDefinitions()
|
|
1166
|
+
) as t.GraphTools;
|
|
1050
1167
|
|
|
1051
1168
|
const allTools = [...schemaTools];
|
|
1052
1169
|
|
|
@@ -1054,8 +1171,9 @@ export class AgentContext {
|
|
|
1054
1171
|
allTools.push(...this.graphTools);
|
|
1055
1172
|
}
|
|
1056
1173
|
|
|
1057
|
-
|
|
1058
|
-
|
|
1174
|
+
const instanceTools = this.getEffectiveInstanceTools();
|
|
1175
|
+
if (instanceTools && instanceTools.length > 0) {
|
|
1176
|
+
allTools.push(...instanceTools);
|
|
1059
1177
|
}
|
|
1060
1178
|
|
|
1061
1179
|
return allTools;
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
// src/agents/__tests__/AgentContext.anthropic.live.test.ts
|
|
2
|
+
/**
|
|
3
|
+
* Live Anthropic prompt-cache verification.
|
|
4
|
+
*
|
|
5
|
+
* Run with:
|
|
6
|
+
* RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS=1 ANTHROPIC_API_KEY=... npm test -- AgentContext.anthropic.live.test.ts --runInBand
|
|
7
|
+
*/
|
|
8
|
+
import { config as dotenvConfig } from 'dotenv';
|
|
9
|
+
dotenvConfig();
|
|
10
|
+
|
|
11
|
+
import { describe, expect, it } from '@jest/globals';
|
|
12
|
+
import type * as t from '@/types';
|
|
13
|
+
import {
|
|
14
|
+
runLiveTurn,
|
|
15
|
+
assertSystemPayloadShape,
|
|
16
|
+
buildDynamicInstructions,
|
|
17
|
+
buildStableInstructions,
|
|
18
|
+
waitForCachePropagation,
|
|
19
|
+
} from './promptCacheLiveHelpers';
|
|
20
|
+
import { Providers } from '@/common';
|
|
21
|
+
|
|
22
|
+
const shouldRunLive =
|
|
23
|
+
process.env.RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS === '1' &&
|
|
24
|
+
process.env.ANTHROPIC_API_KEY != null &&
|
|
25
|
+
process.env.ANTHROPIC_API_KEY !== '';
|
|
26
|
+
|
|
27
|
+
const describeIfLive = shouldRunLive ? describe : describe.skip;
|
|
28
|
+
|
|
29
|
+
const modelName =
|
|
30
|
+
process.env.ANTHROPIC_PROMPT_CACHE_MODEL ?? 'claude-sonnet-4-5';
|
|
31
|
+
const providerLabel = 'Anthropic';
|
|
32
|
+
|
|
33
|
+
function createClientOptions(): t.AnthropicClientOptions {
|
|
34
|
+
return {
|
|
35
|
+
modelName,
|
|
36
|
+
temperature: 0,
|
|
37
|
+
maxTokens: 8,
|
|
38
|
+
streaming: true,
|
|
39
|
+
streamUsage: true,
|
|
40
|
+
promptCache: true,
|
|
41
|
+
clientOptions: {
|
|
42
|
+
defaultHeaders: {
|
|
43
|
+
'anthropic-beta': 'prompt-caching-2024-07-31',
|
|
44
|
+
},
|
|
45
|
+
},
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
describeIfLive('AgentContext Anthropic prompt cache live API', () => {
|
|
50
|
+
it('caches only the stable system prefix while dynamic tail changes', async () => {
|
|
51
|
+
const nonce = `agent-cache-live-${Date.now()}`;
|
|
52
|
+
const clientOptions = createClientOptions();
|
|
53
|
+
const stableInstructions = buildStableInstructions({
|
|
54
|
+
nonce,
|
|
55
|
+
providerLabel,
|
|
56
|
+
});
|
|
57
|
+
const firstDynamicInstructions = buildDynamicInstructions({
|
|
58
|
+
marker: 'alpha',
|
|
59
|
+
tailDescription:
|
|
60
|
+
'The Dynamic Marker line is runtime context and must remain outside the cached prefix.',
|
|
61
|
+
});
|
|
62
|
+
const secondDynamicInstructions = buildDynamicInstructions({
|
|
63
|
+
marker: 'bravo',
|
|
64
|
+
tailDescription:
|
|
65
|
+
'The Dynamic Marker line is runtime context and must remain outside the cached prefix.',
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
await assertSystemPayloadShape({
|
|
69
|
+
agentId: 'live-cache-shape-check',
|
|
70
|
+
provider: Providers.ANTHROPIC,
|
|
71
|
+
clientOptions,
|
|
72
|
+
stableInstructions,
|
|
73
|
+
dynamicInstructions: firstDynamicInstructions,
|
|
74
|
+
expectedContent: [
|
|
75
|
+
{
|
|
76
|
+
type: 'text',
|
|
77
|
+
text: stableInstructions,
|
|
78
|
+
cache_control: { type: 'ephemeral' },
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
type: 'text',
|
|
82
|
+
text: firstDynamicInstructions,
|
|
83
|
+
},
|
|
84
|
+
],
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
const first = await runLiveTurn({
|
|
88
|
+
provider: Providers.ANTHROPIC,
|
|
89
|
+
providerLabel,
|
|
90
|
+
clientOptions,
|
|
91
|
+
runId: `${nonce}-first`,
|
|
92
|
+
threadId: `${nonce}-thread`,
|
|
93
|
+
stableInstructions,
|
|
94
|
+
dynamicInstructions: firstDynamicInstructions,
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
expect(first.text.toLowerCase()).toContain('alpha');
|
|
98
|
+
expect(first.usage.input_token_details?.cache_creation).toBeGreaterThan(0);
|
|
99
|
+
expect(first.usage.input_token_details?.cache_read ?? 0).toBe(0);
|
|
100
|
+
|
|
101
|
+
await waitForCachePropagation();
|
|
102
|
+
|
|
103
|
+
const second = await runLiveTurn({
|
|
104
|
+
provider: Providers.ANTHROPIC,
|
|
105
|
+
providerLabel,
|
|
106
|
+
clientOptions,
|
|
107
|
+
runId: `${nonce}-second`,
|
|
108
|
+
threadId: `${nonce}-thread`,
|
|
109
|
+
stableInstructions,
|
|
110
|
+
dynamicInstructions: secondDynamicInstructions,
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
expect(second.text.toLowerCase()).toContain('bravo');
|
|
114
|
+
expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
|
|
115
|
+
}, 120_000);
|
|
116
|
+
});
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// src/agents/__tests__/AgentContext.bedrock.live.test.ts
|
|
2
|
+
/**
|
|
3
|
+
* Live Bedrock prompt-cache verification.
|
|
4
|
+
*
|
|
5
|
+
* Run with:
|
|
6
|
+
* RUN_BEDROCK_PROMPT_CACHE_LIVE_TESTS=1 BEDROCK_AWS_REGION=... BEDROCK_AWS_ACCESS_KEY_ID=... BEDROCK_AWS_SECRET_ACCESS_KEY=... npm test -- AgentContext.bedrock.live.test.ts --runInBand
|
|
7
|
+
*
|
|
8
|
+
* Standard AWS credential env vars or AWS_PROFILE can also be used.
|
|
9
|
+
*/
|
|
10
|
+
import { config as dotenvConfig } from 'dotenv';
|
|
11
|
+
dotenvConfig();
|
|
12
|
+
|
|
13
|
+
import { describe, expect, it } from '@jest/globals';
|
|
14
|
+
import type * as t from '@/types';
|
|
15
|
+
import {
|
|
16
|
+
runLiveTurn,
|
|
17
|
+
assertSystemPayloadShape,
|
|
18
|
+
buildDynamicInstructions,
|
|
19
|
+
buildStableInstructions,
|
|
20
|
+
waitForCachePropagation,
|
|
21
|
+
} from './promptCacheLiveHelpers';
|
|
22
|
+
import { Providers } from '@/common';
|
|
23
|
+
|
|
24
|
+
const accessKeyId =
|
|
25
|
+
process.env.BEDROCK_AWS_ACCESS_KEY_ID ?? process.env.AWS_ACCESS_KEY_ID;
|
|
26
|
+
const secretAccessKey =
|
|
27
|
+
process.env.BEDROCK_AWS_SECRET_ACCESS_KEY ??
|
|
28
|
+
process.env.AWS_SECRET_ACCESS_KEY;
|
|
29
|
+
const sessionToken =
|
|
30
|
+
process.env.BEDROCK_AWS_SESSION_TOKEN ?? process.env.AWS_SESSION_TOKEN;
|
|
31
|
+
const hasCredentialPair =
|
|
32
|
+
accessKeyId != null &&
|
|
33
|
+
accessKeyId !== '' &&
|
|
34
|
+
secretAccessKey != null &&
|
|
35
|
+
secretAccessKey !== '';
|
|
36
|
+
const hasAmbientCredentials =
|
|
37
|
+
process.env.AWS_PROFILE != null ||
|
|
38
|
+
process.env.AWS_WEB_IDENTITY_TOKEN_FILE != null;
|
|
39
|
+
|
|
40
|
+
const shouldRunLive =
|
|
41
|
+
process.env.RUN_BEDROCK_PROMPT_CACHE_LIVE_TESTS === '1' &&
|
|
42
|
+
(hasCredentialPair || hasAmbientCredentials);
|
|
43
|
+
|
|
44
|
+
const describeIfLive = shouldRunLive ? describe : describe.skip;
|
|
45
|
+
|
|
46
|
+
const model =
|
|
47
|
+
process.env.BEDROCK_PROMPT_CACHE_MODEL ??
|
|
48
|
+
'us.anthropic.claude-sonnet-4-5-20250929-v1:0';
|
|
49
|
+
const region =
|
|
50
|
+
process.env.BEDROCK_AWS_REGION ?? process.env.AWS_REGION ?? 'us-east-1';
|
|
51
|
+
const providerLabel = 'Bedrock';
|
|
52
|
+
|
|
53
|
+
function getCredentials():
|
|
54
|
+
| t.BedrockAnthropicClientOptions['credentials']
|
|
55
|
+
| undefined {
|
|
56
|
+
if (!hasCredentialPair) {
|
|
57
|
+
return undefined;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
return {
|
|
61
|
+
accessKeyId,
|
|
62
|
+
secretAccessKey,
|
|
63
|
+
...(sessionToken != null && sessionToken !== '' ? { sessionToken } : {}),
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
function createClientOptions(): t.BedrockAnthropicClientOptions {
|
|
68
|
+
const credentials = getCredentials();
|
|
69
|
+
return {
|
|
70
|
+
model,
|
|
71
|
+
region,
|
|
72
|
+
maxTokens: 8,
|
|
73
|
+
streaming: true,
|
|
74
|
+
streamUsage: true,
|
|
75
|
+
promptCache: true,
|
|
76
|
+
...(credentials != null ? { credentials } : {}),
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
describeIfLive('AgentContext Bedrock prompt cache live API', () => {
|
|
81
|
+
it('caches only the stable system prefix while dynamic tail changes', async () => {
|
|
82
|
+
const nonce = `agent-bedrock-cache-live-${Date.now()}`;
|
|
83
|
+
const clientOptions = createClientOptions();
|
|
84
|
+
const stableInstructions = buildStableInstructions({
|
|
85
|
+
nonce,
|
|
86
|
+
providerLabel,
|
|
87
|
+
});
|
|
88
|
+
const firstDynamicInstructions = buildDynamicInstructions({
|
|
89
|
+
marker: 'alpha',
|
|
90
|
+
tailDescription:
|
|
91
|
+
'The Dynamic Marker line is runtime context and must remain after the Bedrock cache point.',
|
|
92
|
+
});
|
|
93
|
+
const secondDynamicInstructions = buildDynamicInstructions({
|
|
94
|
+
marker: 'bravo',
|
|
95
|
+
tailDescription:
|
|
96
|
+
'The Dynamic Marker line is runtime context and must remain after the Bedrock cache point.',
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
await assertSystemPayloadShape({
|
|
100
|
+
agentId: 'live-bedrock-cache-shape-check',
|
|
101
|
+
provider: Providers.BEDROCK,
|
|
102
|
+
clientOptions,
|
|
103
|
+
stableInstructions,
|
|
104
|
+
dynamicInstructions: firstDynamicInstructions,
|
|
105
|
+
expectedContent: [
|
|
106
|
+
{
|
|
107
|
+
type: 'text',
|
|
108
|
+
text: stableInstructions,
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
cachePoint: { type: 'default' },
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
type: 'text',
|
|
115
|
+
text: firstDynamicInstructions,
|
|
116
|
+
},
|
|
117
|
+
],
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
const first = await runLiveTurn({
|
|
121
|
+
provider: Providers.BEDROCK,
|
|
122
|
+
providerLabel,
|
|
123
|
+
clientOptions,
|
|
124
|
+
runId: `${nonce}-first`,
|
|
125
|
+
threadId: `${nonce}-thread`,
|
|
126
|
+
stableInstructions,
|
|
127
|
+
dynamicInstructions: firstDynamicInstructions,
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
expect(first.text.toLowerCase()).toContain('alpha');
|
|
131
|
+
expect(first.usage.input_token_details?.cache_creation).toBeGreaterThan(0);
|
|
132
|
+
expect(first.usage.input_token_details?.cache_read ?? 0).toBe(0);
|
|
133
|
+
|
|
134
|
+
await waitForCachePropagation();
|
|
135
|
+
|
|
136
|
+
const second = await runLiveTurn({
|
|
137
|
+
provider: Providers.BEDROCK,
|
|
138
|
+
providerLabel,
|
|
139
|
+
clientOptions,
|
|
140
|
+
runId: `${nonce}-second`,
|
|
141
|
+
threadId: `${nonce}-thread`,
|
|
142
|
+
stableInstructions,
|
|
143
|
+
dynamicInstructions: secondDynamicInstructions,
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
expect(second.text.toLowerCase()).toContain('bravo');
|
|
147
|
+
expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
|
|
148
|
+
}, 180_000);
|
|
149
|
+
});
|