@librechat/agents 3.2.35 → 3.2.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +75 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/agents/projection.cjs +25 -0
- package/dist/cjs/agents/projection.cjs.map +1 -0
- package/dist/cjs/graphs/Graph.cjs +10 -26
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/langfuse.cjs +16 -5
- package/dist/cjs/langfuse.cjs.map +1 -1
- package/dist/cjs/langfuseToolOutputTracing.cjs +7 -0
- package/dist/cjs/langfuseToolOutputTracing.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +118 -7
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +44 -4
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/main.cjs +7 -0
- package/dist/cjs/messages/budget.cjs +23 -0
- package/dist/cjs/messages/budget.cjs.map +1 -0
- package/dist/cjs/messages/cache.cjs +184 -0
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/index.cjs +1 -0
- package/dist/cjs/summarization/node.cjs +1 -1
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +91 -2
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +4 -3
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/toolOutputReferences.cjs +28 -14
- package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
- package/dist/esm/agents/AgentContext.mjs +76 -3
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/agents/projection.mjs +25 -0
- package/dist/esm/agents/projection.mjs.map +1 -0
- package/dist/esm/graphs/Graph.mjs +9 -25
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/langfuse.mjs +16 -5
- package/dist/esm/langfuse.mjs.map +1 -1
- package/dist/esm/langfuseToolOutputTracing.mjs +7 -0
- package/dist/esm/langfuseToolOutputTracing.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +118 -7
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +44 -4
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -2
- package/dist/esm/messages/budget.mjs +23 -0
- package/dist/esm/messages/budget.mjs.map +1 -0
- package/dist/esm/messages/cache.mjs +182 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/index.mjs +1 -0
- package/dist/esm/summarization/node.mjs +2 -2
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +91 -2
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +4 -3
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/toolOutputReferences.mjs +28 -14
- package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
- package/dist/types/agents/AgentContext.d.ts +30 -1
- package/dist/types/agents/projection.d.ts +26 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/messages/budget.d.ts +11 -0
- package/dist/types/messages/cache.d.ts +47 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/types.d.ts +7 -0
- package/dist/types/types/graph.d.ts +2 -0
- package/package.json +2 -1
- package/src/agents/AgentContext.ts +105 -4
- package/src/agents/__tests__/AgentContext.test.ts +232 -9
- package/src/agents/__tests__/projection.test.ts +73 -0
- package/src/agents/projection.ts +46 -0
- package/src/graphs/Graph.ts +66 -65
- package/src/index.ts +3 -0
- package/src/langfuse.ts +38 -4
- package/src/langfuseToolOutputTracing.ts +18 -0
- package/src/llm/anthropic/utils/cross-provider-reasoning.test.ts +317 -0
- package/src/llm/anthropic/utils/message_inputs.ts +209 -19
- package/src/llm/anthropic/utils/stripPrefillCache.test.ts +111 -0
- package/src/llm/bedrock/utils/cross-provider-reasoning.test.ts +131 -0
- package/src/llm/bedrock/utils/message_inputs.test.ts +129 -0
- package/src/llm/bedrock/utils/message_inputs.ts +81 -4
- package/src/llm/bedrock/utils/toolResultCachePoint.test.ts +103 -0
- package/src/messages/budget.ts +32 -0
- package/src/messages/cache.tail.test.ts +340 -0
- package/src/messages/cache.ts +267 -1
- package/src/messages/index.ts +1 -0
- package/src/messages/tailCacheConversion.test.ts +161 -0
- package/src/scripts/bench-prompt-cache.ts +479 -0
- package/src/specs/langfuse-config.test.ts +69 -2
- package/src/specs/langfuse-metadata.test.ts +44 -0
- package/src/specs/langfuse-tool-output-tracing.test.ts +6 -0
- package/src/summarization/node.ts +2 -2
- package/src/tools/__tests__/annotateMessagesForLLM.test.ts +50 -0
- package/src/tools/search/format.test.ts +242 -0
- package/src/tools/search/format.ts +122 -5
- package/src/tools/search/tool.ts +5 -1
- package/src/tools/search/types.ts +7 -0
- package/src/tools/toolOutputReferences.ts +34 -20
- package/src/types/graph.ts +2 -0
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { CallbackHandler } from '@langfuse/langchain';
|
|
2
2
|
import {
|
|
3
|
-
createLangfuseHandler,
|
|
4
|
-
disposeLangfuseHandler,
|
|
5
3
|
hasLangfuseConfigCredentials,
|
|
6
4
|
shouldCreateLangfuseHandler,
|
|
5
|
+
isExplicitLangfuseConfig,
|
|
6
|
+
disposeLangfuseHandler,
|
|
7
|
+
createLangfuseHandler,
|
|
7
8
|
} from '@/langfuse';
|
|
8
9
|
|
|
9
10
|
const mockForceFlush = jest.fn();
|
|
@@ -68,6 +69,39 @@ describe('createLangfuseHandler', () => {
|
|
|
68
69
|
});
|
|
69
70
|
});
|
|
70
71
|
|
|
72
|
+
it('adds configured trace metadata and tags to the callback handler', () => {
|
|
73
|
+
process.env.LANGFUSE_PUBLIC_KEY = 'pk-env';
|
|
74
|
+
process.env.LANGFUSE_SECRET_KEY = 'sk-env';
|
|
75
|
+
|
|
76
|
+
const handler = createLangfuseHandler({
|
|
77
|
+
langfuse: {
|
|
78
|
+
metadata: {
|
|
79
|
+
tenantId: 'tenant-1',
|
|
80
|
+
empty: '',
|
|
81
|
+
skipped: null,
|
|
82
|
+
},
|
|
83
|
+
tags: ['tenant:tenant-1', 'agent'],
|
|
84
|
+
},
|
|
85
|
+
traceMetadata: {
|
|
86
|
+
messageId: 'message-1',
|
|
87
|
+
agentId: 'agent-1',
|
|
88
|
+
},
|
|
89
|
+
tags: ['librechat', 'agent'],
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
expect(handler).toBeDefined();
|
|
93
|
+
expect(MockedCallbackHandler).toHaveBeenCalledWith({
|
|
94
|
+
userId: undefined,
|
|
95
|
+
sessionId: undefined,
|
|
96
|
+
traceMetadata: {
|
|
97
|
+
tenantId: 'tenant-1',
|
|
98
|
+
messageId: 'message-1',
|
|
99
|
+
agentId: 'agent-1',
|
|
100
|
+
},
|
|
101
|
+
tags: ['librechat', 'agent', 'tenant:tenant-1'],
|
|
102
|
+
});
|
|
103
|
+
});
|
|
104
|
+
|
|
71
105
|
it('creates a handler for explicit credentials supplied in config', () => {
|
|
72
106
|
const handler = createLangfuseHandler({
|
|
73
107
|
langfuse: {
|
|
@@ -158,6 +192,39 @@ describe('createLangfuseHandler', () => {
|
|
|
158
192
|
).toBe(true);
|
|
159
193
|
});
|
|
160
194
|
|
|
195
|
+
it('does not treat sanitized-away trace attributes as explicit config', () => {
|
|
196
|
+
expect(
|
|
197
|
+
isExplicitLangfuseConfig({
|
|
198
|
+
metadata: {
|
|
199
|
+
empty: '',
|
|
200
|
+
whitespace: ' ',
|
|
201
|
+
missing: null,
|
|
202
|
+
tooLong: 'x'.repeat(201),
|
|
203
|
+
},
|
|
204
|
+
tags: ['', ' '],
|
|
205
|
+
})
|
|
206
|
+
).toBe(false);
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
it('treats valid trace metadata or tags as explicit config', () => {
|
|
210
|
+
expect(
|
|
211
|
+
isExplicitLangfuseConfig({
|
|
212
|
+
metadata: {
|
|
213
|
+
tenantId: 'tenant-1',
|
|
214
|
+
},
|
|
215
|
+
tags: ['', ' '],
|
|
216
|
+
})
|
|
217
|
+
).toBe(true);
|
|
218
|
+
expect(
|
|
219
|
+
isExplicitLangfuseConfig({
|
|
220
|
+
metadata: {
|
|
221
|
+
empty: '',
|
|
222
|
+
},
|
|
223
|
+
tags: ['tenant:tenant-1'],
|
|
224
|
+
})
|
|
225
|
+
).toBe(true);
|
|
226
|
+
});
|
|
227
|
+
|
|
161
228
|
it('does not flush the shared Langfuse provider during per-chat cleanup', async () => {
|
|
162
229
|
await expect(disposeLangfuseHandler({})).resolves.toBeUndefined();
|
|
163
230
|
expect(mockForceFlush).not.toHaveBeenCalled();
|
|
@@ -108,6 +108,50 @@ describe('Langfuse trace metadata includes agentName', () => {
|
|
|
108
108
|
});
|
|
109
109
|
});
|
|
110
110
|
|
|
111
|
+
it('propagates configured Langfuse metadata and tags around processStream observations', async () => {
|
|
112
|
+
const run = await createTestRun(
|
|
113
|
+
'DWAINE',
|
|
114
|
+
{},
|
|
115
|
+
{
|
|
116
|
+
langfuse: {
|
|
117
|
+
metadata: { tenantId: 'tenant-1' },
|
|
118
|
+
tags: ['tenant:tenant-1'],
|
|
119
|
+
},
|
|
120
|
+
}
|
|
121
|
+
);
|
|
122
|
+
await run.processStream(
|
|
123
|
+
{ messages: [] },
|
|
124
|
+
{
|
|
125
|
+
configurable: {
|
|
126
|
+
thread_id: 'thread-123',
|
|
127
|
+
user_id: 'user-456',
|
|
128
|
+
},
|
|
129
|
+
version: 'v2',
|
|
130
|
+
}
|
|
131
|
+
);
|
|
132
|
+
|
|
133
|
+
expect(MockedCallbackHandler).toHaveBeenCalledTimes(1);
|
|
134
|
+
const ctorArgs = MockedCallbackHandler.mock.calls[0][0];
|
|
135
|
+
expect(ctorArgs).toMatchObject({
|
|
136
|
+
traceMetadata: {
|
|
137
|
+
tenantId: 'tenant-1',
|
|
138
|
+
messageId: 'test-run-id',
|
|
139
|
+
agentId: 'agent_abc123',
|
|
140
|
+
agentName: 'DWAINE',
|
|
141
|
+
},
|
|
142
|
+
tags: ['librechat', 'agent', 'tenant:tenant-1'],
|
|
143
|
+
});
|
|
144
|
+
expect(MockedPropagateAttributes.mock.calls[0][0]).toMatchObject({
|
|
145
|
+
tags: ['librechat', 'agent', 'tenant:tenant-1'],
|
|
146
|
+
metadata: {
|
|
147
|
+
tenantId: 'tenant-1',
|
|
148
|
+
messageId: 'test-run-id',
|
|
149
|
+
agentId: 'agent_abc123',
|
|
150
|
+
agentName: 'DWAINE',
|
|
151
|
+
},
|
|
152
|
+
});
|
|
153
|
+
});
|
|
154
|
+
|
|
111
155
|
it('falls back to agentId when agent has no explicit name', async () => {
|
|
112
156
|
const run = await createTestRun();
|
|
113
157
|
await run.processStream(
|
|
@@ -586,6 +586,8 @@ describe('Langfuse tool output tracing redaction', () => {
|
|
|
586
586
|
publicKey: 'pk-run',
|
|
587
587
|
secretKey: 'sk-run',
|
|
588
588
|
baseUrl: 'https://langfuse.test',
|
|
589
|
+
metadata: { tenantId: 'tenant-run' },
|
|
590
|
+
tags: ['tenant:tenant-run', 'shared'],
|
|
589
591
|
toolNodeTracing: { enabled: true },
|
|
590
592
|
toolOutputTracing: {
|
|
591
593
|
enabled: true,
|
|
@@ -593,6 +595,8 @@ describe('Langfuse tool output tracing redaction', () => {
|
|
|
593
595
|
},
|
|
594
596
|
},
|
|
595
597
|
{
|
|
598
|
+
metadata: { agentId: 'agent-1' },
|
|
599
|
+
tags: ['shared', 'agent:agent-1'],
|
|
596
600
|
toolOutputTracing: {
|
|
597
601
|
enabled: false,
|
|
598
602
|
redactedToolNames: ['execute_sql'],
|
|
@@ -605,6 +609,8 @@ describe('Langfuse tool output tracing redaction', () => {
|
|
|
605
609
|
publicKey: 'pk-run',
|
|
606
610
|
secretKey: 'sk-run',
|
|
607
611
|
baseUrl: 'https://langfuse.test',
|
|
612
|
+
metadata: { tenantId: 'tenant-run', agentId: 'agent-1' },
|
|
613
|
+
tags: ['tenant:tenant-run', 'shared', 'agent:agent-1'],
|
|
608
614
|
toolNodeTracing: { enabled: true },
|
|
609
615
|
toolOutputTracing: {
|
|
610
616
|
enabled: false,
|
|
@@ -22,7 +22,7 @@ import { attemptInvoke, tryFallbackProviders } from '@/llm/invoke';
|
|
|
22
22
|
import { createRemoveAllMessage } from '@/messages/reducer';
|
|
23
23
|
import { splitAtRecencyBoundary } from '@/messages/recency';
|
|
24
24
|
import { getMaxOutputTokensKey } from '@/llm/request';
|
|
25
|
-
import {
|
|
25
|
+
import { addTailCacheControl } from '@/messages/cache';
|
|
26
26
|
import { initializeModel } from '@/llm/init';
|
|
27
27
|
import { getChunkContent } from '@/stream';
|
|
28
28
|
import { executeHooks } from '@/hooks';
|
|
@@ -1227,7 +1227,7 @@ async function summarizeWithCacheHit({
|
|
|
1227
1227
|
|
|
1228
1228
|
const fullMessages = [...messages, new HumanMessage(instruction)];
|
|
1229
1229
|
const invokeMessages =
|
|
1230
|
-
usePromptCache === true ?
|
|
1230
|
+
usePromptCache === true ? addTailCacheControl(fullMessages) : fullMessages;
|
|
1231
1231
|
|
|
1232
1232
|
const result = await attemptInvoke(
|
|
1233
1233
|
{
|
|
@@ -173,6 +173,56 @@ describe('annotateMessagesForLLM', () => {
|
|
|
173
173
|
expect(out[0].content).toBe('[ref: tool0turn0]\nplain output');
|
|
174
174
|
});
|
|
175
175
|
|
|
176
|
+
it('annotates a live ref on multi-part content (prompt-cache-rewritten tool tail)', () => {
|
|
177
|
+
/**
|
|
178
|
+
* A tail tool result that prompt caching rewrote from a string into a
|
|
179
|
+
* text-block array (to host the cache_control / cachePoint marker) keeps
|
|
180
|
+
* its `_refKey` on additional_kwargs. The live-ref marker must still be
|
|
181
|
+
* projected as a leading text block; otherwise the common tool-result
|
|
182
|
+
* tail silently loses its reference annotation once cached.
|
|
183
|
+
*/
|
|
184
|
+
const registry = new ToolOutputReferenceRegistry();
|
|
185
|
+
registry.set('r1', 'tool0turn0', 'raw');
|
|
186
|
+
const tm = makeToolMessage({
|
|
187
|
+
content: [
|
|
188
|
+
{ type: 'text', text: 'output', cache_control: { type: 'ephemeral' } },
|
|
189
|
+
] as unknown as ToolMessage['content'],
|
|
190
|
+
additional_kwargs: { _refKey: 'tool0turn0' },
|
|
191
|
+
});
|
|
192
|
+
const out = annotateMessagesForLLM([tm], registry, 'r1');
|
|
193
|
+
const blocks = out[0].content as Array<{
|
|
194
|
+
type: string;
|
|
195
|
+
text?: string;
|
|
196
|
+
cache_control?: unknown;
|
|
197
|
+
}>;
|
|
198
|
+
expect(blocks).toHaveLength(2);
|
|
199
|
+
expect(blocks[0]).toEqual({ type: 'text', text: '[ref: tool0turn0]' });
|
|
200
|
+
// The original block (and its cache marker) is preserved after the prefix.
|
|
201
|
+
expect(blocks[1].text).toBe('output');
|
|
202
|
+
expect(blocks[1].cache_control).toEqual({ type: 'ephemeral' });
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
it('annotates both live ref and unresolved on multi-part content', () => {
|
|
206
|
+
const registry = new ToolOutputReferenceRegistry();
|
|
207
|
+
registry.set('r1', 'tool0turn0', 'raw');
|
|
208
|
+
const tm = makeToolMessage({
|
|
209
|
+
content: [
|
|
210
|
+
{ type: 'text', text: 'output' },
|
|
211
|
+
] as unknown as ToolMessage['content'],
|
|
212
|
+
additional_kwargs: {
|
|
213
|
+
_refKey: 'tool0turn0',
|
|
214
|
+
_unresolvedRefs: ['tool9turn9'],
|
|
215
|
+
},
|
|
216
|
+
});
|
|
217
|
+
const out = annotateMessagesForLLM([tm], registry, 'r1');
|
|
218
|
+
const blocks = out[0].content as Array<{ type: string; text?: string }>;
|
|
219
|
+
expect(blocks.map((b) => b.text)).toEqual([
|
|
220
|
+
'[ref: tool0turn0]',
|
|
221
|
+
'[unresolved refs: tool9turn9]',
|
|
222
|
+
'output',
|
|
223
|
+
]);
|
|
224
|
+
});
|
|
225
|
+
|
|
176
226
|
it('prepends an unresolved-refs warning text block to multi-part content', () => {
|
|
177
227
|
const registry = new ToolOutputReferenceRegistry();
|
|
178
228
|
const tm = makeToolMessage({
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import type * as t from './types';
|
|
2
|
+
import { formatResultsForLLM, resolveMaxLLMOutputChars } from './format';
|
|
3
|
+
|
|
4
|
+
const makeOrganic = (
|
|
5
|
+
link: string,
|
|
6
|
+
highlights: t.Highlight[]
|
|
7
|
+
): t.ProcessedOrganic => ({
|
|
8
|
+
link,
|
|
9
|
+
title: `Title for ${link}`,
|
|
10
|
+
snippet: `Snippet for ${link}`,
|
|
11
|
+
highlights,
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
const highlight = (text: string, score = 0.9): t.Highlight => ({ text, score });
|
|
15
|
+
|
|
16
|
+
const reference = (url: string, originalIndex = 0): t.UsedReferences[number] => ({
|
|
17
|
+
type: 'link',
|
|
18
|
+
originalIndex,
|
|
19
|
+
reference: { originalUrl: url, title: 'Ref', text: 'ref' },
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
const countHighlightBlocks = (output: string): number =>
|
|
23
|
+
(output.match(/### Highlight \d+/g) ?? []).length;
|
|
24
|
+
|
|
25
|
+
const OMISSION_MARKER = 'omitted to fit the context budget';
|
|
26
|
+
|
|
27
|
+
describe('resolveMaxLLMOutputChars', () => {
|
|
28
|
+
const originalEnv = process.env.SEARCH_MAX_LLM_OUTPUT_CHARS;
|
|
29
|
+
|
|
30
|
+
afterEach(() => {
|
|
31
|
+
if (originalEnv == null) {
|
|
32
|
+
delete process.env.SEARCH_MAX_LLM_OUTPUT_CHARS;
|
|
33
|
+
} else {
|
|
34
|
+
process.env.SEARCH_MAX_LLM_OUTPUT_CHARS = originalEnv;
|
|
35
|
+
}
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
test('falls back to the 50,000 char default when nothing is configured', () => {
|
|
39
|
+
delete process.env.SEARCH_MAX_LLM_OUTPUT_CHARS;
|
|
40
|
+
expect(resolveMaxLLMOutputChars()).toBe(50000);
|
|
41
|
+
expect(resolveMaxLLMOutputChars(0)).toBe(50000);
|
|
42
|
+
expect(resolveMaxLLMOutputChars(-100)).toBe(50000);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
test('honors the SEARCH_MAX_LLM_OUTPUT_CHARS env var', () => {
|
|
46
|
+
process.env.SEARCH_MAX_LLM_OUTPUT_CHARS = '777';
|
|
47
|
+
expect(resolveMaxLLMOutputChars()).toBe(777);
|
|
48
|
+
expect(resolveMaxLLMOutputChars(0)).toBe(777);
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
test('an explicit positive config value wins over env and default', () => {
|
|
52
|
+
process.env.SEARCH_MAX_LLM_OUTPUT_CHARS = '777';
|
|
53
|
+
expect(resolveMaxLLMOutputChars(1234)).toBe(1234);
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
test('ignores a non-numeric env var', () => {
|
|
57
|
+
process.env.SEARCH_MAX_LLM_OUTPUT_CHARS = 'not-a-number';
|
|
58
|
+
expect(resolveMaxLLMOutputChars()).toBe(50000);
|
|
59
|
+
});
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
describe('formatResultsForLLM highlight budget', () => {
|
|
63
|
+
test('keeps whole highlights in relevance order until the budget is hit', () => {
|
|
64
|
+
const results: t.SearchResultData = {
|
|
65
|
+
organic: [
|
|
66
|
+
makeOrganic('https://a.com', [highlight('A'.repeat(100))]),
|
|
67
|
+
makeOrganic('https://b.com', [highlight('B'.repeat(100))]),
|
|
68
|
+
],
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
const { output } = formatResultsForLLM(0, results, 100);
|
|
72
|
+
|
|
73
|
+
expect(output).toContain('A'.repeat(100));
|
|
74
|
+
expect(output).not.toContain('B'.repeat(100));
|
|
75
|
+
expect(countHighlightBlocks(output)).toBe(1);
|
|
76
|
+
expect(output).toContain('_[1 additional highlight omitted to fit the context budget');
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
test('truncates the boundary highlight when meaningful room remains', () => {
|
|
80
|
+
const results: t.SearchResultData = {
|
|
81
|
+
organic: [makeOrganic('https://a.com', [highlight('A'.repeat(1000))])],
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
const { output } = formatResultsForLLM(0, results, 500);
|
|
85
|
+
|
|
86
|
+
expect(output).toContain('…[truncated]');
|
|
87
|
+
expect(output).toContain('A'.repeat(500));
|
|
88
|
+
expect(output).not.toContain('A'.repeat(501));
|
|
89
|
+
expect(output).toContain('_[1 additional highlight omitted to fit the context budget');
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
test('drops the boundary highlight entirely when too little room remains', () => {
|
|
93
|
+
const results: t.SearchResultData = {
|
|
94
|
+
organic: [
|
|
95
|
+
makeOrganic('https://a.com', [highlight('A'.repeat(100))]),
|
|
96
|
+
makeOrganic('https://b.com', [highlight('B'.repeat(100))]),
|
|
97
|
+
],
|
|
98
|
+
};
|
|
99
|
+
|
|
100
|
+
const { output } = formatResultsForLLM(0, results, 150);
|
|
101
|
+
|
|
102
|
+
expect(output).toContain('A'.repeat(100));
|
|
103
|
+
expect(output).not.toContain('…[truncated]');
|
|
104
|
+
expect(output).not.toContain('B');
|
|
105
|
+
expect(countHighlightBlocks(output)).toBe(1);
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
test('always keeps snippets, titles, and URLs even when all highlights are dropped', () => {
|
|
109
|
+
const results: t.SearchResultData = {
|
|
110
|
+
organic: [makeOrganic('https://a.com', [highlight('A'.repeat(100))])],
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
const { output } = formatResultsForLLM(0, results, 10);
|
|
114
|
+
|
|
115
|
+
expect(output).toContain('URL: https://a.com');
|
|
116
|
+
expect(output).toContain('Summary: Snippet for https://a.com');
|
|
117
|
+
expect(output).toContain('"Title for https://a.com"');
|
|
118
|
+
expect(countHighlightBlocks(output)).toBe(0);
|
|
119
|
+
expect(output).toContain('_[1 additional highlight omitted to fit the context budget');
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
test('emits no omission marker when every highlight fits the budget', () => {
|
|
123
|
+
const results: t.SearchResultData = {
|
|
124
|
+
organic: [
|
|
125
|
+
makeOrganic('https://a.com', [highlight('A'.repeat(100))]),
|
|
126
|
+
makeOrganic('https://b.com', [highlight('B'.repeat(100))]),
|
|
127
|
+
],
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
const { output } = formatResultsForLLM(0, results, 50000);
|
|
131
|
+
|
|
132
|
+
expect(output).toContain('A'.repeat(100));
|
|
133
|
+
expect(output).toContain('B'.repeat(100));
|
|
134
|
+
expect(countHighlightBlocks(output)).toBe(2);
|
|
135
|
+
expect(output).not.toContain(OMISSION_MARKER);
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
test('drops references with no surviving marker when truncating', () => {
|
|
139
|
+
const withRefs = highlight('A'.repeat(1000));
|
|
140
|
+
withRefs.references = [reference('https://cited.example')];
|
|
141
|
+
const results: t.SearchResultData = {
|
|
142
|
+
organic: [makeOrganic('https://a.com', [withRefs])],
|
|
143
|
+
};
|
|
144
|
+
|
|
145
|
+
const { output, references } = formatResultsForLLM(0, results, 500);
|
|
146
|
+
|
|
147
|
+
expect(output).toContain('…[truncated]');
|
|
148
|
+
expect(output).not.toContain('Core References');
|
|
149
|
+
expect(output).not.toContain('https://cited.example');
|
|
150
|
+
expect(references).toHaveLength(0);
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
test('keeps references whose marker survives truncation and drops the rest', () => {
|
|
154
|
+
const withRefs = highlight(`(link#1) ${'A'.repeat(1000)} (link#2)`);
|
|
155
|
+
withRefs.references = [
|
|
156
|
+
reference('https://one.example', 0),
|
|
157
|
+
reference('https://two.example', 1),
|
|
158
|
+
];
|
|
159
|
+
const results: t.SearchResultData = {
|
|
160
|
+
organic: [makeOrganic('https://a.com', [withRefs])],
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
const { output, references } = formatResultsForLLM(0, results, 500);
|
|
164
|
+
|
|
165
|
+
expect(output).toContain('…[truncated]');
|
|
166
|
+
expect(output).toContain('https://one.example');
|
|
167
|
+
expect(output).not.toContain('https://two.example');
|
|
168
|
+
expect(references).toHaveLength(1);
|
|
169
|
+
expect(references[0].link).toBe('https://one.example');
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
test('stops at the boundary highlight — no lower-ranked highlight slips in', () => {
|
|
173
|
+
const results: t.SearchResultData = {
|
|
174
|
+
organic: [
|
|
175
|
+
makeOrganic('https://a.com', [
|
|
176
|
+
highlight('A'.repeat(100), 0.9),
|
|
177
|
+
highlight('B'.repeat(300), 0.8),
|
|
178
|
+
highlight('C'.repeat(10), 0.7),
|
|
179
|
+
]),
|
|
180
|
+
],
|
|
181
|
+
};
|
|
182
|
+
|
|
183
|
+
const { output } = formatResultsForLLM(0, results, 150);
|
|
184
|
+
|
|
185
|
+
expect(output).toContain('A'.repeat(100));
|
|
186
|
+
expect(output).not.toContain('B'.repeat(300));
|
|
187
|
+
expect(output).not.toContain('C'.repeat(10));
|
|
188
|
+
expect(output).not.toContain('…[truncated]');
|
|
189
|
+
expect(countHighlightBlocks(output)).toBe(1);
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
test('keeps references on a whole highlight that fits the budget', () => {
|
|
193
|
+
const withRefs = highlight('A'.repeat(100));
|
|
194
|
+
withRefs.references = [reference('https://cited.example')];
|
|
195
|
+
const results: t.SearchResultData = {
|
|
196
|
+
organic: [makeOrganic('https://a.com', [withRefs])],
|
|
197
|
+
};
|
|
198
|
+
|
|
199
|
+
const { output, references } = formatResultsForLLM(0, results, 50000);
|
|
200
|
+
|
|
201
|
+
expect(output).toContain('Core References');
|
|
202
|
+
expect(references).toHaveLength(1);
|
|
203
|
+
expect(references[0].link).toBe('https://cited.example');
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
test('skips blank highlights instead of charging them against the budget', () => {
|
|
207
|
+
const results: t.SearchResultData = {
|
|
208
|
+
organic: [
|
|
209
|
+
makeOrganic('https://a.com', [
|
|
210
|
+
highlight(' \n\t '),
|
|
211
|
+
highlight('A'.repeat(100)),
|
|
212
|
+
]),
|
|
213
|
+
],
|
|
214
|
+
};
|
|
215
|
+
|
|
216
|
+
const { output } = formatResultsForLLM(0, results, 100);
|
|
217
|
+
|
|
218
|
+
expect(output).toContain('A'.repeat(100));
|
|
219
|
+
expect(output).not.toContain('…[truncated]');
|
|
220
|
+
expect(countHighlightBlocks(output)).toBe(1);
|
|
221
|
+
expect(output).not.toContain(OMISSION_MARKER);
|
|
222
|
+
});
|
|
223
|
+
|
|
224
|
+
test('spends the budget across organic results before news results', () => {
|
|
225
|
+
const results: t.SearchResultData = {
|
|
226
|
+
organic: [makeOrganic('https://a.com', [highlight('A'.repeat(100))])],
|
|
227
|
+
topStories: [
|
|
228
|
+
{
|
|
229
|
+
link: 'https://news.com',
|
|
230
|
+
title: 'Story',
|
|
231
|
+
highlights: [highlight('N'.repeat(100))],
|
|
232
|
+
},
|
|
233
|
+
],
|
|
234
|
+
};
|
|
235
|
+
|
|
236
|
+
const { output } = formatResultsForLLM(0, results, 100);
|
|
237
|
+
|
|
238
|
+
expect(output).toContain('A'.repeat(100));
|
|
239
|
+
expect(output).not.toContain('N'.repeat(100));
|
|
240
|
+
expect(output).toContain('_[1 additional highlight omitted to fit the context budget');
|
|
241
|
+
});
|
|
242
|
+
});
|
|
@@ -1,6 +1,113 @@
|
|
|
1
1
|
import type * as t from './types';
|
|
2
2
|
import { getDomainName, fileExtRegex } from './utils';
|
|
3
3
|
|
|
4
|
+
/** Default per-search budget for model-facing highlight content (chars). Hosts
|
|
5
|
+
* that know the context window (e.g. LibreChat) pass a window-relative value;
|
|
6
|
+
* this fixed fallback keeps standalone consumers bounded instead of dumping the
|
|
7
|
+
* full reranked content of every source into the prompt. */
|
|
8
|
+
const DEFAULT_MAX_LLM_OUTPUT_CHARS = 50000;
|
|
9
|
+
|
|
10
|
+
/** Minimum room (chars) worth filling with a truncated boundary highlight; below
|
|
11
|
+
* this we drop it whole rather than emit a useless sliver. */
|
|
12
|
+
const MIN_PARTIAL_HIGHLIGHT_CHARS = 200;
|
|
13
|
+
|
|
14
|
+
/** Resolves the per-search highlight budget from config, the
|
|
15
|
+
* `SEARCH_MAX_LLM_OUTPUT_CHARS` env var, or the default (50,000 chars). */
|
|
16
|
+
export function resolveMaxLLMOutputChars(maxOutputChars?: number): number {
|
|
17
|
+
if (maxOutputChars != null && maxOutputChars > 0) {
|
|
18
|
+
return maxOutputChars;
|
|
19
|
+
}
|
|
20
|
+
const envValue = Number(process.env.SEARCH_MAX_LLM_OUTPUT_CHARS);
|
|
21
|
+
if (Number.isFinite(envValue) && envValue > 0) {
|
|
22
|
+
return envValue;
|
|
23
|
+
}
|
|
24
|
+
return DEFAULT_MAX_LLM_OUTPUT_CHARS;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/** Inline citation markers embedded in highlight text, e.g. `(link#2 "Title")`.
|
|
28
|
+
* Mirrors the matcher in `highlights.ts` so truncation can tell which citations
|
|
29
|
+
* survive in a sliced prefix. */
|
|
30
|
+
const REFERENCE_MARKER_REGEX = /\((link|image|video)#(\d+)(?:\s+"[^"]*")?\)/g;
|
|
31
|
+
|
|
32
|
+
/** Builds the set of `type#originalIndex` keys whose complete citation marker
|
|
33
|
+
* appears in `text`, so references can be filtered to those still visible. */
|
|
34
|
+
function visibleReferenceKeys(text: string): Set<string> {
|
|
35
|
+
const keys = new Set<string>();
|
|
36
|
+
if (!text.includes('#')) {
|
|
37
|
+
return keys;
|
|
38
|
+
}
|
|
39
|
+
const regex = new RegExp(REFERENCE_MARKER_REGEX);
|
|
40
|
+
let match: RegExpExecArray | null;
|
|
41
|
+
while ((match = regex.exec(text)) !== null) {
|
|
42
|
+
keys.add(`${match[1]}#${parseInt(match[2], 10) - 1}`);
|
|
43
|
+
}
|
|
44
|
+
return keys;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/** Truncates a highlight to `maxLen` chars of (already-trimmed) text, keeping
|
|
48
|
+
* only the references whose markers survive in the kept prefix — markers in the
|
|
49
|
+
* cut tail would otherwise emit Core References for citations the model can no
|
|
50
|
+
* longer see, while a blanket drop would lose still-visible ones. */
|
|
51
|
+
function truncateHighlight(highlight: t.Highlight, text: string, maxLen: number): t.Highlight {
|
|
52
|
+
const prefix = text.slice(0, maxLen);
|
|
53
|
+
const truncated: t.Highlight = { score: highlight.score, text: `${prefix}\n…[truncated]` };
|
|
54
|
+
if (highlight.references != null && highlight.references.length > 0) {
|
|
55
|
+
const keys = visibleReferenceKeys(prefix);
|
|
56
|
+
const visible = highlight.references.filter((ref) => keys.has(`${ref.type}#${ref.originalIndex}`));
|
|
57
|
+
if (visible.length > 0) {
|
|
58
|
+
truncated.references = visible;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
return truncated;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/** Bounds the highlight chunks — the dominant, unbounded part of search output —
|
|
65
|
+
* to `maxChars`, walking sources in relevance order (organic first, then news;
|
|
66
|
+
* highlights in their reranked order). Whole highlights are kept until the
|
|
67
|
+
* budget is hit, the boundary one is truncated if meaningful room remains, and
|
|
68
|
+
* every later highlight is dropped (relevance-ordered prefix). Blank highlights
|
|
69
|
+
* are skipped (never rendered, so never charged); a truncated highlight keeps
|
|
70
|
+
* only references whose markers survive in the kept prefix. Snippets/titles/URLs
|
|
71
|
+
* are left untouched (small, high-signal) and per-source `content` stays in the
|
|
72
|
+
* `WEB_SEARCH` artifact for citations. Mutates `results` in place; returns how
|
|
73
|
+
* many highlights were dropped or truncated (0 when everything fit). */
|
|
74
|
+
function trimHighlightsToBudget(results: t.SearchResultData, maxChars: number): number {
|
|
75
|
+
let used = 0;
|
|
76
|
+
let trimmed = 0;
|
|
77
|
+
const sections: (t.ValidSource[] | undefined)[] = [results.organic, results.topStories];
|
|
78
|
+
for (const sources of sections) {
|
|
79
|
+
if (sources == null) {
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
for (const source of sources) {
|
|
83
|
+
const highlights = source.highlights;
|
|
84
|
+
if (highlights == null || highlights.length === 0) {
|
|
85
|
+
continue;
|
|
86
|
+
}
|
|
87
|
+
const kept: t.Highlight[] = [];
|
|
88
|
+
for (const highlight of highlights) {
|
|
89
|
+
const text = highlight.text.trim();
|
|
90
|
+
if (text.length === 0) {
|
|
91
|
+
continue;
|
|
92
|
+
}
|
|
93
|
+
if (used + text.length <= maxChars) {
|
|
94
|
+
kept.push(highlight);
|
|
95
|
+
used += text.length;
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
98
|
+
const remaining = maxChars - used;
|
|
99
|
+
if (remaining >= MIN_PARTIAL_HIGHLIGHT_CHARS) {
|
|
100
|
+
kept.push(truncateHighlight(highlight, text, remaining));
|
|
101
|
+
}
|
|
102
|
+
used = maxChars;
|
|
103
|
+
trimmed++;
|
|
104
|
+
}
|
|
105
|
+
source.highlights = kept;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
return trimmed;
|
|
109
|
+
}
|
|
110
|
+
|
|
4
111
|
function addHighlightSection(): string[] {
|
|
5
112
|
return ['\n## Highlights', ''];
|
|
6
113
|
}
|
|
@@ -112,8 +219,15 @@ function formatSource(
|
|
|
112
219
|
|
|
113
220
|
export function formatResultsForLLM(
|
|
114
221
|
turn: number,
|
|
115
|
-
results: t.SearchResultData
|
|
222
|
+
results: t.SearchResultData,
|
|
223
|
+
maxOutputChars?: number
|
|
116
224
|
): { output: string; references: t.ResultReference[] } {
|
|
225
|
+
/** Bound highlight content to the per-search budget before formatting */
|
|
226
|
+
const trimmedHighlights = trimHighlightsToBudget(
|
|
227
|
+
results,
|
|
228
|
+
resolveMaxLLMOutputChars(maxOutputChars)
|
|
229
|
+
);
|
|
230
|
+
|
|
117
231
|
/** Array to collect all output lines */
|
|
118
232
|
const outputLines: string[] = [];
|
|
119
233
|
|
|
@@ -243,8 +357,11 @@ export function formatResultsForLLM(
|
|
|
243
357
|
outputLines.push(paaLines.join(''));
|
|
244
358
|
}
|
|
245
359
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
360
|
+
let output = outputLines.join('\n').trim();
|
|
361
|
+
if (trimmedHighlights > 0) {
|
|
362
|
+
output += `\n\n_[${trimmedHighlights} additional highlight${
|
|
363
|
+
trimmedHighlights === 1 ? '' : 's'
|
|
364
|
+
} omitted to fit the context budget; the cited sources contain the full content.]_`;
|
|
365
|
+
}
|
|
366
|
+
return { output, references };
|
|
250
367
|
}
|
package/src/tools/search/tool.ts
CHANGED
|
@@ -289,10 +289,12 @@ function createOnSearchResults({
|
|
|
289
289
|
function createTool({
|
|
290
290
|
schema,
|
|
291
291
|
search,
|
|
292
|
+
maxOutputChars,
|
|
292
293
|
onSearchResults: _onSearchResults,
|
|
293
294
|
}: {
|
|
294
295
|
schema: Record<string, unknown>;
|
|
295
296
|
search: ReturnType<typeof createSearchProcessor>;
|
|
297
|
+
maxOutputChars?: number;
|
|
296
298
|
onSearchResults: t.SearchToolConfig['onSearchResults'];
|
|
297
299
|
}): DynamicStructuredTool {
|
|
298
300
|
return tool(
|
|
@@ -313,7 +315,7 @@ function createTool({
|
|
|
313
315
|
}),
|
|
314
316
|
});
|
|
315
317
|
const turn = runnableConfig.toolCall?.turn ?? 0;
|
|
316
|
-
const { output, references } = formatResultsForLLM(turn, searchResult);
|
|
318
|
+
const { output, references } = formatResultsForLLM(turn, searchResult, maxOutputChars);
|
|
317
319
|
const data: t.SearchResultData = { turn, ...searchResult, references };
|
|
318
320
|
return [output, { [Constants.WEB_SEARCH]: data }];
|
|
319
321
|
},
|
|
@@ -359,6 +361,7 @@ export const createSearchTool = (
|
|
|
359
361
|
rerankerType = 'cohere',
|
|
360
362
|
topResults = 5,
|
|
361
363
|
maxContentLength,
|
|
364
|
+
maxOutputChars,
|
|
362
365
|
strategies = ['no_extraction'],
|
|
363
366
|
filterContent = true,
|
|
364
367
|
safeSearch = 1,
|
|
@@ -483,6 +486,7 @@ export const createSearchTool = (
|
|
|
483
486
|
return createTool({
|
|
484
487
|
search,
|
|
485
488
|
schema: toolSchema,
|
|
489
|
+
maxOutputChars,
|
|
486
490
|
onSearchResults: _onSearchResults,
|
|
487
491
|
});
|
|
488
492
|
};
|
|
@@ -218,6 +218,13 @@ export interface SearchToolConfig
|
|
|
218
218
|
ProcessSourcesConfig,
|
|
219
219
|
FirecrawlConfig {
|
|
220
220
|
tavilyScraperOptions?: TavilyScraperConfig;
|
|
221
|
+
/** Max chars of highlight content this tool feeds the MODEL per search (the
|
|
222
|
+
* dominant, otherwise-unbounded part of the output). Distinct from
|
|
223
|
+
* `maxContentLength`, which caps scraped/reranked content per source — full
|
|
224
|
+
* content always remains in the `WEB_SEARCH` artifact. Defaults to 50,000;
|
|
225
|
+
* also configurable via the `SEARCH_MAX_LLM_OUTPUT_CHARS` env var. Hosts that
|
|
226
|
+
* know the context window (e.g. LibreChat) pass a window-relative value. */
|
|
227
|
+
maxOutputChars?: number;
|
|
221
228
|
logger?: Logger;
|
|
222
229
|
safeSearch?: SafeSearchLevel;
|
|
223
230
|
jinaApiKey?: string;
|