@librechat/agents 3.1.75 → 3.1.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +13 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/langchain/google-common.cjs +3 -0
- package/dist/cjs/langchain/google-common.cjs.map +1 -0
- package/dist/cjs/langchain/index.cjs +86 -0
- package/dist/cjs/langchain/index.cjs.map +1 -0
- package/dist/cjs/langchain/language_models/chat_models.cjs +3 -0
- package/dist/cjs/langchain/language_models/chat_models.cjs.map +1 -0
- package/dist/cjs/langchain/messages/tool.cjs +3 -0
- package/dist/cjs/langchain/messages/tool.cjs.map +1 -0
- package/dist/cjs/langchain/messages.cjs +51 -0
- package/dist/cjs/langchain/messages.cjs.map +1 -0
- package/dist/cjs/langchain/openai.cjs +3 -0
- package/dist/cjs/langchain/openai.cjs.map +1 -0
- package/dist/cjs/langchain/prompts.cjs +11 -0
- package/dist/cjs/langchain/prompts.cjs.map +1 -0
- package/dist/cjs/langchain/runnables.cjs +19 -0
- package/dist/cjs/langchain/runnables.cjs.map +1 -0
- package/dist/cjs/langchain/tools.cjs +23 -0
- package/dist/cjs/langchain/tools.cjs.map +1 -0
- package/dist/cjs/langchain/utils/env.cjs +11 -0
- package/dist/cjs/langchain/utils/env.cjs.map +1 -0
- package/dist/cjs/llm/anthropic/index.cjs +145 -52
- package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +21 -14
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +84 -70
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +1 -1
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +213 -3
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +2 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/utils/common.cjs +5 -4
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +519 -655
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/utils/index.cjs +20 -458
- package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +57 -175
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +5 -3
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +83 -3
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +2 -1
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +7 -6
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +73 -15
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/langchain.cjs +26 -0
- package/dist/cjs/messages/langchain.cjs.map +1 -0
- package/dist/cjs/messages/prune.cjs +7 -6
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +5 -1
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +55 -66
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tavily-scraper.cjs +189 -0
- package/dist/cjs/tools/search/tavily-scraper.cjs.map +1 -0
- package/dist/cjs/tools/search/tavily-search.cjs +372 -0
- package/dist/cjs/tools/search/tavily-search.cjs.map +1 -0
- package/dist/cjs/tools/search/tool.cjs +26 -4
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +10 -3
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +13 -3
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/langchain/google-common.mjs +2 -0
- package/dist/esm/langchain/google-common.mjs.map +1 -0
- package/dist/esm/langchain/index.mjs +5 -0
- package/dist/esm/langchain/index.mjs.map +1 -0
- package/dist/esm/langchain/language_models/chat_models.mjs +2 -0
- package/dist/esm/langchain/language_models/chat_models.mjs.map +1 -0
- package/dist/esm/langchain/messages/tool.mjs +2 -0
- package/dist/esm/langchain/messages/tool.mjs.map +1 -0
- package/dist/esm/langchain/messages.mjs +2 -0
- package/dist/esm/langchain/messages.mjs.map +1 -0
- package/dist/esm/langchain/openai.mjs +2 -0
- package/dist/esm/langchain/openai.mjs.map +1 -0
- package/dist/esm/langchain/prompts.mjs +2 -0
- package/dist/esm/langchain/prompts.mjs.map +1 -0
- package/dist/esm/langchain/runnables.mjs +2 -0
- package/dist/esm/langchain/runnables.mjs.map +1 -0
- package/dist/esm/langchain/tools.mjs +2 -0
- package/dist/esm/langchain/tools.mjs.map +1 -0
- package/dist/esm/langchain/utils/env.mjs +2 -0
- package/dist/esm/langchain/utils/env.mjs.map +1 -0
- package/dist/esm/llm/anthropic/index.mjs +146 -54
- package/dist/esm/llm/anthropic/index.mjs.map +1 -1
- package/dist/esm/llm/anthropic/types.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +21 -14
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs +84 -71
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +1 -1
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +214 -4
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +2 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/utils/common.mjs +5 -4
- package/dist/esm/llm/google/utils/common.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +520 -656
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openai/utils/index.mjs +23 -459
- package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +57 -175
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +5 -3
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -0
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +2 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +7 -6
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +73 -15
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/langchain.mjs +23 -0
- package/dist/esm/messages/langchain.mjs.map +1 -0
- package/dist/esm/messages/prune.mjs +7 -6
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +5 -1
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +55 -66
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tavily-scraper.mjs +186 -0
- package/dist/esm/tools/search/tavily-scraper.mjs.map +1 -0
- package/dist/esm/tools/search/tavily-search.mjs +370 -0
- package/dist/esm/tools/search/tavily-search.mjs.map +1 -0
- package/dist/esm/tools/search/tool.mjs +26 -4
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +10 -3
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/langchain/google-common.d.ts +1 -0
- package/dist/types/langchain/index.d.ts +8 -0
- package/dist/types/langchain/language_models/chat_models.d.ts +1 -0
- package/dist/types/langchain/messages/tool.d.ts +1 -0
- package/dist/types/langchain/messages.d.ts +2 -0
- package/dist/types/langchain/openai.d.ts +1 -0
- package/dist/types/langchain/prompts.d.ts +1 -0
- package/dist/types/langchain/runnables.d.ts +2 -0
- package/dist/types/langchain/tools.d.ts +2 -0
- package/dist/types/langchain/utils/env.d.ts +1 -0
- package/dist/types/llm/anthropic/index.d.ts +22 -9
- package/dist/types/llm/anthropic/types.d.ts +5 -1
- package/dist/types/llm/anthropic/utils/message_outputs.d.ts +13 -6
- package/dist/types/llm/anthropic/utils/output_parsers.d.ts +1 -1
- package/dist/types/llm/openai/index.d.ts +21 -24
- package/dist/types/llm/openrouter/index.d.ts +11 -9
- package/dist/types/llm/vertexai/index.d.ts +1 -0
- package/dist/types/messages/cache.d.ts +4 -1
- package/dist/types/messages/format.d.ts +4 -1
- package/dist/types/messages/langchain.d.ts +27 -0
- package/dist/types/tools/search/tavily-scraper.d.ts +19 -0
- package/dist/types/tools/search/tavily-search.d.ts +4 -0
- package/dist/types/tools/search/types.d.ts +99 -5
- package/dist/types/tools/search/utils.d.ts +2 -2
- package/dist/types/types/graph.d.ts +23 -37
- package/dist/types/types/llm.d.ts +3 -3
- package/dist/types/types/stream.d.ts +1 -1
- package/package.json +80 -17
- package/src/graphs/Graph.ts +24 -4
- package/src/graphs/__tests__/composition.smoke.test.ts +188 -0
- package/src/index.ts +3 -0
- package/src/langchain/google-common.ts +1 -0
- package/src/langchain/index.ts +8 -0
- package/src/langchain/language_models/chat_models.ts +1 -0
- package/src/langchain/messages/tool.ts +5 -0
- package/src/langchain/messages.ts +21 -0
- package/src/langchain/openai.ts +1 -0
- package/src/langchain/prompts.ts +1 -0
- package/src/langchain/runnables.ts +7 -0
- package/src/langchain/tools.ts +8 -0
- package/src/langchain/utils/env.ts +1 -0
- package/src/llm/anthropic/index.ts +252 -84
- package/src/llm/anthropic/llm.spec.ts +751 -102
- package/src/llm/anthropic/types.ts +9 -1
- package/src/llm/anthropic/utils/message_inputs.ts +37 -19
- package/src/llm/anthropic/utils/message_outputs.ts +119 -101
- package/src/llm/bedrock/index.ts +2 -2
- package/src/llm/bedrock/llm.spec.ts +341 -0
- package/src/llm/bedrock/utils/message_inputs.ts +303 -4
- package/src/llm/bedrock/utils/message_outputs.ts +2 -1
- package/src/llm/custom-chat-models.smoke.test.ts +836 -0
- package/src/llm/google/llm.spec.ts +339 -57
- package/src/llm/google/utils/common.ts +53 -48
- package/src/llm/openai/contentBlocks.test.ts +346 -0
- package/src/llm/openai/index.ts +856 -833
- package/src/llm/openai/utils/index.ts +107 -78
- package/src/llm/openai/utils/messages.test.ts +159 -0
- package/src/llm/openrouter/index.ts +124 -247
- package/src/llm/openrouter/reasoning.test.ts +8 -1
- package/src/llm/vertexai/index.ts +11 -5
- package/src/llm/vertexai/llm.spec.ts +28 -1
- package/src/messages/cache.test.ts +4 -3
- package/src/messages/cache.ts +3 -2
- package/src/messages/core.ts +16 -9
- package/src/messages/format.ts +96 -16
- package/src/messages/formatAgentMessages.test.ts +166 -1
- package/src/messages/langchain.ts +39 -0
- package/src/messages/prune.ts +12 -8
- package/src/scripts/caching.ts +2 -3
- package/src/specs/summarization.test.ts +51 -58
- package/src/tools/ToolNode.ts +5 -1
- package/src/tools/search/search.ts +83 -73
- package/src/tools/search/tavily-scraper.ts +235 -0
- package/src/tools/search/tavily-search.ts +424 -0
- package/src/tools/search/tavily.test.ts +965 -0
- package/src/tools/search/tool.ts +36 -26
- package/src/tools/search/types.ts +134 -11
- package/src/tools/search/utils.ts +13 -5
- package/src/types/graph.ts +32 -87
- package/src/types/llm.ts +3 -3
- package/src/types/stream.ts +1 -1
- package/src/utils/llmConfig.ts +1 -6
|
@@ -138,6 +138,7 @@ async function createSummarizationRun(opts: {
|
|
|
138
138
|
tools?: t.GraphTools;
|
|
139
139
|
indexTokenCountMap?: Record<string, number>;
|
|
140
140
|
llmConfigOverride?: Record<string, unknown>;
|
|
141
|
+
maxSummaryTokens?: number;
|
|
141
142
|
}): Promise<Run<t.IState>> {
|
|
142
143
|
const llmConfig = {
|
|
143
144
|
...getLLMConfig(opts.agentProvider),
|
|
@@ -157,6 +158,7 @@ async function createSummarizationRun(opts: {
|
|
|
157
158
|
summarizationConfig: {
|
|
158
159
|
provider: opts.summarizationProvider,
|
|
159
160
|
model: opts.summarizationModel,
|
|
161
|
+
maxSummaryTokens: opts.maxSummaryTokens,
|
|
160
162
|
},
|
|
161
163
|
},
|
|
162
164
|
returnContent: true,
|
|
@@ -244,6 +246,33 @@ function buildIndexTokenCountMap(
|
|
|
244
246
|
return map;
|
|
245
247
|
}
|
|
246
248
|
|
|
249
|
+
/**
 * Adds up every token count stored in an index → token-count map.
 *
 * Entries whose value is `undefined` contribute 0 to the total.
 *
 * @param map - Token counts keyed by string index.
 * @returns The sum of all defined counts; 0 for an empty map.
 */
function sumTokenCountMap(map: Record<string, number | undefined>): number {
  // Object.values visits only the map's own enumerable properties, so a count
  // reachable through the prototype chain can never inflate the total (a
  // `for..in` loop would include such inherited keys).
  return Object.values(map).reduce<number>(
    (total, count) => total + (count ?? 0),
    0
  );
}
|
|
256
|
+
|
|
257
|
+
/**
 * Builds a fixed six-message conversation (three human/AI turn pairs) used to
 * seed the token-audit test.
 *
 * Every message carries the same filler text repeated eight times so the
 * history is long before any model call happens.
 *
 * @returns The seeded messages, alternating human and AI, in turn order.
 */
function createSeededTokenAuditHistory(): BaseMessage[] {
  const filler = [
    'Token audit context preserves index token counts, summary replacement, calibration data, and post-summary continuity. ',
    'Important retained values: alpha=1024, beta=2048, gamma=4096, checksum TOKEN-AUDIT-7F3. ',
    'The repeated detail intentionally exceeds a compact context budget. ',
  ]
    .join('')
    .repeat(8);

  // Each tuple is [human prompt, AI reply] for one turn.
  const turns: Array<[string, string]> = [
    [
      `Audit turn 1: establish the accounting scenario. ${filler}`,
      `Recorded turn 1 accounting notes. ${filler}`,
    ],
    [
      `Audit turn 2: add more retained details. ${filler}`,
      `Recorded turn 2 accounting notes. ${filler}`,
    ],
    [
      `Audit turn 3: preserve final identifiers. ${filler}`,
      `Recorded turn 3 accounting notes. ${filler}`,
    ],
  ];

  const history: BaseMessage[] = [];
  for (const [question, answer] of turns) {
    history.push(new HumanMessage(question), new AIMessage(answer));
  }
  return history;
}
|
|
275
|
+
|
|
247
276
|
function logTurn(
|
|
248
277
|
label: string,
|
|
249
278
|
conversationHistory: BaseMessage[],
|
|
@@ -2417,10 +2446,10 @@ const hasAnyApiKey =
|
|
|
2417
2446
|
test('token count map is accurate after summarization cycle', async () => {
|
|
2418
2447
|
const spies = createSpies();
|
|
2419
2448
|
let collectedUsage: UsageMetadata[] = [];
|
|
2420
|
-
const conversationHistory
|
|
2449
|
+
const conversationHistory = createSeededTokenAuditHistory();
|
|
2421
2450
|
const tokenCounter = await createTokenCounter();
|
|
2422
2451
|
|
|
2423
|
-
const createRun = async (maxTokens =
|
|
2452
|
+
const createRun = async (maxTokens = 1200): Promise<Run<t.IState>> => {
|
|
2424
2453
|
collectedUsage = [];
|
|
2425
2454
|
const { aggregateContent } = createContentAggregator();
|
|
2426
2455
|
const indexTokenCountMap = buildIndexTokenCountMap(
|
|
@@ -2432,80 +2461,44 @@ const hasAnyApiKey =
|
|
|
2432
2461
|
summarizationProvider,
|
|
2433
2462
|
summarizationModel,
|
|
2434
2463
|
maxContextTokens: maxTokens,
|
|
2435
|
-
instructions:
|
|
2464
|
+
instructions:
|
|
2465
|
+
'You are a concise assistant. Preserve checkpoint context and answer in one short sentence.',
|
|
2436
2466
|
collectedUsage,
|
|
2437
2467
|
aggregateContent,
|
|
2438
2468
|
spies,
|
|
2439
2469
|
tokenCounter,
|
|
2440
2470
|
indexTokenCountMap,
|
|
2471
|
+
maxSummaryTokens: 300,
|
|
2472
|
+
tools: [],
|
|
2473
|
+
llmConfigOverride: {
|
|
2474
|
+
maxTokens: 128,
|
|
2475
|
+
},
|
|
2441
2476
|
});
|
|
2442
2477
|
};
|
|
2443
2478
|
|
|
2444
|
-
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
{ run, conversationHistory },
|
|
2448
|
-
'What is 42 * 58? Calculator.',
|
|
2449
|
-
streamConfig
|
|
2450
|
-
);
|
|
2451
|
-
|
|
2452
|
-
run = await createRun();
|
|
2453
|
-
await runTurn(
|
|
2454
|
-
{ run, conversationHistory },
|
|
2455
|
-
'Now compute 2436 + 1000. Calculator.',
|
|
2456
|
-
streamConfig
|
|
2457
|
-
);
|
|
2458
|
-
|
|
2459
|
-
run = await createRun();
|
|
2460
|
-
await runTurn(
|
|
2461
|
-
{ run, conversationHistory },
|
|
2462
|
-
'What is 3436 / 4? Calculator.',
|
|
2463
|
-
streamConfig
|
|
2464
|
-
);
|
|
2465
|
-
|
|
2466
|
-
run = await createRun();
|
|
2467
|
-
await runTurn(
|
|
2468
|
-
{ run, conversationHistory },
|
|
2469
|
-
'Compute 999 * 2. Calculator.',
|
|
2470
|
-
streamConfig
|
|
2471
|
-
);
|
|
2472
|
-
|
|
2473
|
-
run = await createRun();
|
|
2474
|
-
await runTurn(
|
|
2475
|
-
{ run, conversationHistory },
|
|
2476
|
-
'What is 2^10? Calculator. Also list everything.',
|
|
2477
|
-
streamConfig
|
|
2479
|
+
const originalMap = buildIndexTokenCountMap(
|
|
2480
|
+
conversationHistory,
|
|
2481
|
+
tokenCounter
|
|
2478
2482
|
);
|
|
2483
|
+
const originalTokenTotal = sumTokenCountMap(originalMap);
|
|
2484
|
+
expect(originalTokenTotal).toBeGreaterThan(1200);
|
|
2479
2485
|
|
|
2480
|
-
run = await createRun();
|
|
2486
|
+
const run = await createRun();
|
|
2481
2487
|
await runTurn(
|
|
2482
2488
|
{ run, conversationHistory },
|
|
2483
|
-
'
|
|
2489
|
+
'Acknowledge the preserved token audit context in one short sentence.',
|
|
2484
2490
|
streamConfig
|
|
2485
2491
|
);
|
|
2486
2492
|
|
|
2487
|
-
// Squeeze progressively to force summarization
|
|
2488
|
-
for (const squeeze of [3500, 3200, 3100, 3000, 2800, 2500, 2000]) {
|
|
2489
|
-
if (spies.onSummarizeStartSpy.mock.calls.length > 0) {
|
|
2490
|
-
break;
|
|
2491
|
-
}
|
|
2492
|
-
run = await createRun(squeeze);
|
|
2493
|
-
await runTurn(
|
|
2494
|
-
{ run, conversationHistory },
|
|
2495
|
-
`What is ${squeeze} - 1000? Calculator.`,
|
|
2496
|
-
streamConfig
|
|
2497
|
-
);
|
|
2498
|
-
}
|
|
2499
|
-
|
|
2500
|
-
// Verify summarization fired
|
|
2501
2493
|
expect(spies.onSummarizeCompleteSpy).toHaveBeenCalled();
|
|
2502
2494
|
|
|
2503
2495
|
const completePayload = spies.onSummarizeCompleteSpy.mock
|
|
2504
2496
|
.calls[0][0] as t.SummarizeCompleteEvent;
|
|
2505
|
-
|
|
2506
|
-
expect(
|
|
2497
|
+
const summaryTokenCount = completePayload.summary!.tokenCount ?? 0;
|
|
2498
|
+
expect(summaryTokenCount).toBeGreaterThan(10);
|
|
2499
|
+
expect(summaryTokenCount).toBeLessThan(1500);
|
|
2500
|
+
expect(summaryTokenCount).toBeLessThan(originalTokenTotal);
|
|
2507
2501
|
|
|
2508
|
-
// Token accounting: collectedUsage should have valid entries
|
|
2509
2502
|
const validUsage = collectedUsage.filter(
|
|
2510
2503
|
(u: Partial<UsageMetadata>) =>
|
|
2511
2504
|
u.input_tokens != null && u.input_tokens > 0
|
|
@@ -2513,8 +2506,8 @@ const hasAnyApiKey =
|
|
|
2513
2506
|
expect(validUsage.length).toBeGreaterThan(0);
|
|
2514
2507
|
|
|
2515
2508
|
console.log(
|
|
2516
|
-
` Token audit: summary=${
|
|
2517
|
-
`usageEntries=${validUsage.length}`
|
|
2509
|
+
` Token audit: summary=${summaryTokenCount} tokens, ` +
|
|
2510
|
+
`preTotal=${originalTokenTotal}, usageEntries=${validUsage.length}`
|
|
2518
2511
|
);
|
|
2519
2512
|
}, 180_000);
|
|
2520
2513
|
|
package/src/tools/ToolNode.ts
CHANGED
|
@@ -33,6 +33,7 @@ import {
|
|
|
33
33
|
} from '@/utils/truncation';
|
|
34
34
|
import { safeDispatchCustomEvent } from '@/utils/events';
|
|
35
35
|
import { executeHooks } from '@/hooks';
|
|
36
|
+
import { toLangChainContent } from '@/messages/langchain';
|
|
36
37
|
import { Constants, GraphEvents, CODE_EXECUTION_TOOLS } from '@/common';
|
|
37
38
|
import {
|
|
38
39
|
buildReferenceKey,
|
|
@@ -1282,7 +1283,10 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
|
|
|
1282
1283
|
if (msg.skillName != null) additional_kwargs.skillName = msg.skillName;
|
|
1283
1284
|
|
|
1284
1285
|
converted.push(
|
|
1285
|
-
new HumanMessage({
|
|
1286
|
+
new HumanMessage({
|
|
1287
|
+
content: toLangChainContent(msg.content),
|
|
1288
|
+
additional_kwargs,
|
|
1289
|
+
})
|
|
1286
1290
|
);
|
|
1287
1291
|
}
|
|
1288
1292
|
return converted;
|
|
@@ -2,6 +2,7 @@ import axios from 'axios';
|
|
|
2
2
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
3
3
|
import type * as t from './types';
|
|
4
4
|
import { getAttribution, createDefaultLogger } from './utils';
|
|
5
|
+
import { createTavilyAPI } from './tavily-search';
|
|
5
6
|
import { BaseReranker } from './rerankers';
|
|
6
7
|
|
|
7
8
|
const chunker = {
|
|
@@ -418,15 +419,20 @@ export const createSearchAPI = (
|
|
|
418
419
|
serperApiKey,
|
|
419
420
|
searxngInstanceUrl,
|
|
420
421
|
searxngApiKey,
|
|
422
|
+
tavilyApiKey,
|
|
423
|
+
tavilySearchUrl,
|
|
424
|
+
tavilySearchOptions,
|
|
421
425
|
} = config;
|
|
422
426
|
|
|
423
427
|
if (searchProvider.toLowerCase() === 'serper') {
|
|
424
428
|
return createSerperAPI(serperApiKey);
|
|
425
429
|
} else if (searchProvider.toLowerCase() === 'searxng') {
|
|
426
430
|
return createSearXNGAPI(searxngInstanceUrl, searxngApiKey);
|
|
431
|
+
} else if (searchProvider.toLowerCase() === 'tavily') {
|
|
432
|
+
return createTavilyAPI(tavilyApiKey, tavilySearchUrl, tavilySearchOptions);
|
|
427
433
|
} else {
|
|
428
434
|
throw new Error(
|
|
429
|
-
`Invalid search provider: ${searchProvider}. Must be 'serper' or '
|
|
435
|
+
`Invalid search provider: ${searchProvider}. Must be 'serper', 'searxng', or 'tavily'`
|
|
430
436
|
);
|
|
431
437
|
}
|
|
432
438
|
};
|
|
@@ -454,6 +460,56 @@ export const createSourceProcessor = (
|
|
|
454
460
|
const logger_ = logger || createDefaultLogger();
|
|
455
461
|
const scraper = scraperInstance;
|
|
456
462
|
|
|
463
|
+
/**
 * Converts one raw scraper response into a `ScrapeResult` for `url`.
 *
 * Attribution is computed for both outcomes. A response that is not
 * successful, or that carries no data, is logged and returned as an error
 * result with empty content.
 */
const processResponse = (
  url: string,
  response: t.AnyScraperResponse
): t.ScrapeResult => {
  const extracted = scraper.extractMetadata(response);
  // An empty metadata object is passed on as `undefined`.
  const hasMetadata = Object.keys(extracted).length > 0;
  const attribution = getAttribution(
    url,
    hasMetadata ? extracted : undefined,
    logger_
  );

  if (!response.success || !response.data) {
    logger_.error(
      `Error scraping ${url}: ${response.error ?? 'Unknown error'}`
    );
    return { url, attribution, error: true, content: '' };
  }

  const [rawContent, references] = scraper.extractContent(response);
  return {
    url,
    references,
    attribution,
    content: chunker.cleanText(rawContent),
  };
};
|
|
487
|
+
|
|
488
|
+
/**
 * Attaches highlights for `query` to a scrape result.
 *
 * Results already flagged with an error are returned untouched. If computing
 * highlights throws, the failure is logged and the original result is
 * returned without highlights. `onGetHighlights`, when provided, is called
 * with the result URL after highlights were obtained.
 */
const addHighlights = async (
  result: t.ScrapeResult,
  query: string,
  onGetHighlights: t.SearchToolConfig['onGetHighlights']
): Promise<t.ScrapeResult> => {
  const scrapeFailed = result.error != null;
  if (scrapeFailed) {
    return result;
  }

  let enriched: t.ScrapeResult = result;
  try {
    const highlights = await getHighlights({
      query,
      reranker,
      content: result.content,
      logger: logger_,
    });
    if (onGetHighlights) {
      onGetHighlights(result.url);
    }
    enriched = { ...result, highlights };
  } catch (error) {
    logger_.error('Error processing scraped content:', error);
  }
  return enriched;
};
|
|
512
|
+
|
|
457
513
|
const webScraper = {
|
|
458
514
|
scrapeMany: async ({
|
|
459
515
|
query,
|
|
@@ -465,80 +521,34 @@ export const createSourceProcessor = (
|
|
|
465
521
|
onGetHighlights: t.SearchToolConfig['onGetHighlights'];
|
|
466
522
|
}): Promise<Array<t.ScrapeResult>> => {
|
|
467
523
|
logger_.debug(`Scraping ${links.length} links`);
|
|
468
|
-
const promises: Array<Promise<t.ScrapeResult>> = [];
|
|
469
524
|
try {
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
attribution,
|
|
486
|
-
content: chunker.cleanText(content),
|
|
487
|
-
} as t.ScrapeResult;
|
|
488
|
-
} else {
|
|
489
|
-
logger_.error(
|
|
490
|
-
`Error scraping ${url}: ${response.error ?? 'Unknown error'}`
|
|
491
|
-
);
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
return {
|
|
495
|
-
url,
|
|
496
|
-
attribution,
|
|
497
|
-
error: true,
|
|
498
|
-
content: '',
|
|
499
|
-
} as t.ScrapeResult;
|
|
500
|
-
})
|
|
501
|
-
.then(async (result) => {
|
|
502
|
-
try {
|
|
503
|
-
if (result.error != null) {
|
|
504
|
-
logger_.error(
|
|
505
|
-
`Error scraping ${result.url}: ${result.content}`
|
|
506
|
-
);
|
|
507
|
-
return {
|
|
508
|
-
...result,
|
|
509
|
-
};
|
|
510
|
-
}
|
|
511
|
-
const highlights = await getHighlights({
|
|
512
|
-
query,
|
|
513
|
-
reranker,
|
|
514
|
-
content: result.content,
|
|
515
|
-
logger: logger_,
|
|
516
|
-
});
|
|
517
|
-
if (onGetHighlights) {
|
|
518
|
-
onGetHighlights(result.url);
|
|
519
|
-
}
|
|
520
|
-
return {
|
|
521
|
-
...result,
|
|
522
|
-
highlights,
|
|
523
|
-
};
|
|
524
|
-
} catch (error) {
|
|
525
|
-
logger_.error('Error processing scraped content:', error);
|
|
526
|
-
return {
|
|
527
|
-
...result,
|
|
528
|
-
};
|
|
529
|
-
}
|
|
530
|
-
})
|
|
531
|
-
.catch((error) => {
|
|
532
|
-
logger_.error(`Error scraping ${currentLink}:`, error);
|
|
533
|
-
return {
|
|
534
|
-
url: currentLink,
|
|
535
|
-
error: true,
|
|
536
|
-
content: '',
|
|
537
|
-
};
|
|
538
|
-
});
|
|
539
|
-
promises.push(promise);
|
|
525
|
+
let responses: Array<[string, t.AnyScraperResponse]>;
|
|
526
|
+
|
|
527
|
+
if (scraper.scrapeUrls) {
|
|
528
|
+
responses = await scraper.scrapeUrls(links);
|
|
529
|
+
} else {
|
|
530
|
+
responses = await Promise.all(
|
|
531
|
+
links.map((link) =>
|
|
532
|
+
scraper
|
|
533
|
+
.scrapeUrl(link, {})
|
|
534
|
+
.catch((error): [string, t.AnyScraperResponse] => {
|
|
535
|
+
logger_.error(`Error scraping ${link}:`, error);
|
|
536
|
+
return [link, { success: false, error: String(error) }];
|
|
537
|
+
})
|
|
538
|
+
)
|
|
539
|
+
);
|
|
540
540
|
}
|
|
541
|
-
|
|
541
|
+
|
|
542
|
+
const withHighlights = await Promise.all(
|
|
543
|
+
responses.map(([url, response]) =>
|
|
544
|
+
addHighlights(
|
|
545
|
+
processResponse(url, response),
|
|
546
|
+
query,
|
|
547
|
+
onGetHighlights
|
|
548
|
+
)
|
|
549
|
+
)
|
|
550
|
+
);
|
|
551
|
+
return withHighlights;
|
|
542
552
|
} catch (error) {
|
|
543
553
|
logger_.error('Error in scrapeMany:', error);
|
|
544
554
|
return [];
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import type * as t from './types';
|
|
3
|
+
import { createDefaultLogger } from './utils';
|
|
4
|
+
|
|
5
|
+
/** Default timeout used when the extract depth is 'basic'. */
const DEFAULT_BASIC_TIMEOUT = 15000;
/** Default timeout used when the extract depth is 'advanced'. */
const DEFAULT_ADVANCED_TIMEOUT = 30000;
/** Largest number of URLs sent in a single batch. */
const MAX_BATCH_SIZE = 20;

/**
 * Picks the default timeout for the given extract depth.
 *
 * @param extractDepth - 'basic' or 'advanced'.
 * @returns `DEFAULT_ADVANCED_TIMEOUT` for 'advanced', otherwise `DEFAULT_BASIC_TIMEOUT`.
 */
const getDefaultTimeout = (extractDepth: 'basic' | 'advanced'): number => {
  if (extractDepth === 'advanced') {
    return DEFAULT_ADVANCED_TIMEOUT;
  }
  return DEFAULT_BASIC_TIMEOUT;
};
|
|
13
|
+
|
|
14
|
+
/**
 * Produces a canonical lookup key for a URL: the fragment is dropped and
 * trailing slashes are removed from any path longer than one character.
 *
 * @param url - URL string to normalize.
 * @returns The normalized URL, or `url` unchanged when it cannot be parsed.
 */
const normalizeUrlKey = (url: string): string => {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Not a parseable URL; fall back to the raw string as the key.
    return url;
  }

  parsed.hash = '';
  const { pathname } = parsed;
  // A path of length 1 is left untouched.
  if (pathname.length > 1) {
    parsed.pathname = pathname.replace(/\/+$/, '');
  }
  return parsed.toString();
};
|
|
26
|
+
|
|
27
|
+
/**
 * Indexes an extract result under its exact URL and, when that slot is still
 * free, under its normalized URL as well.
 *
 * The exact URL always overwrites; the normalized key keeps whichever result
 * claimed it first.
 */
const setUrlResult = (
  map: Map<string, t.TavilyExtractResult>,
  result: t.TavilyExtractResult
): void => {
  const { url } = result;
  map.set(url, result);

  const normalizedKey = normalizeUrlKey(url);
  if (map.has(normalizedKey)) {
    return;
  }
  map.set(normalizedKey, result);
};
|
|
37
|
+
|
|
38
|
+
/**
 * Scraper that POSTs URLs to an extract endpoint (default
 * `https://api.tavily.com/extract`) in batches of at most `MAX_BATCH_SIZE`.
 *
 * Failures never reject: every requested URL is answered with a
 * `[url, response]` tuple whose response is either `success: true` with data
 * or `success: false` with an error message.
 */
export class TavilyScraper implements t.BaseScraper {
  private apiKey: string;
  private apiUrl: string;
  private timeout: number;
  private payloadTimeout: number | undefined;
  private logger: t.Logger;
  private extractDepth: 'basic' | 'advanced';
  private includeImages: boolean;
  private includeFavicon: boolean;
  private format: 'markdown' | 'text' | undefined;

  /**
   * @param config - Optional settings. `apiKey` and `apiUrl` fall back to the
   * `TAVILY_API_KEY` / `TAVILY_EXTRACT_URL` environment variables; a missing
   * key only produces a warning here.
   */
  constructor(config: t.TavilyScraperConfig = {}) {
    const env = process.env;
    this.apiKey = config.apiKey ?? env.TAVILY_API_KEY ?? '';
    this.apiUrl =
      config.apiUrl ?? env.TAVILY_EXTRACT_URL ?? 'https://api.tavily.com/extract';
    // Remembered separately so the request payload only carries a timeout
    // when the caller explicitly configured one.
    this.payloadTimeout = config.timeout;
    this.extractDepth = config.extractDepth ?? 'basic';
    this.timeout = config.timeout ?? getDefaultTimeout(this.extractDepth);
    this.includeImages = config.includeImages ?? false;
    this.includeFavicon = config.includeFavicon ?? false;
    this.format = config.format;
    this.logger = config.logger || createDefaultLogger();

    if (!this.apiKey) {
      this.logger.warn('TAVILY_API_KEY is not set. Scraping will not work.');
    }
  }

  /**
   * Scrapes a single URL by delegating to `scrapeUrls`.
   *
   * @returns The `[url, response]` tuple for `url`.
   */
  async scrapeUrl(
    url: string,
    options: t.TavilyScrapeOptions = {}
  ): Promise<[string, t.TavilyScrapeResponse]> {
    const [first] = await this.scrapeUrls([url], options);
    return first;
  }

  /**
   * Scrapes many URLs, one batch after another.
   *
   * @returns One `[url, response]` tuple per input URL, in input order. When
   * no API key is configured every tuple is a failure and no request is made.
   */
  async scrapeUrls(
    urls: string[],
    options: t.TavilyScrapeOptions = {}
  ): Promise<Array<[string, t.TavilyScrapeResponse]>> {
    if (!this.apiKey) {
      return urls.map((url): [string, t.TavilyScrapeResponse] => [
        url,
        { success: false, error: 'TAVILY_API_KEY is not set' },
      ]);
    }

    // Slice every batch up front, before the first request is awaited.
    const batchCount = Math.ceil(urls.length / MAX_BATCH_SIZE);
    const batches = Array.from({ length: batchCount }, (_, index) =>
      urls.slice(index * MAX_BATCH_SIZE, (index + 1) * MAX_BATCH_SIZE)
    );

    const collected: Array<[string, t.TavilyScrapeResponse]> = [];
    for (const batch of batches) {
      const batchResults = await this.extractBatch(batch, options);
      collected.push(...batchResults);
    }
    return collected;
  }

  /**
   * Sends one batch to the extract endpoint and maps the reply back onto the
   * requested URLs. Per-call `options` take precedence over instance settings.
   * Any thrown error turns the whole batch into failure tuples.
   */
  private async extractBatch(
    urls: string[],
    options: t.TavilyScrapeOptions = {}
  ): Promise<Array<[string, t.TavilyScrapeResponse]>> {
    try {
      const extractDepth = options.extractDepth ?? this.extractDepth;
      const payload: t.TavilyExtractPayload = {
        urls,
        extract_depth: extractDepth,
        include_images: options.includeImages ?? this.includeImages,
      };

      if (options.includeFavicon ?? this.includeFavicon) {
        payload.include_favicon = true;
      }
      const format = options.format ?? this.format;
      if (format != null) {
        payload.format = format;
      }

      // An explicit timeout (per call, then per instance) drives both the
      // HTTP timeout and the payload timeout. Without one, the HTTP timeout
      // follows a per-call depth override, else the instance default.
      const explicitTimeout = options.timeout ?? this.payloadTimeout;
      let httpTimeout: number;
      if (explicitTimeout != null) {
        httpTimeout = explicitTimeout;
      } else if (options.extractDepth != null) {
        httpTimeout = getDefaultTimeout(extractDepth);
      } else {
        httpTimeout = this.timeout;
      }
      if (explicitTimeout != null) {
        // Divided by 1000 and clamped to [1, 60] — presumably ms → seconds
        // for the API; confirm against the Tavily Extract reference.
        payload.timeout = Math.min(Math.max(explicitTimeout / 1000, 1), 60);
      }

      const response = await axios.post<{
        results?: t.TavilyExtractResult[];
        failed_results?: t.TavilyExtractResult[];
      }>(this.apiUrl, payload, {
        headers: {
          Authorization: `Bearer ${this.apiKey}`,
          'Content-Type': 'application/json',
        },
        timeout: httpTimeout,
      });

      const body = response.data;
      const succeeded = new Map<string, t.TavilyExtractResult>();
      const failed = new Map<string, t.TavilyExtractResult>();
      for (const entry of body.results ?? []) {
        setUrlResult(succeeded, entry);
      }
      for (const entry of body.failed_results ?? []) {
        setUrlResult(failed, entry);
      }

      // Exact URL first, then its normalized form.
      const lookup = (
        index: Map<string, t.TavilyExtractResult>,
        url: string
      ): t.TavilyExtractResult | undefined =>
        index.get(url) ?? index.get(normalizeUrlKey(url));

      return urls.map((url): [string, t.TavilyScrapeResponse] => {
        const hit = lookup(succeeded, url);
        if (hit && hit.error == null) {
          return [
            url,
            {
              success: true,
              data: {
                rawContent: hit.raw_content ?? '',
                images: hit.images ?? [],
                favicon: hit.favicon,
              },
            },
          ];
        }

        const error =
          hit?.error ??
          lookup(failed, url)?.error ??
          'URL not found in Tavily Extract response';
        return [url, { success: false, error }];
      });
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      return urls.map((url): [string, t.TavilyScrapeResponse] => [
        url,
        {
          success: false,
          error: `Tavily Extract API request failed: ${reason}`,
        },
      ]);
    }
  }

  /**
   * Pulls the text content and image references out of a response.
   *
   * @returns `[content, references]`; `references` is `undefined` when the
   * response failed or contains no images.
   */
  extractContent(
    response: t.TavilyScrapeResponse
  ): [string, undefined | t.References] {
    if (!response.success || !response.data) {
      return ['', undefined];
    }

    const text = response.data.rawContent ?? '';
    const imageUrls = response.data.images ?? [];
    if (imageUrls.length === 0) {
      return [text, undefined];
    }

    const references: t.References = {
      links: [],
      images: imageUrls.map((imageUrl) => ({ originalUrl: imageUrl })),
      videos: [],
    };
    return [text, references];
  }

  /**
   * Derives metadata from a response: the image count, plus the favicon when
   * one is present. Failed or data-less responses yield an empty object.
   */
  extractMetadata(response: t.TavilyScrapeResponse): t.GenericScrapeMetadata {
    if (!response.success || !response.data) {
      return {};
    }

    const { images, favicon } = response.data;
    const metadata: t.GenericScrapeMetadata = {
      images_count: images?.length ?? 0,
    };
    if (favicon != null) {
      metadata.favicon = favicon;
    }
    return metadata;
  }
}
|
|
230
|
+
|
|
231
|
+
/**
 * Factory for `TavilyScraper`.
 *
 * @param config - Optional scraper settings; defaults to an empty config.
 * @returns A new `TavilyScraper` built from `config`.
 */
export const createTavilyScraper = (
  config: t.TavilyScraperConfig = {}
): TavilyScraper => new TavilyScraper(config);
|