@librechat/agents 3.1.75 → 3.1.76

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (221)
  1. package/dist/cjs/graphs/Graph.cjs +13 -3
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/langchain/google-common.cjs +3 -0
  4. package/dist/cjs/langchain/google-common.cjs.map +1 -0
  5. package/dist/cjs/langchain/index.cjs +86 -0
  6. package/dist/cjs/langchain/index.cjs.map +1 -0
  7. package/dist/cjs/langchain/language_models/chat_models.cjs +3 -0
  8. package/dist/cjs/langchain/language_models/chat_models.cjs.map +1 -0
  9. package/dist/cjs/langchain/messages/tool.cjs +3 -0
  10. package/dist/cjs/langchain/messages/tool.cjs.map +1 -0
  11. package/dist/cjs/langchain/messages.cjs +51 -0
  12. package/dist/cjs/langchain/messages.cjs.map +1 -0
  13. package/dist/cjs/langchain/openai.cjs +3 -0
  14. package/dist/cjs/langchain/openai.cjs.map +1 -0
  15. package/dist/cjs/langchain/prompts.cjs +11 -0
  16. package/dist/cjs/langchain/prompts.cjs.map +1 -0
  17. package/dist/cjs/langchain/runnables.cjs +19 -0
  18. package/dist/cjs/langchain/runnables.cjs.map +1 -0
  19. package/dist/cjs/langchain/tools.cjs +23 -0
  20. package/dist/cjs/langchain/tools.cjs.map +1 -0
  21. package/dist/cjs/langchain/utils/env.cjs +11 -0
  22. package/dist/cjs/langchain/utils/env.cjs.map +1 -0
  23. package/dist/cjs/llm/anthropic/index.cjs +145 -52
  24. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  25. package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
  26. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +21 -14
  27. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  28. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +84 -70
  29. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
  30. package/dist/cjs/llm/bedrock/index.cjs +1 -1
  31. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  32. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +213 -3
  33. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  34. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +2 -1
  35. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  36. package/dist/cjs/llm/google/utils/common.cjs +5 -4
  37. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  38. package/dist/cjs/llm/openai/index.cjs +519 -655
  39. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  40. package/dist/cjs/llm/openai/utils/index.cjs +20 -458
  41. package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
  42. package/dist/cjs/llm/openrouter/index.cjs +57 -175
  43. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  44. package/dist/cjs/llm/vertexai/index.cjs +5 -3
  45. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  46. package/dist/cjs/main.cjs +83 -3
  47. package/dist/cjs/main.cjs.map +1 -1
  48. package/dist/cjs/messages/cache.cjs +2 -1
  49. package/dist/cjs/messages/cache.cjs.map +1 -1
  50. package/dist/cjs/messages/core.cjs +7 -6
  51. package/dist/cjs/messages/core.cjs.map +1 -1
  52. package/dist/cjs/messages/format.cjs +73 -15
  53. package/dist/cjs/messages/format.cjs.map +1 -1
  54. package/dist/cjs/messages/langchain.cjs +26 -0
  55. package/dist/cjs/messages/langchain.cjs.map +1 -0
  56. package/dist/cjs/messages/prune.cjs +7 -6
  57. package/dist/cjs/messages/prune.cjs.map +1 -1
  58. package/dist/cjs/tools/ToolNode.cjs +5 -1
  59. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  60. package/dist/cjs/tools/search/search.cjs +55 -66
  61. package/dist/cjs/tools/search/search.cjs.map +1 -1
  62. package/dist/cjs/tools/search/tavily-scraper.cjs +189 -0
  63. package/dist/cjs/tools/search/tavily-scraper.cjs.map +1 -0
  64. package/dist/cjs/tools/search/tavily-search.cjs +372 -0
  65. package/dist/cjs/tools/search/tavily-search.cjs.map +1 -0
  66. package/dist/cjs/tools/search/tool.cjs +26 -4
  67. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  68. package/dist/cjs/tools/search/utils.cjs +10 -3
  69. package/dist/cjs/tools/search/utils.cjs.map +1 -1
  70. package/dist/esm/graphs/Graph.mjs +13 -3
  71. package/dist/esm/graphs/Graph.mjs.map +1 -1
  72. package/dist/esm/langchain/google-common.mjs +2 -0
  73. package/dist/esm/langchain/google-common.mjs.map +1 -0
  74. package/dist/esm/langchain/index.mjs +5 -0
  75. package/dist/esm/langchain/index.mjs.map +1 -0
  76. package/dist/esm/langchain/language_models/chat_models.mjs +2 -0
  77. package/dist/esm/langchain/language_models/chat_models.mjs.map +1 -0
  78. package/dist/esm/langchain/messages/tool.mjs +2 -0
  79. package/dist/esm/langchain/messages/tool.mjs.map +1 -0
  80. package/dist/esm/langchain/messages.mjs +2 -0
  81. package/dist/esm/langchain/messages.mjs.map +1 -0
  82. package/dist/esm/langchain/openai.mjs +2 -0
  83. package/dist/esm/langchain/openai.mjs.map +1 -0
  84. package/dist/esm/langchain/prompts.mjs +2 -0
  85. package/dist/esm/langchain/prompts.mjs.map +1 -0
  86. package/dist/esm/langchain/runnables.mjs +2 -0
  87. package/dist/esm/langchain/runnables.mjs.map +1 -0
  88. package/dist/esm/langchain/tools.mjs +2 -0
  89. package/dist/esm/langchain/tools.mjs.map +1 -0
  90. package/dist/esm/langchain/utils/env.mjs +2 -0
  91. package/dist/esm/langchain/utils/env.mjs.map +1 -0
  92. package/dist/esm/llm/anthropic/index.mjs +146 -54
  93. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  94. package/dist/esm/llm/anthropic/types.mjs.map +1 -1
  95. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +21 -14
  96. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  97. package/dist/esm/llm/anthropic/utils/message_outputs.mjs +84 -71
  98. package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
  99. package/dist/esm/llm/bedrock/index.mjs +1 -1
  100. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  101. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +214 -4
  102. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  103. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +2 -1
  104. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  105. package/dist/esm/llm/google/utils/common.mjs +5 -4
  106. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  107. package/dist/esm/llm/openai/index.mjs +520 -656
  108. package/dist/esm/llm/openai/index.mjs.map +1 -1
  109. package/dist/esm/llm/openai/utils/index.mjs +23 -459
  110. package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
  111. package/dist/esm/llm/openrouter/index.mjs +57 -175
  112. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  113. package/dist/esm/llm/vertexai/index.mjs +5 -3
  114. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  115. package/dist/esm/main.mjs +4 -0
  116. package/dist/esm/main.mjs.map +1 -1
  117. package/dist/esm/messages/cache.mjs +2 -1
  118. package/dist/esm/messages/cache.mjs.map +1 -1
  119. package/dist/esm/messages/core.mjs +7 -6
  120. package/dist/esm/messages/core.mjs.map +1 -1
  121. package/dist/esm/messages/format.mjs +73 -15
  122. package/dist/esm/messages/format.mjs.map +1 -1
  123. package/dist/esm/messages/langchain.mjs +23 -0
  124. package/dist/esm/messages/langchain.mjs.map +1 -0
  125. package/dist/esm/messages/prune.mjs +7 -6
  126. package/dist/esm/messages/prune.mjs.map +1 -1
  127. package/dist/esm/tools/ToolNode.mjs +5 -1
  128. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  129. package/dist/esm/tools/search/search.mjs +55 -66
  130. package/dist/esm/tools/search/search.mjs.map +1 -1
  131. package/dist/esm/tools/search/tavily-scraper.mjs +186 -0
  132. package/dist/esm/tools/search/tavily-scraper.mjs.map +1 -0
  133. package/dist/esm/tools/search/tavily-search.mjs +370 -0
  134. package/dist/esm/tools/search/tavily-search.mjs.map +1 -0
  135. package/dist/esm/tools/search/tool.mjs +26 -4
  136. package/dist/esm/tools/search/tool.mjs.map +1 -1
  137. package/dist/esm/tools/search/utils.mjs +10 -3
  138. package/dist/esm/tools/search/utils.mjs.map +1 -1
  139. package/dist/types/index.d.ts +1 -0
  140. package/dist/types/langchain/google-common.d.ts +1 -0
  141. package/dist/types/langchain/index.d.ts +8 -0
  142. package/dist/types/langchain/language_models/chat_models.d.ts +1 -0
  143. package/dist/types/langchain/messages/tool.d.ts +1 -0
  144. package/dist/types/langchain/messages.d.ts +2 -0
  145. package/dist/types/langchain/openai.d.ts +1 -0
  146. package/dist/types/langchain/prompts.d.ts +1 -0
  147. package/dist/types/langchain/runnables.d.ts +2 -0
  148. package/dist/types/langchain/tools.d.ts +2 -0
  149. package/dist/types/langchain/utils/env.d.ts +1 -0
  150. package/dist/types/llm/anthropic/index.d.ts +22 -9
  151. package/dist/types/llm/anthropic/types.d.ts +5 -1
  152. package/dist/types/llm/anthropic/utils/message_outputs.d.ts +13 -6
  153. package/dist/types/llm/anthropic/utils/output_parsers.d.ts +1 -1
  154. package/dist/types/llm/openai/index.d.ts +21 -24
  155. package/dist/types/llm/openrouter/index.d.ts +11 -9
  156. package/dist/types/llm/vertexai/index.d.ts +1 -0
  157. package/dist/types/messages/cache.d.ts +4 -1
  158. package/dist/types/messages/format.d.ts +4 -1
  159. package/dist/types/messages/langchain.d.ts +27 -0
  160. package/dist/types/tools/search/tavily-scraper.d.ts +19 -0
  161. package/dist/types/tools/search/tavily-search.d.ts +4 -0
  162. package/dist/types/tools/search/types.d.ts +99 -5
  163. package/dist/types/tools/search/utils.d.ts +2 -2
  164. package/dist/types/types/graph.d.ts +23 -37
  165. package/dist/types/types/llm.d.ts +3 -3
  166. package/dist/types/types/stream.d.ts +1 -1
  167. package/package.json +80 -17
  168. package/src/graphs/Graph.ts +24 -4
  169. package/src/graphs/__tests__/composition.smoke.test.ts +188 -0
  170. package/src/index.ts +3 -0
  171. package/src/langchain/google-common.ts +1 -0
  172. package/src/langchain/index.ts +8 -0
  173. package/src/langchain/language_models/chat_models.ts +1 -0
  174. package/src/langchain/messages/tool.ts +5 -0
  175. package/src/langchain/messages.ts +21 -0
  176. package/src/langchain/openai.ts +1 -0
  177. package/src/langchain/prompts.ts +1 -0
  178. package/src/langchain/runnables.ts +7 -0
  179. package/src/langchain/tools.ts +8 -0
  180. package/src/langchain/utils/env.ts +1 -0
  181. package/src/llm/anthropic/index.ts +252 -84
  182. package/src/llm/anthropic/llm.spec.ts +751 -102
  183. package/src/llm/anthropic/types.ts +9 -1
  184. package/src/llm/anthropic/utils/message_inputs.ts +37 -19
  185. package/src/llm/anthropic/utils/message_outputs.ts +119 -101
  186. package/src/llm/bedrock/index.ts +2 -2
  187. package/src/llm/bedrock/llm.spec.ts +341 -0
  188. package/src/llm/bedrock/utils/message_inputs.ts +303 -4
  189. package/src/llm/bedrock/utils/message_outputs.ts +2 -1
  190. package/src/llm/custom-chat-models.smoke.test.ts +836 -0
  191. package/src/llm/google/llm.spec.ts +339 -57
  192. package/src/llm/google/utils/common.ts +53 -48
  193. package/src/llm/openai/contentBlocks.test.ts +346 -0
  194. package/src/llm/openai/index.ts +856 -833
  195. package/src/llm/openai/utils/index.ts +107 -78
  196. package/src/llm/openai/utils/messages.test.ts +159 -0
  197. package/src/llm/openrouter/index.ts +124 -247
  198. package/src/llm/openrouter/reasoning.test.ts +8 -1
  199. package/src/llm/vertexai/index.ts +11 -5
  200. package/src/llm/vertexai/llm.spec.ts +28 -1
  201. package/src/messages/cache.test.ts +4 -3
  202. package/src/messages/cache.ts +3 -2
  203. package/src/messages/core.ts +16 -9
  204. package/src/messages/format.ts +96 -16
  205. package/src/messages/formatAgentMessages.test.ts +166 -1
  206. package/src/messages/langchain.ts +39 -0
  207. package/src/messages/prune.ts +12 -8
  208. package/src/scripts/caching.ts +2 -3
  209. package/src/specs/summarization.test.ts +51 -58
  210. package/src/tools/ToolNode.ts +5 -1
  211. package/src/tools/search/search.ts +83 -73
  212. package/src/tools/search/tavily-scraper.ts +235 -0
  213. package/src/tools/search/tavily-search.ts +424 -0
  214. package/src/tools/search/tavily.test.ts +965 -0
  215. package/src/tools/search/tool.ts +36 -26
  216. package/src/tools/search/types.ts +134 -11
  217. package/src/tools/search/utils.ts +13 -5
  218. package/src/types/graph.ts +32 -87
  219. package/src/types/llm.ts +3 -3
  220. package/src/types/stream.ts +1 -1
  221. package/src/utils/llmConfig.ts +1 -6
@@ -138,6 +138,7 @@ async function createSummarizationRun(opts: {
138
138
  tools?: t.GraphTools;
139
139
  indexTokenCountMap?: Record<string, number>;
140
140
  llmConfigOverride?: Record<string, unknown>;
141
+ maxSummaryTokens?: number;
141
142
  }): Promise<Run<t.IState>> {
142
143
  const llmConfig = {
143
144
  ...getLLMConfig(opts.agentProvider),
@@ -157,6 +158,7 @@ async function createSummarizationRun(opts: {
157
158
  summarizationConfig: {
158
159
  provider: opts.summarizationProvider,
159
160
  model: opts.summarizationModel,
161
+ maxSummaryTokens: opts.maxSummaryTokens,
160
162
  },
161
163
  },
162
164
  returnContent: true,
@@ -244,6 +246,33 @@ function buildIndexTokenCountMap(
244
246
  return map;
245
247
  }
246
248
 
249
+ function sumTokenCountMap(map: Record<string, number | undefined>): number {
250
+ let total = 0;
251
+ for (const key in map) {
252
+ total += map[key] ?? 0;
253
+ }
254
+ return total;
255
+ }
256
+
257
+ function createSeededTokenAuditHistory(): BaseMessage[] {
258
+ const details =
259
+ 'Token audit context preserves index token counts, summary replacement, calibration data, and post-summary continuity. ' +
260
+ 'Important retained values: alpha=1024, beta=2048, gamma=4096, checksum TOKEN-AUDIT-7F3. ' +
261
+ 'The repeated detail intentionally exceeds a compact context budget. ';
262
+ const padding = details.repeat(8);
263
+
264
+ return [
265
+ new HumanMessage(
266
+ `Audit turn 1: establish the accounting scenario. ${padding}`
267
+ ),
268
+ new AIMessage(`Recorded turn 1 accounting notes. ${padding}`),
269
+ new HumanMessage(`Audit turn 2: add more retained details. ${padding}`),
270
+ new AIMessage(`Recorded turn 2 accounting notes. ${padding}`),
271
+ new HumanMessage(`Audit turn 3: preserve final identifiers. ${padding}`),
272
+ new AIMessage(`Recorded turn 3 accounting notes. ${padding}`),
273
+ ];
274
+ }
275
+
247
276
  function logTurn(
248
277
  label: string,
249
278
  conversationHistory: BaseMessage[],
@@ -2417,10 +2446,10 @@ const hasAnyApiKey =
2417
2446
  test('token count map is accurate after summarization cycle', async () => {
2418
2447
  const spies = createSpies();
2419
2448
  let collectedUsage: UsageMetadata[] = [];
2420
- const conversationHistory: BaseMessage[] = [];
2449
+ const conversationHistory = createSeededTokenAuditHistory();
2421
2450
  const tokenCounter = await createTokenCounter();
2422
2451
 
2423
- const createRun = async (maxTokens = 4000): Promise<Run<t.IState>> => {
2452
+ const createRun = async (maxTokens = 1200): Promise<Run<t.IState>> => {
2424
2453
  collectedUsage = [];
2425
2454
  const { aggregateContent } = createContentAggregator();
2426
2455
  const indexTokenCountMap = buildIndexTokenCountMap(
@@ -2432,80 +2461,44 @@ const hasAnyApiKey =
2432
2461
  summarizationProvider,
2433
2462
  summarizationModel,
2434
2463
  maxContextTokens: maxTokens,
2435
- instructions: INSTRUCTIONS,
2464
+ instructions:
2465
+ 'You are a concise assistant. Preserve checkpoint context and answer in one short sentence.',
2436
2466
  collectedUsage,
2437
2467
  aggregateContent,
2438
2468
  spies,
2439
2469
  tokenCounter,
2440
2470
  indexTokenCountMap,
2471
+ maxSummaryTokens: 300,
2472
+ tools: [],
2473
+ llmConfigOverride: {
2474
+ maxTokens: 128,
2475
+ },
2441
2476
  });
2442
2477
  };
2443
2478
 
2444
- // Accumulate messages over 6 turns at generous budget
2445
- let run = await createRun();
2446
- await runTurn(
2447
- { run, conversationHistory },
2448
- 'What is 42 * 58? Calculator.',
2449
- streamConfig
2450
- );
2451
-
2452
- run = await createRun();
2453
- await runTurn(
2454
- { run, conversationHistory },
2455
- 'Now compute 2436 + 1000. Calculator.',
2456
- streamConfig
2457
- );
2458
-
2459
- run = await createRun();
2460
- await runTurn(
2461
- { run, conversationHistory },
2462
- 'What is 3436 / 4? Calculator.',
2463
- streamConfig
2464
- );
2465
-
2466
- run = await createRun();
2467
- await runTurn(
2468
- { run, conversationHistory },
2469
- 'Compute 999 * 2. Calculator.',
2470
- streamConfig
2471
- );
2472
-
2473
- run = await createRun();
2474
- await runTurn(
2475
- { run, conversationHistory },
2476
- 'What is 2^10? Calculator. Also list everything.',
2477
- streamConfig
2479
+ const originalMap = buildIndexTokenCountMap(
2480
+ conversationHistory,
2481
+ tokenCounter
2478
2482
  );
2483
+ const originalTokenTotal = sumTokenCountMap(originalMap);
2484
+ expect(originalTokenTotal).toBeGreaterThan(1200);
2479
2485
 
2480
- run = await createRun();
2486
+ const run = await createRun();
2481
2487
  await runTurn(
2482
2488
  { run, conversationHistory },
2483
- 'Calculate 355 / 113. Calculator.',
2489
+ 'Acknowledge the preserved token audit context in one short sentence.',
2484
2490
  streamConfig
2485
2491
  );
2486
2492
 
2487
- // Squeeze progressively to force summarization
2488
- for (const squeeze of [3500, 3200, 3100, 3000, 2800, 2500, 2000]) {
2489
- if (spies.onSummarizeStartSpy.mock.calls.length > 0) {
2490
- break;
2491
- }
2492
- run = await createRun(squeeze);
2493
- await runTurn(
2494
- { run, conversationHistory },
2495
- `What is ${squeeze} - 1000? Calculator.`,
2496
- streamConfig
2497
- );
2498
- }
2499
-
2500
- // Verify summarization fired
2501
2493
  expect(spies.onSummarizeCompleteSpy).toHaveBeenCalled();
2502
2494
 
2503
2495
  const completePayload = spies.onSummarizeCompleteSpy.mock
2504
2496
  .calls[0][0] as t.SummarizeCompleteEvent;
2505
- expect(completePayload.summary!.tokenCount).toBeGreaterThan(10);
2506
- expect(completePayload.summary!.tokenCount).toBeLessThan(1500);
2497
+ const summaryTokenCount = completePayload.summary!.tokenCount ?? 0;
2498
+ expect(summaryTokenCount).toBeGreaterThan(10);
2499
+ expect(summaryTokenCount).toBeLessThan(1500);
2500
+ expect(summaryTokenCount).toBeLessThan(originalTokenTotal);
2507
2501
 
2508
- // Token accounting: collectedUsage should have valid entries
2509
2502
  const validUsage = collectedUsage.filter(
2510
2503
  (u: Partial<UsageMetadata>) =>
2511
2504
  u.input_tokens != null && u.input_tokens > 0
@@ -2513,8 +2506,8 @@ const hasAnyApiKey =
2513
2506
  expect(validUsage.length).toBeGreaterThan(0);
2514
2507
 
2515
2508
  console.log(
2516
- ` Token audit: summary=${completePayload.summary!.tokenCount} tokens, ` +
2517
- `usageEntries=${validUsage.length}`
2509
+ ` Token audit: summary=${summaryTokenCount} tokens, ` +
2510
+ `preTotal=${originalTokenTotal}, usageEntries=${validUsage.length}`
2518
2511
  );
2519
2512
  }, 180_000);
2520
2513
 
@@ -33,6 +33,7 @@ import {
33
33
  } from '@/utils/truncation';
34
34
  import { safeDispatchCustomEvent } from '@/utils/events';
35
35
  import { executeHooks } from '@/hooks';
36
+ import { toLangChainContent } from '@/messages/langchain';
36
37
  import { Constants, GraphEvents, CODE_EXECUTION_TOOLS } from '@/common';
37
38
  import {
38
39
  buildReferenceKey,
@@ -1282,7 +1283,10 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
1282
1283
  if (msg.skillName != null) additional_kwargs.skillName = msg.skillName;
1283
1284
 
1284
1285
  converted.push(
1285
- new HumanMessage({ content: msg.content, additional_kwargs })
1286
+ new HumanMessage({
1287
+ content: toLangChainContent(msg.content),
1288
+ additional_kwargs,
1289
+ })
1286
1290
  );
1287
1291
  }
1288
1292
  return converted;
@@ -2,6 +2,7 @@ import axios from 'axios';
2
2
  import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
3
3
  import type * as t from './types';
4
4
  import { getAttribution, createDefaultLogger } from './utils';
5
+ import { createTavilyAPI } from './tavily-search';
5
6
  import { BaseReranker } from './rerankers';
6
7
 
7
8
  const chunker = {
@@ -418,15 +419,20 @@ export const createSearchAPI = (
418
419
  serperApiKey,
419
420
  searxngInstanceUrl,
420
421
  searxngApiKey,
422
+ tavilyApiKey,
423
+ tavilySearchUrl,
424
+ tavilySearchOptions,
421
425
  } = config;
422
426
 
423
427
  if (searchProvider.toLowerCase() === 'serper') {
424
428
  return createSerperAPI(serperApiKey);
425
429
  } else if (searchProvider.toLowerCase() === 'searxng') {
426
430
  return createSearXNGAPI(searxngInstanceUrl, searxngApiKey);
431
+ } else if (searchProvider.toLowerCase() === 'tavily') {
432
+ return createTavilyAPI(tavilyApiKey, tavilySearchUrl, tavilySearchOptions);
427
433
  } else {
428
434
  throw new Error(
429
- `Invalid search provider: ${searchProvider}. Must be 'serper' or 'searxng'`
435
+ `Invalid search provider: ${searchProvider}. Must be 'serper', 'searxng', or 'tavily'`
430
436
  );
431
437
  }
432
438
  };
@@ -454,6 +460,56 @@ export const createSourceProcessor = (
454
460
  const logger_ = logger || createDefaultLogger();
455
461
  const scraper = scraperInstance;
456
462
 
463
+ const processResponse = (
464
+ url: string,
465
+ response: t.AnyScraperResponse
466
+ ): t.ScrapeResult => {
467
+ const rawMetadata = scraper.extractMetadata(response);
468
+ const metadata =
469
+ Object.keys(rawMetadata).length > 0 ? rawMetadata : undefined;
470
+ const attribution = getAttribution(url, metadata, logger_);
471
+
472
+ if (response.success && response.data) {
473
+ const [content, references] = scraper.extractContent(response);
474
+ return {
475
+ url,
476
+ references,
477
+ attribution,
478
+ content: chunker.cleanText(content),
479
+ };
480
+ }
481
+
482
+ logger_.error(
483
+ `Error scraping ${url}: ${response.error ?? 'Unknown error'}`
484
+ );
485
+ return { url, attribution, error: true, content: '' };
486
+ };
487
+
488
+ const addHighlights = async (
489
+ result: t.ScrapeResult,
490
+ query: string,
491
+ onGetHighlights: t.SearchToolConfig['onGetHighlights']
492
+ ): Promise<t.ScrapeResult> => {
493
+ if (result.error != null) {
494
+ return result;
495
+ }
496
+ try {
497
+ const highlights = await getHighlights({
498
+ query,
499
+ reranker,
500
+ content: result.content,
501
+ logger: logger_,
502
+ });
503
+ if (onGetHighlights) {
504
+ onGetHighlights(result.url);
505
+ }
506
+ return { ...result, highlights };
507
+ } catch (error) {
508
+ logger_.error('Error processing scraped content:', error);
509
+ return result;
510
+ }
511
+ };
512
+
457
513
  const webScraper = {
458
514
  scrapeMany: async ({
459
515
  query,
@@ -465,80 +521,34 @@ export const createSourceProcessor = (
465
521
  onGetHighlights: t.SearchToolConfig['onGetHighlights'];
466
522
  }): Promise<Array<t.ScrapeResult>> => {
467
523
  logger_.debug(`Scraping ${links.length} links`);
468
- const promises: Array<Promise<t.ScrapeResult>> = [];
469
524
  try {
470
- for (let i = 0; i < links.length; i++) {
471
- const currentLink = links[i];
472
- const promise: Promise<t.ScrapeResult> = scraper
473
- .scrapeUrl(currentLink, {})
474
- .then(([url, response]) => {
475
- const attribution = getAttribution(
476
- url,
477
- response.data?.metadata,
478
- logger_
479
- );
480
- if (response.success && response.data) {
481
- const [content, references] = scraper.extractContent(response);
482
- return {
483
- url,
484
- references,
485
- attribution,
486
- content: chunker.cleanText(content),
487
- } as t.ScrapeResult;
488
- } else {
489
- logger_.error(
490
- `Error scraping ${url}: ${response.error ?? 'Unknown error'}`
491
- );
492
- }
493
-
494
- return {
495
- url,
496
- attribution,
497
- error: true,
498
- content: '',
499
- } as t.ScrapeResult;
500
- })
501
- .then(async (result) => {
502
- try {
503
- if (result.error != null) {
504
- logger_.error(
505
- `Error scraping ${result.url}: ${result.content}`
506
- );
507
- return {
508
- ...result,
509
- };
510
- }
511
- const highlights = await getHighlights({
512
- query,
513
- reranker,
514
- content: result.content,
515
- logger: logger_,
516
- });
517
- if (onGetHighlights) {
518
- onGetHighlights(result.url);
519
- }
520
- return {
521
- ...result,
522
- highlights,
523
- };
524
- } catch (error) {
525
- logger_.error('Error processing scraped content:', error);
526
- return {
527
- ...result,
528
- };
529
- }
530
- })
531
- .catch((error) => {
532
- logger_.error(`Error scraping ${currentLink}:`, error);
533
- return {
534
- url: currentLink,
535
- error: true,
536
- content: '',
537
- };
538
- });
539
- promises.push(promise);
525
+ let responses: Array<[string, t.AnyScraperResponse]>;
526
+
527
+ if (scraper.scrapeUrls) {
528
+ responses = await scraper.scrapeUrls(links);
529
+ } else {
530
+ responses = await Promise.all(
531
+ links.map((link) =>
532
+ scraper
533
+ .scrapeUrl(link, {})
534
+ .catch((error): [string, t.AnyScraperResponse] => {
535
+ logger_.error(`Error scraping ${link}:`, error);
536
+ return [link, { success: false, error: String(error) }];
537
+ })
538
+ )
539
+ );
540
540
  }
541
- return await Promise.all(promises);
541
+
542
+ const withHighlights = await Promise.all(
543
+ responses.map(([url, response]) =>
544
+ addHighlights(
545
+ processResponse(url, response),
546
+ query,
547
+ onGetHighlights
548
+ )
549
+ )
550
+ );
551
+ return withHighlights;
542
552
  } catch (error) {
543
553
  logger_.error('Error in scrapeMany:', error);
544
554
  return [];
@@ -0,0 +1,235 @@
1
+ import axios from 'axios';
2
+ import type * as t from './types';
3
+ import { createDefaultLogger } from './utils';
4
+
5
+ const DEFAULT_BASIC_TIMEOUT = 15000;
6
+ const DEFAULT_ADVANCED_TIMEOUT = 30000;
7
+ const MAX_BATCH_SIZE = 20;
8
+
9
+ const getDefaultTimeout = (extractDepth: 'basic' | 'advanced'): number =>
10
+ extractDepth === 'advanced'
11
+ ? DEFAULT_ADVANCED_TIMEOUT
12
+ : DEFAULT_BASIC_TIMEOUT;
13
+
14
+ const normalizeUrlKey = (url: string): string => {
15
+ try {
16
+ const parsedUrl = new URL(url);
17
+ parsedUrl.hash = '';
18
+ if (parsedUrl.pathname.length > 1) {
19
+ parsedUrl.pathname = parsedUrl.pathname.replace(/\/+$/, '');
20
+ }
21
+ return parsedUrl.toString();
22
+ } catch {
23
+ return url;
24
+ }
25
+ };
26
+
27
+ const setUrlResult = (
28
+ map: Map<string, t.TavilyExtractResult>,
29
+ result: t.TavilyExtractResult
30
+ ): void => {
31
+ map.set(result.url, result);
32
+ const normalizedUrl = normalizeUrlKey(result.url);
33
+ if (!map.has(normalizedUrl)) {
34
+ map.set(normalizedUrl, result);
35
+ }
36
+ };
37
+
38
+ export class TavilyScraper implements t.BaseScraper {
39
+ private apiKey: string;
40
+ private apiUrl: string;
41
+ private timeout: number;
42
+ private payloadTimeout: number | undefined;
43
+ private logger: t.Logger;
44
+ private extractDepth: 'basic' | 'advanced';
45
+ private includeImages: boolean;
46
+ private includeFavicon: boolean;
47
+ private format: 'markdown' | 'text' | undefined;
48
+
49
+ constructor(config: t.TavilyScraperConfig = {}) {
50
+ this.apiKey = config.apiKey ?? process.env.TAVILY_API_KEY ?? '';
51
+ this.apiUrl =
52
+ config.apiUrl ??
53
+ process.env.TAVILY_EXTRACT_URL ??
54
+ 'https://api.tavily.com/extract';
55
+ this.payloadTimeout = config.timeout;
56
+ this.extractDepth = config.extractDepth ?? 'basic';
57
+ this.timeout = config.timeout ?? getDefaultTimeout(this.extractDepth);
58
+ this.includeImages = config.includeImages ?? false;
59
+ this.includeFavicon = config.includeFavicon ?? false;
60
+ this.format = config.format;
61
+ this.logger = config.logger || createDefaultLogger();
62
+
63
+ if (!this.apiKey) {
64
+ this.logger.warn('TAVILY_API_KEY is not set. Scraping will not work.');
65
+ }
66
+ }
67
+
68
+ async scrapeUrl(
69
+ url: string,
70
+ options: t.TavilyScrapeOptions = {}
71
+ ): Promise<[string, t.TavilyScrapeResponse]> {
72
+ const results = await this.scrapeUrls([url], options);
73
+ return results[0];
74
+ }
75
+
76
+ async scrapeUrls(
77
+ urls: string[],
78
+ options: t.TavilyScrapeOptions = {}
79
+ ): Promise<Array<[string, t.TavilyScrapeResponse]>> {
80
+ if (!this.apiKey) {
81
+ return urls.map((url) => [
82
+ url,
83
+ { success: false, error: 'TAVILY_API_KEY is not set' },
84
+ ]);
85
+ }
86
+
87
+ const batches: string[][] = [];
88
+ for (let i = 0; i < urls.length; i += MAX_BATCH_SIZE) {
89
+ batches.push(urls.slice(i, i + MAX_BATCH_SIZE));
90
+ }
91
+
92
+ const allResults: Array<[string, t.TavilyScrapeResponse]> = [];
93
+
94
+ for (const batch of batches) {
95
+ const batchResults = await this.extractBatch(batch, options);
96
+ allResults.push(...batchResults);
97
+ }
98
+
99
+ return allResults;
100
+ }
101
+
102
+ private async extractBatch(
103
+ urls: string[],
104
+ options: t.TavilyScrapeOptions = {}
105
+ ): Promise<Array<[string, t.TavilyScrapeResponse]>> {
106
+ try {
107
+ const includeFavicon = options.includeFavicon ?? this.includeFavicon;
108
+ const format = options.format ?? this.format;
109
+ const extractDepth = options.extractDepth ?? this.extractDepth;
110
+ const payload: t.TavilyExtractPayload = {
111
+ urls,
112
+ extract_depth: extractDepth,
113
+ include_images: options.includeImages ?? this.includeImages,
114
+ };
115
+
116
+ if (includeFavicon) {
117
+ payload.include_favicon = true;
118
+ }
119
+ if (format != null) {
120
+ payload.format = format;
121
+ }
122
+
123
+ const effectiveTimeout =
124
+ options.timeout ??
125
+ this.payloadTimeout ??
126
+ (options.extractDepth != null
127
+ ? getDefaultTimeout(extractDepth)
128
+ : this.timeout);
129
+ const payloadTimeout = options.timeout ?? this.payloadTimeout;
130
+ if (payloadTimeout != null) {
131
+ payload.timeout = Math.min(Math.max(payloadTimeout / 1000, 1), 60);
132
+ }
133
+
134
+ const response = await axios.post<{
135
+ results?: t.TavilyExtractResult[];
136
+ failed_results?: t.TavilyExtractResult[];
137
+ }>(this.apiUrl, payload, {
138
+ headers: {
139
+ Authorization: `Bearer ${this.apiKey}`,
140
+ 'Content-Type': 'application/json',
141
+ },
142
+ timeout: effectiveTimeout,
143
+ });
144
+
145
+ const data = response.data;
146
+ const successMap = new Map<string, t.TavilyExtractResult>();
147
+ const failedMap = new Map<string, t.TavilyExtractResult>();
148
+
149
+ for (const result of data.results ?? []) {
150
+ setUrlResult(successMap, result);
151
+ }
152
+ for (const result of data.failed_results ?? []) {
153
+ setUrlResult(failedMap, result);
154
+ }
155
+
156
+ return urls.map((url): [string, t.TavilyScrapeResponse] => {
157
+ const success =
158
+ successMap.get(url) ?? successMap.get(normalizeUrlKey(url));
159
+ if (success && success.error == null) {
160
+ return [
161
+ url,
162
+ {
163
+ success: true,
164
+ data: {
165
+ rawContent: success.raw_content ?? '',
166
+ images: success.images ?? [],
167
+ favicon: success.favicon,
168
+ },
169
+ },
170
+ ];
171
+ }
172
+
173
+ const failed =
174
+ failedMap.get(url) ?? failedMap.get(normalizeUrlKey(url));
175
+ const error =
176
+ success?.error ??
177
+ failed?.error ??
178
+ 'URL not found in Tavily Extract response';
179
+ return [url, { success: false, error }];
180
+ });
181
+ } catch (error) {
182
+ const errorMessage =
183
+ error instanceof Error ? error.message : String(error);
184
+ return urls.map((url) => [
185
+ url,
186
+ {
187
+ success: false,
188
+ error: `Tavily Extract API request failed: ${errorMessage}`,
189
+ },
190
+ ]);
191
+ }
192
+ }
193
+
194
+ extractContent(
195
+ response: t.TavilyScrapeResponse
196
+ ): [string, undefined | t.References] {
197
+ if (!response.success || !response.data) {
198
+ return ['', undefined];
199
+ }
200
+
201
+ const content = response.data.rawContent ?? '';
202
+ const images = response.data.images ?? [];
203
+
204
+ const references: t.References | undefined =
205
+ images.length > 0
206
+ ? {
207
+ links: [],
208
+ images: images.map((imageUrl) => ({ originalUrl: imageUrl })),
209
+ videos: [],
210
+ }
211
+ : undefined;
212
+
213
+ return [content, references];
214
+ }
215
+
216
+ extractMetadata(response: t.TavilyScrapeResponse): t.GenericScrapeMetadata {
217
+ if (!response.success || !response.data) {
218
+ return {};
219
+ }
220
+
221
+ const metadata: t.GenericScrapeMetadata = {
222
+ images_count: response.data.images?.length ?? 0,
223
+ };
224
+ if (response.data.favicon != null) {
225
+ metadata.favicon = response.data.favicon;
226
+ }
227
+ return metadata;
228
+ }
229
+ }
230
+
231
+ export const createTavilyScraper = (
232
+ config: t.TavilyScraperConfig = {}
233
+ ): TavilyScraper => {
234
+ return new TavilyScraper(config);
235
+ };