@librechat/agents 3.1.75 → 3.1.77-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272) hide show
  1. package/dist/cjs/graphs/Graph.cjs +22 -3
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/hitl/askUserQuestion.cjs +67 -0
  4. package/dist/cjs/hitl/askUserQuestion.cjs.map +1 -0
  5. package/dist/cjs/hooks/HookRegistry.cjs +54 -0
  6. package/dist/cjs/hooks/HookRegistry.cjs.map +1 -1
  7. package/dist/cjs/hooks/createToolPolicyHook.cjs +115 -0
  8. package/dist/cjs/hooks/createToolPolicyHook.cjs.map +1 -0
  9. package/dist/cjs/hooks/executeHooks.cjs +40 -1
  10. package/dist/cjs/hooks/executeHooks.cjs.map +1 -1
  11. package/dist/cjs/hooks/types.cjs +1 -0
  12. package/dist/cjs/hooks/types.cjs.map +1 -1
  13. package/dist/cjs/langchain/google-common.cjs +3 -0
  14. package/dist/cjs/langchain/google-common.cjs.map +1 -0
  15. package/dist/cjs/langchain/index.cjs +86 -0
  16. package/dist/cjs/langchain/index.cjs.map +1 -0
  17. package/dist/cjs/langchain/language_models/chat_models.cjs +3 -0
  18. package/dist/cjs/langchain/language_models/chat_models.cjs.map +1 -0
  19. package/dist/cjs/langchain/messages/tool.cjs +3 -0
  20. package/dist/cjs/langchain/messages/tool.cjs.map +1 -0
  21. package/dist/cjs/langchain/messages.cjs +51 -0
  22. package/dist/cjs/langchain/messages.cjs.map +1 -0
  23. package/dist/cjs/langchain/openai.cjs +3 -0
  24. package/dist/cjs/langchain/openai.cjs.map +1 -0
  25. package/dist/cjs/langchain/prompts.cjs +11 -0
  26. package/dist/cjs/langchain/prompts.cjs.map +1 -0
  27. package/dist/cjs/langchain/runnables.cjs +19 -0
  28. package/dist/cjs/langchain/runnables.cjs.map +1 -0
  29. package/dist/cjs/langchain/tools.cjs +23 -0
  30. package/dist/cjs/langchain/tools.cjs.map +1 -0
  31. package/dist/cjs/langchain/utils/env.cjs +11 -0
  32. package/dist/cjs/langchain/utils/env.cjs.map +1 -0
  33. package/dist/cjs/llm/anthropic/index.cjs +145 -52
  34. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  35. package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
  36. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +21 -14
  37. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  38. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +84 -70
  39. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
  40. package/dist/cjs/llm/bedrock/index.cjs +1 -1
  41. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  42. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +213 -3
  43. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  44. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +2 -1
  45. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  46. package/dist/cjs/llm/google/utils/common.cjs +5 -4
  47. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  48. package/dist/cjs/llm/openai/index.cjs +519 -655
  49. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  50. package/dist/cjs/llm/openai/utils/index.cjs +20 -458
  51. package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
  52. package/dist/cjs/llm/openrouter/index.cjs +57 -175
  53. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  54. package/dist/cjs/llm/vertexai/index.cjs +5 -3
  55. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  56. package/dist/cjs/main.cjs +112 -3
  57. package/dist/cjs/main.cjs.map +1 -1
  58. package/dist/cjs/messages/cache.cjs +2 -1
  59. package/dist/cjs/messages/cache.cjs.map +1 -1
  60. package/dist/cjs/messages/core.cjs +7 -6
  61. package/dist/cjs/messages/core.cjs.map +1 -1
  62. package/dist/cjs/messages/format.cjs +73 -15
  63. package/dist/cjs/messages/format.cjs.map +1 -1
  64. package/dist/cjs/messages/langchain.cjs +26 -0
  65. package/dist/cjs/messages/langchain.cjs.map +1 -0
  66. package/dist/cjs/messages/prune.cjs +7 -6
  67. package/dist/cjs/messages/prune.cjs.map +1 -1
  68. package/dist/cjs/run.cjs +400 -42
  69. package/dist/cjs/run.cjs.map +1 -1
  70. package/dist/cjs/tools/ToolNode.cjs +556 -56
  71. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  72. package/dist/cjs/tools/search/search.cjs +55 -66
  73. package/dist/cjs/tools/search/search.cjs.map +1 -1
  74. package/dist/cjs/tools/search/tavily-scraper.cjs +189 -0
  75. package/dist/cjs/tools/search/tavily-scraper.cjs.map +1 -0
  76. package/dist/cjs/tools/search/tavily-search.cjs +372 -0
  77. package/dist/cjs/tools/search/tavily-search.cjs.map +1 -0
  78. package/dist/cjs/tools/search/tool.cjs +26 -4
  79. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  80. package/dist/cjs/tools/search/utils.cjs +10 -3
  81. package/dist/cjs/tools/search/utils.cjs.map +1 -1
  82. package/dist/esm/graphs/Graph.mjs +22 -3
  83. package/dist/esm/graphs/Graph.mjs.map +1 -1
  84. package/dist/esm/hitl/askUserQuestion.mjs +65 -0
  85. package/dist/esm/hitl/askUserQuestion.mjs.map +1 -0
  86. package/dist/esm/hooks/HookRegistry.mjs +54 -0
  87. package/dist/esm/hooks/HookRegistry.mjs.map +1 -1
  88. package/dist/esm/hooks/createToolPolicyHook.mjs +113 -0
  89. package/dist/esm/hooks/createToolPolicyHook.mjs.map +1 -0
  90. package/dist/esm/hooks/executeHooks.mjs +40 -1
  91. package/dist/esm/hooks/executeHooks.mjs.map +1 -1
  92. package/dist/esm/hooks/types.mjs +1 -0
  93. package/dist/esm/hooks/types.mjs.map +1 -1
  94. package/dist/esm/langchain/google-common.mjs +2 -0
  95. package/dist/esm/langchain/google-common.mjs.map +1 -0
  96. package/dist/esm/langchain/index.mjs +5 -0
  97. package/dist/esm/langchain/index.mjs.map +1 -0
  98. package/dist/esm/langchain/language_models/chat_models.mjs +2 -0
  99. package/dist/esm/langchain/language_models/chat_models.mjs.map +1 -0
  100. package/dist/esm/langchain/messages/tool.mjs +2 -0
  101. package/dist/esm/langchain/messages/tool.mjs.map +1 -0
  102. package/dist/esm/langchain/messages.mjs +2 -0
  103. package/dist/esm/langchain/messages.mjs.map +1 -0
  104. package/dist/esm/langchain/openai.mjs +2 -0
  105. package/dist/esm/langchain/openai.mjs.map +1 -0
  106. package/dist/esm/langchain/prompts.mjs +2 -0
  107. package/dist/esm/langchain/prompts.mjs.map +1 -0
  108. package/dist/esm/langchain/runnables.mjs +2 -0
  109. package/dist/esm/langchain/runnables.mjs.map +1 -0
  110. package/dist/esm/langchain/tools.mjs +2 -0
  111. package/dist/esm/langchain/tools.mjs.map +1 -0
  112. package/dist/esm/langchain/utils/env.mjs +2 -0
  113. package/dist/esm/langchain/utils/env.mjs.map +1 -0
  114. package/dist/esm/llm/anthropic/index.mjs +146 -54
  115. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  116. package/dist/esm/llm/anthropic/types.mjs.map +1 -1
  117. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +21 -14
  118. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  119. package/dist/esm/llm/anthropic/utils/message_outputs.mjs +84 -71
  120. package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
  121. package/dist/esm/llm/bedrock/index.mjs +1 -1
  122. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  123. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +214 -4
  124. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  125. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +2 -1
  126. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  127. package/dist/esm/llm/google/utils/common.mjs +5 -4
  128. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  129. package/dist/esm/llm/openai/index.mjs +520 -656
  130. package/dist/esm/llm/openai/index.mjs.map +1 -1
  131. package/dist/esm/llm/openai/utils/index.mjs +23 -459
  132. package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
  133. package/dist/esm/llm/openrouter/index.mjs +57 -175
  134. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  135. package/dist/esm/llm/vertexai/index.mjs +5 -3
  136. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  137. package/dist/esm/main.mjs +7 -0
  138. package/dist/esm/main.mjs.map +1 -1
  139. package/dist/esm/messages/cache.mjs +2 -1
  140. package/dist/esm/messages/cache.mjs.map +1 -1
  141. package/dist/esm/messages/core.mjs +7 -6
  142. package/dist/esm/messages/core.mjs.map +1 -1
  143. package/dist/esm/messages/format.mjs +73 -15
  144. package/dist/esm/messages/format.mjs.map +1 -1
  145. package/dist/esm/messages/langchain.mjs +23 -0
  146. package/dist/esm/messages/langchain.mjs.map +1 -0
  147. package/dist/esm/messages/prune.mjs +7 -6
  148. package/dist/esm/messages/prune.mjs.map +1 -1
  149. package/dist/esm/run.mjs +400 -42
  150. package/dist/esm/run.mjs.map +1 -1
  151. package/dist/esm/tools/ToolNode.mjs +557 -57
  152. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  153. package/dist/esm/tools/search/search.mjs +55 -66
  154. package/dist/esm/tools/search/search.mjs.map +1 -1
  155. package/dist/esm/tools/search/tavily-scraper.mjs +186 -0
  156. package/dist/esm/tools/search/tavily-scraper.mjs.map +1 -0
  157. package/dist/esm/tools/search/tavily-search.mjs +370 -0
  158. package/dist/esm/tools/search/tavily-search.mjs.map +1 -0
  159. package/dist/esm/tools/search/tool.mjs +26 -4
  160. package/dist/esm/tools/search/tool.mjs.map +1 -1
  161. package/dist/esm/tools/search/utils.mjs +10 -3
  162. package/dist/esm/tools/search/utils.mjs.map +1 -1
  163. package/dist/types/graphs/Graph.d.ts +7 -0
  164. package/dist/types/hitl/askUserQuestion.d.ts +55 -0
  165. package/dist/types/hitl/index.d.ts +6 -0
  166. package/dist/types/hooks/HookRegistry.d.ts +58 -0
  167. package/dist/types/hooks/createToolPolicyHook.d.ts +87 -0
  168. package/dist/types/hooks/index.d.ts +4 -1
  169. package/dist/types/hooks/types.d.ts +109 -3
  170. package/dist/types/index.d.ts +10 -0
  171. package/dist/types/langchain/google-common.d.ts +1 -0
  172. package/dist/types/langchain/index.d.ts +8 -0
  173. package/dist/types/langchain/language_models/chat_models.d.ts +1 -0
  174. package/dist/types/langchain/messages/tool.d.ts +1 -0
  175. package/dist/types/langchain/messages.d.ts +2 -0
  176. package/dist/types/langchain/openai.d.ts +1 -0
  177. package/dist/types/langchain/prompts.d.ts +1 -0
  178. package/dist/types/langchain/runnables.d.ts +2 -0
  179. package/dist/types/langchain/tools.d.ts +2 -0
  180. package/dist/types/langchain/utils/env.d.ts +1 -0
  181. package/dist/types/llm/anthropic/index.d.ts +22 -9
  182. package/dist/types/llm/anthropic/types.d.ts +5 -1
  183. package/dist/types/llm/anthropic/utils/message_outputs.d.ts +13 -6
  184. package/dist/types/llm/anthropic/utils/output_parsers.d.ts +1 -1
  185. package/dist/types/llm/openai/index.d.ts +21 -24
  186. package/dist/types/llm/openrouter/index.d.ts +11 -9
  187. package/dist/types/llm/vertexai/index.d.ts +1 -0
  188. package/dist/types/messages/cache.d.ts +4 -1
  189. package/dist/types/messages/format.d.ts +4 -1
  190. package/dist/types/messages/langchain.d.ts +27 -0
  191. package/dist/types/run.d.ts +117 -1
  192. package/dist/types/tools/ToolNode.d.ts +26 -1
  193. package/dist/types/tools/search/tavily-scraper.d.ts +19 -0
  194. package/dist/types/tools/search/tavily-search.d.ts +4 -0
  195. package/dist/types/tools/search/types.d.ts +99 -5
  196. package/dist/types/tools/search/utils.d.ts +2 -2
  197. package/dist/types/types/graph.d.ts +23 -37
  198. package/dist/types/types/hitl.d.ts +272 -0
  199. package/dist/types/types/index.d.ts +1 -0
  200. package/dist/types/types/llm.d.ts +3 -3
  201. package/dist/types/types/run.d.ts +33 -0
  202. package/dist/types/types/stream.d.ts +1 -1
  203. package/dist/types/types/tools.d.ts +19 -0
  204. package/package.json +80 -17
  205. package/src/graphs/Graph.ts +33 -4
  206. package/src/graphs/__tests__/composition.smoke.test.ts +188 -0
  207. package/src/hitl/askUserQuestion.ts +72 -0
  208. package/src/hitl/index.ts +7 -0
  209. package/src/hooks/HookRegistry.ts +71 -0
  210. package/src/hooks/__tests__/createToolPolicyHook.test.ts +259 -0
  211. package/src/hooks/createToolPolicyHook.ts +184 -0
  212. package/src/hooks/executeHooks.ts +50 -1
  213. package/src/hooks/index.ts +6 -0
  214. package/src/hooks/types.ts +112 -0
  215. package/src/index.ts +22 -0
  216. package/src/langchain/google-common.ts +1 -0
  217. package/src/langchain/index.ts +8 -0
  218. package/src/langchain/language_models/chat_models.ts +1 -0
  219. package/src/langchain/messages/tool.ts +5 -0
  220. package/src/langchain/messages.ts +21 -0
  221. package/src/langchain/openai.ts +1 -0
  222. package/src/langchain/prompts.ts +1 -0
  223. package/src/langchain/runnables.ts +7 -0
  224. package/src/langchain/tools.ts +8 -0
  225. package/src/langchain/utils/env.ts +1 -0
  226. package/src/llm/anthropic/index.ts +252 -84
  227. package/src/llm/anthropic/llm.spec.ts +751 -102
  228. package/src/llm/anthropic/types.ts +9 -1
  229. package/src/llm/anthropic/utils/message_inputs.ts +37 -19
  230. package/src/llm/anthropic/utils/message_outputs.ts +119 -101
  231. package/src/llm/bedrock/index.ts +2 -2
  232. package/src/llm/bedrock/llm.spec.ts +341 -0
  233. package/src/llm/bedrock/utils/message_inputs.ts +303 -4
  234. package/src/llm/bedrock/utils/message_outputs.ts +2 -1
  235. package/src/llm/custom-chat-models.smoke.test.ts +836 -0
  236. package/src/llm/google/llm.spec.ts +339 -57
  237. package/src/llm/google/utils/common.ts +53 -48
  238. package/src/llm/openai/contentBlocks.test.ts +346 -0
  239. package/src/llm/openai/index.ts +856 -833
  240. package/src/llm/openai/utils/index.ts +107 -78
  241. package/src/llm/openai/utils/messages.test.ts +159 -0
  242. package/src/llm/openrouter/index.ts +124 -247
  243. package/src/llm/openrouter/reasoning.test.ts +8 -1
  244. package/src/llm/vertexai/index.ts +11 -5
  245. package/src/llm/vertexai/llm.spec.ts +28 -1
  246. package/src/messages/cache.test.ts +4 -3
  247. package/src/messages/cache.ts +3 -2
  248. package/src/messages/core.ts +16 -9
  249. package/src/messages/format.ts +96 -16
  250. package/src/messages/formatAgentMessages.test.ts +166 -1
  251. package/src/messages/langchain.ts +39 -0
  252. package/src/messages/prune.ts +12 -8
  253. package/src/run.ts +456 -47
  254. package/src/scripts/caching.ts +2 -3
  255. package/src/specs/summarization.test.ts +51 -58
  256. package/src/tools/ToolNode.ts +706 -63
  257. package/src/tools/__tests__/hitl.test.ts +3593 -0
  258. package/src/tools/search/search.ts +83 -73
  259. package/src/tools/search/tavily-scraper.ts +235 -0
  260. package/src/tools/search/tavily-search.ts +424 -0
  261. package/src/tools/search/tavily.test.ts +965 -0
  262. package/src/tools/search/tool.ts +36 -26
  263. package/src/tools/search/types.ts +133 -8
  264. package/src/tools/search/utils.ts +13 -5
  265. package/src/types/graph.ts +32 -87
  266. package/src/types/hitl.ts +303 -0
  267. package/src/types/index.ts +1 -0
  268. package/src/types/llm.ts +3 -3
  269. package/src/types/run.ts +33 -0
  270. package/src/types/stream.ts +1 -1
  271. package/src/types/tools.ts +19 -0
  272. package/src/utils/llmConfig.ts +1 -6
@@ -2,6 +2,7 @@ import axios from 'axios';
2
2
  import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
3
3
  import type * as t from './types';
4
4
  import { getAttribution, createDefaultLogger } from './utils';
5
+ import { createTavilyAPI } from './tavily-search';
5
6
  import { BaseReranker } from './rerankers';
6
7
 
7
8
  const chunker = {
@@ -418,15 +419,20 @@ export const createSearchAPI = (
418
419
  serperApiKey,
419
420
  searxngInstanceUrl,
420
421
  searxngApiKey,
422
+ tavilyApiKey,
423
+ tavilySearchUrl,
424
+ tavilySearchOptions,
421
425
  } = config;
422
426
 
423
427
  if (searchProvider.toLowerCase() === 'serper') {
424
428
  return createSerperAPI(serperApiKey);
425
429
  } else if (searchProvider.toLowerCase() === 'searxng') {
426
430
  return createSearXNGAPI(searxngInstanceUrl, searxngApiKey);
431
+ } else if (searchProvider.toLowerCase() === 'tavily') {
432
+ return createTavilyAPI(tavilyApiKey, tavilySearchUrl, tavilySearchOptions);
427
433
  } else {
428
434
  throw new Error(
429
- `Invalid search provider: ${searchProvider}. Must be 'serper' or 'searxng'`
435
+ `Invalid search provider: ${searchProvider}. Must be 'serper', 'searxng', or 'tavily'`
430
436
  );
431
437
  }
432
438
  };
@@ -454,6 +460,56 @@ export const createSourceProcessor = (
454
460
  const logger_ = logger || createDefaultLogger();
455
461
  const scraper = scraperInstance;
456
462
 
463
+ const processResponse = (
464
+ url: string,
465
+ response: t.AnyScraperResponse
466
+ ): t.ScrapeResult => {
467
+ const rawMetadata = scraper.extractMetadata(response);
468
+ const metadata =
469
+ Object.keys(rawMetadata).length > 0 ? rawMetadata : undefined;
470
+ const attribution = getAttribution(url, metadata, logger_);
471
+
472
+ if (response.success && response.data) {
473
+ const [content, references] = scraper.extractContent(response);
474
+ return {
475
+ url,
476
+ references,
477
+ attribution,
478
+ content: chunker.cleanText(content),
479
+ };
480
+ }
481
+
482
+ logger_.error(
483
+ `Error scraping ${url}: ${response.error ?? 'Unknown error'}`
484
+ );
485
+ return { url, attribution, error: true, content: '' };
486
+ };
487
+
488
+ const addHighlights = async (
489
+ result: t.ScrapeResult,
490
+ query: string,
491
+ onGetHighlights: t.SearchToolConfig['onGetHighlights']
492
+ ): Promise<t.ScrapeResult> => {
493
+ if (result.error != null) {
494
+ return result;
495
+ }
496
+ try {
497
+ const highlights = await getHighlights({
498
+ query,
499
+ reranker,
500
+ content: result.content,
501
+ logger: logger_,
502
+ });
503
+ if (onGetHighlights) {
504
+ onGetHighlights(result.url);
505
+ }
506
+ return { ...result, highlights };
507
+ } catch (error) {
508
+ logger_.error('Error processing scraped content:', error);
509
+ return result;
510
+ }
511
+ };
512
+
457
513
  const webScraper = {
458
514
  scrapeMany: async ({
459
515
  query,
@@ -465,80 +521,34 @@ export const createSourceProcessor = (
465
521
  onGetHighlights: t.SearchToolConfig['onGetHighlights'];
466
522
  }): Promise<Array<t.ScrapeResult>> => {
467
523
  logger_.debug(`Scraping ${links.length} links`);
468
- const promises: Array<Promise<t.ScrapeResult>> = [];
469
524
  try {
470
- for (let i = 0; i < links.length; i++) {
471
- const currentLink = links[i];
472
- const promise: Promise<t.ScrapeResult> = scraper
473
- .scrapeUrl(currentLink, {})
474
- .then(([url, response]) => {
475
- const attribution = getAttribution(
476
- url,
477
- response.data?.metadata,
478
- logger_
479
- );
480
- if (response.success && response.data) {
481
- const [content, references] = scraper.extractContent(response);
482
- return {
483
- url,
484
- references,
485
- attribution,
486
- content: chunker.cleanText(content),
487
- } as t.ScrapeResult;
488
- } else {
489
- logger_.error(
490
- `Error scraping ${url}: ${response.error ?? 'Unknown error'}`
491
- );
492
- }
493
-
494
- return {
495
- url,
496
- attribution,
497
- error: true,
498
- content: '',
499
- } as t.ScrapeResult;
500
- })
501
- .then(async (result) => {
502
- try {
503
- if (result.error != null) {
504
- logger_.error(
505
- `Error scraping ${result.url}: ${result.content}`
506
- );
507
- return {
508
- ...result,
509
- };
510
- }
511
- const highlights = await getHighlights({
512
- query,
513
- reranker,
514
- content: result.content,
515
- logger: logger_,
516
- });
517
- if (onGetHighlights) {
518
- onGetHighlights(result.url);
519
- }
520
- return {
521
- ...result,
522
- highlights,
523
- };
524
- } catch (error) {
525
- logger_.error('Error processing scraped content:', error);
526
- return {
527
- ...result,
528
- };
529
- }
530
- })
531
- .catch((error) => {
532
- logger_.error(`Error scraping ${currentLink}:`, error);
533
- return {
534
- url: currentLink,
535
- error: true,
536
- content: '',
537
- };
538
- });
539
- promises.push(promise);
525
+ let responses: Array<[string, t.AnyScraperResponse]>;
526
+
527
+ if (scraper.scrapeUrls) {
528
+ responses = await scraper.scrapeUrls(links);
529
+ } else {
530
+ responses = await Promise.all(
531
+ links.map((link) =>
532
+ scraper
533
+ .scrapeUrl(link, {})
534
+ .catch((error): [string, t.AnyScraperResponse] => {
535
+ logger_.error(`Error scraping ${link}:`, error);
536
+ return [link, { success: false, error: String(error) }];
537
+ })
538
+ )
539
+ );
540
540
  }
541
- return await Promise.all(promises);
541
+
542
+ const withHighlights = await Promise.all(
543
+ responses.map(([url, response]) =>
544
+ addHighlights(
545
+ processResponse(url, response),
546
+ query,
547
+ onGetHighlights
548
+ )
549
+ )
550
+ );
551
+ return withHighlights;
542
552
  } catch (error) {
543
553
  logger_.error('Error in scrapeMany:', error);
544
554
  return [];
@@ -0,0 +1,235 @@
1
+ import axios from 'axios';
2
+ import type * as t from './types';
3
+ import { createDefaultLogger } from './utils';
4
+
5
+ const DEFAULT_BASIC_TIMEOUT = 15000;
6
+ const DEFAULT_ADVANCED_TIMEOUT = 30000;
7
+ const MAX_BATCH_SIZE = 20;
8
+
9
+ const getDefaultTimeout = (extractDepth: 'basic' | 'advanced'): number =>
10
+ extractDepth === 'advanced'
11
+ ? DEFAULT_ADVANCED_TIMEOUT
12
+ : DEFAULT_BASIC_TIMEOUT;
13
+
14
+ const normalizeUrlKey = (url: string): string => {
15
+ try {
16
+ const parsedUrl = new URL(url);
17
+ parsedUrl.hash = '';
18
+ if (parsedUrl.pathname.length > 1) {
19
+ parsedUrl.pathname = parsedUrl.pathname.replace(/\/+$/, '');
20
+ }
21
+ return parsedUrl.toString();
22
+ } catch {
23
+ return url;
24
+ }
25
+ };
26
+
27
+ const setUrlResult = (
28
+ map: Map<string, t.TavilyExtractResult>,
29
+ result: t.TavilyExtractResult
30
+ ): void => {
31
+ map.set(result.url, result);
32
+ const normalizedUrl = normalizeUrlKey(result.url);
33
+ if (!map.has(normalizedUrl)) {
34
+ map.set(normalizedUrl, result);
35
+ }
36
+ };
37
+
38
+ export class TavilyScraper implements t.BaseScraper {
39
+ private apiKey: string;
40
+ private apiUrl: string;
41
+ private timeout: number;
42
+ private payloadTimeout: number | undefined;
43
+ private logger: t.Logger;
44
+ private extractDepth: 'basic' | 'advanced';
45
+ private includeImages: boolean;
46
+ private includeFavicon: boolean;
47
+ private format: 'markdown' | 'text' | undefined;
48
+
49
+ constructor(config: t.TavilyScraperConfig = {}) {
50
+ this.apiKey = config.apiKey ?? process.env.TAVILY_API_KEY ?? '';
51
+ this.apiUrl =
52
+ config.apiUrl ??
53
+ process.env.TAVILY_EXTRACT_URL ??
54
+ 'https://api.tavily.com/extract';
55
+ this.payloadTimeout = config.timeout;
56
+ this.extractDepth = config.extractDepth ?? 'basic';
57
+ this.timeout = config.timeout ?? getDefaultTimeout(this.extractDepth);
58
+ this.includeImages = config.includeImages ?? false;
59
+ this.includeFavicon = config.includeFavicon ?? false;
60
+ this.format = config.format;
61
+ this.logger = config.logger || createDefaultLogger();
62
+
63
+ if (!this.apiKey) {
64
+ this.logger.warn('TAVILY_API_KEY is not set. Scraping will not work.');
65
+ }
66
+ }
67
+
68
+ async scrapeUrl(
69
+ url: string,
70
+ options: t.TavilyScrapeOptions = {}
71
+ ): Promise<[string, t.TavilyScrapeResponse]> {
72
+ const results = await this.scrapeUrls([url], options);
73
+ return results[0];
74
+ }
75
+
76
+ async scrapeUrls(
77
+ urls: string[],
78
+ options: t.TavilyScrapeOptions = {}
79
+ ): Promise<Array<[string, t.TavilyScrapeResponse]>> {
80
+ if (!this.apiKey) {
81
+ return urls.map((url) => [
82
+ url,
83
+ { success: false, error: 'TAVILY_API_KEY is not set' },
84
+ ]);
85
+ }
86
+
87
+ const batches: string[][] = [];
88
+ for (let i = 0; i < urls.length; i += MAX_BATCH_SIZE) {
89
+ batches.push(urls.slice(i, i + MAX_BATCH_SIZE));
90
+ }
91
+
92
+ const allResults: Array<[string, t.TavilyScrapeResponse]> = [];
93
+
94
+ for (const batch of batches) {
95
+ const batchResults = await this.extractBatch(batch, options);
96
+ allResults.push(...batchResults);
97
+ }
98
+
99
+ return allResults;
100
+ }
101
+
102
+ private async extractBatch(
103
+ urls: string[],
104
+ options: t.TavilyScrapeOptions = {}
105
+ ): Promise<Array<[string, t.TavilyScrapeResponse]>> {
106
+ try {
107
+ const includeFavicon = options.includeFavicon ?? this.includeFavicon;
108
+ const format = options.format ?? this.format;
109
+ const extractDepth = options.extractDepth ?? this.extractDepth;
110
+ const payload: t.TavilyExtractPayload = {
111
+ urls,
112
+ extract_depth: extractDepth,
113
+ include_images: options.includeImages ?? this.includeImages,
114
+ };
115
+
116
+ if (includeFavicon) {
117
+ payload.include_favicon = true;
118
+ }
119
+ if (format != null) {
120
+ payload.format = format;
121
+ }
122
+
123
+ const effectiveTimeout =
124
+ options.timeout ??
125
+ this.payloadTimeout ??
126
+ (options.extractDepth != null
127
+ ? getDefaultTimeout(extractDepth)
128
+ : this.timeout);
129
+ const payloadTimeout = options.timeout ?? this.payloadTimeout;
130
+ if (payloadTimeout != null) {
131
+ payload.timeout = Math.min(Math.max(payloadTimeout / 1000, 1), 60);
132
+ }
133
+
134
+ const response = await axios.post<{
135
+ results?: t.TavilyExtractResult[];
136
+ failed_results?: t.TavilyExtractResult[];
137
+ }>(this.apiUrl, payload, {
138
+ headers: {
139
+ Authorization: `Bearer ${this.apiKey}`,
140
+ 'Content-Type': 'application/json',
141
+ },
142
+ timeout: effectiveTimeout,
143
+ });
144
+
145
+ const data = response.data;
146
+ const successMap = new Map<string, t.TavilyExtractResult>();
147
+ const failedMap = new Map<string, t.TavilyExtractResult>();
148
+
149
+ for (const result of data.results ?? []) {
150
+ setUrlResult(successMap, result);
151
+ }
152
+ for (const result of data.failed_results ?? []) {
153
+ setUrlResult(failedMap, result);
154
+ }
155
+
156
+ return urls.map((url): [string, t.TavilyScrapeResponse] => {
157
+ const success =
158
+ successMap.get(url) ?? successMap.get(normalizeUrlKey(url));
159
+ if (success && success.error == null) {
160
+ return [
161
+ url,
162
+ {
163
+ success: true,
164
+ data: {
165
+ rawContent: success.raw_content ?? '',
166
+ images: success.images ?? [],
167
+ favicon: success.favicon,
168
+ },
169
+ },
170
+ ];
171
+ }
172
+
173
+ const failed =
174
+ failedMap.get(url) ?? failedMap.get(normalizeUrlKey(url));
175
+ const error =
176
+ success?.error ??
177
+ failed?.error ??
178
+ 'URL not found in Tavily Extract response';
179
+ return [url, { success: false, error }];
180
+ });
181
+ } catch (error) {
182
+ const errorMessage =
183
+ error instanceof Error ? error.message : String(error);
184
+ return urls.map((url) => [
185
+ url,
186
+ {
187
+ success: false,
188
+ error: `Tavily Extract API request failed: ${errorMessage}`,
189
+ },
190
+ ]);
191
+ }
192
+ }
193
+
194
+ extractContent(
195
+ response: t.TavilyScrapeResponse
196
+ ): [string, undefined | t.References] {
197
+ if (!response.success || !response.data) {
198
+ return ['', undefined];
199
+ }
200
+
201
+ const content = response.data.rawContent ?? '';
202
+ const images = response.data.images ?? [];
203
+
204
+ const references: t.References | undefined =
205
+ images.length > 0
206
+ ? {
207
+ links: [],
208
+ images: images.map((imageUrl) => ({ originalUrl: imageUrl })),
209
+ videos: [],
210
+ }
211
+ : undefined;
212
+
213
+ return [content, references];
214
+ }
215
+
216
+ extractMetadata(response: t.TavilyScrapeResponse): t.GenericScrapeMetadata {
217
+ if (!response.success || !response.data) {
218
+ return {};
219
+ }
220
+
221
+ const metadata: t.GenericScrapeMetadata = {
222
+ images_count: response.data.images?.length ?? 0,
223
+ };
224
+ if (response.data.favicon != null) {
225
+ metadata.favicon = response.data.favicon;
226
+ }
227
+ return metadata;
228
+ }
229
+ }
230
+
231
+ export const createTavilyScraper = (
232
+ config: t.TavilyScraperConfig = {}
233
+ ): TavilyScraper => {
234
+ return new TavilyScraper(config);
235
+ };