@librechat/agents 3.1.75 → 3.1.77-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +22 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/hitl/askUserQuestion.cjs +67 -0
- package/dist/cjs/hitl/askUserQuestion.cjs.map +1 -0
- package/dist/cjs/hooks/HookRegistry.cjs +54 -0
- package/dist/cjs/hooks/HookRegistry.cjs.map +1 -1
- package/dist/cjs/hooks/createToolPolicyHook.cjs +115 -0
- package/dist/cjs/hooks/createToolPolicyHook.cjs.map +1 -0
- package/dist/cjs/hooks/executeHooks.cjs +40 -1
- package/dist/cjs/hooks/executeHooks.cjs.map +1 -1
- package/dist/cjs/hooks/types.cjs +1 -0
- package/dist/cjs/hooks/types.cjs.map +1 -1
- package/dist/cjs/langchain/google-common.cjs +3 -0
- package/dist/cjs/langchain/google-common.cjs.map +1 -0
- package/dist/cjs/langchain/index.cjs +86 -0
- package/dist/cjs/langchain/index.cjs.map +1 -0
- package/dist/cjs/langchain/language_models/chat_models.cjs +3 -0
- package/dist/cjs/langchain/language_models/chat_models.cjs.map +1 -0
- package/dist/cjs/langchain/messages/tool.cjs +3 -0
- package/dist/cjs/langchain/messages/tool.cjs.map +1 -0
- package/dist/cjs/langchain/messages.cjs +51 -0
- package/dist/cjs/langchain/messages.cjs.map +1 -0
- package/dist/cjs/langchain/openai.cjs +3 -0
- package/dist/cjs/langchain/openai.cjs.map +1 -0
- package/dist/cjs/langchain/prompts.cjs +11 -0
- package/dist/cjs/langchain/prompts.cjs.map +1 -0
- package/dist/cjs/langchain/runnables.cjs +19 -0
- package/dist/cjs/langchain/runnables.cjs.map +1 -0
- package/dist/cjs/langchain/tools.cjs +23 -0
- package/dist/cjs/langchain/tools.cjs.map +1 -0
- package/dist/cjs/langchain/utils/env.cjs +11 -0
- package/dist/cjs/langchain/utils/env.cjs.map +1 -0
- package/dist/cjs/llm/anthropic/index.cjs +145 -52
- package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +21 -14
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +84 -70
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +1 -1
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +213 -3
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +2 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/utils/common.cjs +5 -4
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +519 -655
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/utils/index.cjs +20 -458
- package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +57 -175
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +5 -3
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +112 -3
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +2 -1
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +7 -6
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +73 -15
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/langchain.cjs +26 -0
- package/dist/cjs/messages/langchain.cjs.map +1 -0
- package/dist/cjs/messages/prune.cjs +7 -6
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +400 -42
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +556 -56
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +55 -66
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tavily-scraper.cjs +189 -0
- package/dist/cjs/tools/search/tavily-scraper.cjs.map +1 -0
- package/dist/cjs/tools/search/tavily-search.cjs +372 -0
- package/dist/cjs/tools/search/tavily-search.cjs.map +1 -0
- package/dist/cjs/tools/search/tool.cjs +26 -4
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +10 -3
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +22 -3
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/hitl/askUserQuestion.mjs +65 -0
- package/dist/esm/hitl/askUserQuestion.mjs.map +1 -0
- package/dist/esm/hooks/HookRegistry.mjs +54 -0
- package/dist/esm/hooks/HookRegistry.mjs.map +1 -1
- package/dist/esm/hooks/createToolPolicyHook.mjs +113 -0
- package/dist/esm/hooks/createToolPolicyHook.mjs.map +1 -0
- package/dist/esm/hooks/executeHooks.mjs +40 -1
- package/dist/esm/hooks/executeHooks.mjs.map +1 -1
- package/dist/esm/hooks/types.mjs +1 -0
- package/dist/esm/hooks/types.mjs.map +1 -1
- package/dist/esm/langchain/google-common.mjs +2 -0
- package/dist/esm/langchain/google-common.mjs.map +1 -0
- package/dist/esm/langchain/index.mjs +5 -0
- package/dist/esm/langchain/index.mjs.map +1 -0
- package/dist/esm/langchain/language_models/chat_models.mjs +2 -0
- package/dist/esm/langchain/language_models/chat_models.mjs.map +1 -0
- package/dist/esm/langchain/messages/tool.mjs +2 -0
- package/dist/esm/langchain/messages/tool.mjs.map +1 -0
- package/dist/esm/langchain/messages.mjs +2 -0
- package/dist/esm/langchain/messages.mjs.map +1 -0
- package/dist/esm/langchain/openai.mjs +2 -0
- package/dist/esm/langchain/openai.mjs.map +1 -0
- package/dist/esm/langchain/prompts.mjs +2 -0
- package/dist/esm/langchain/prompts.mjs.map +1 -0
- package/dist/esm/langchain/runnables.mjs +2 -0
- package/dist/esm/langchain/runnables.mjs.map +1 -0
- package/dist/esm/langchain/tools.mjs +2 -0
- package/dist/esm/langchain/tools.mjs.map +1 -0
- package/dist/esm/langchain/utils/env.mjs +2 -0
- package/dist/esm/langchain/utils/env.mjs.map +1 -0
- package/dist/esm/llm/anthropic/index.mjs +146 -54
- package/dist/esm/llm/anthropic/index.mjs.map +1 -1
- package/dist/esm/llm/anthropic/types.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +21 -14
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs +84 -71
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +1 -1
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +214 -4
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +2 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/utils/common.mjs +5 -4
- package/dist/esm/llm/google/utils/common.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +520 -656
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openai/utils/index.mjs +23 -459
- package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +57 -175
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +5 -3
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +7 -0
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +2 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +7 -6
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +73 -15
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/langchain.mjs +23 -0
- package/dist/esm/messages/langchain.mjs.map +1 -0
- package/dist/esm/messages/prune.mjs +7 -6
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +400 -42
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +557 -57
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +55 -66
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tavily-scraper.mjs +186 -0
- package/dist/esm/tools/search/tavily-scraper.mjs.map +1 -0
- package/dist/esm/tools/search/tavily-search.mjs +370 -0
- package/dist/esm/tools/search/tavily-search.mjs.map +1 -0
- package/dist/esm/tools/search/tool.mjs +26 -4
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +10 -3
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/types/graphs/Graph.d.ts +7 -0
- package/dist/types/hitl/askUserQuestion.d.ts +55 -0
- package/dist/types/hitl/index.d.ts +6 -0
- package/dist/types/hooks/HookRegistry.d.ts +58 -0
- package/dist/types/hooks/createToolPolicyHook.d.ts +87 -0
- package/dist/types/hooks/index.d.ts +4 -1
- package/dist/types/hooks/types.d.ts +109 -3
- package/dist/types/index.d.ts +10 -0
- package/dist/types/langchain/google-common.d.ts +1 -0
- package/dist/types/langchain/index.d.ts +8 -0
- package/dist/types/langchain/language_models/chat_models.d.ts +1 -0
- package/dist/types/langchain/messages/tool.d.ts +1 -0
- package/dist/types/langchain/messages.d.ts +2 -0
- package/dist/types/langchain/openai.d.ts +1 -0
- package/dist/types/langchain/prompts.d.ts +1 -0
- package/dist/types/langchain/runnables.d.ts +2 -0
- package/dist/types/langchain/tools.d.ts +2 -0
- package/dist/types/langchain/utils/env.d.ts +1 -0
- package/dist/types/llm/anthropic/index.d.ts +22 -9
- package/dist/types/llm/anthropic/types.d.ts +5 -1
- package/dist/types/llm/anthropic/utils/message_outputs.d.ts +13 -6
- package/dist/types/llm/anthropic/utils/output_parsers.d.ts +1 -1
- package/dist/types/llm/openai/index.d.ts +21 -24
- package/dist/types/llm/openrouter/index.d.ts +11 -9
- package/dist/types/llm/vertexai/index.d.ts +1 -0
- package/dist/types/messages/cache.d.ts +4 -1
- package/dist/types/messages/format.d.ts +4 -1
- package/dist/types/messages/langchain.d.ts +27 -0
- package/dist/types/run.d.ts +117 -1
- package/dist/types/tools/ToolNode.d.ts +26 -1
- package/dist/types/tools/search/tavily-scraper.d.ts +19 -0
- package/dist/types/tools/search/tavily-search.d.ts +4 -0
- package/dist/types/tools/search/types.d.ts +99 -5
- package/dist/types/tools/search/utils.d.ts +2 -2
- package/dist/types/types/graph.d.ts +23 -37
- package/dist/types/types/hitl.d.ts +272 -0
- package/dist/types/types/index.d.ts +1 -0
- package/dist/types/types/llm.d.ts +3 -3
- package/dist/types/types/run.d.ts +33 -0
- package/dist/types/types/stream.d.ts +1 -1
- package/dist/types/types/tools.d.ts +19 -0
- package/package.json +80 -17
- package/src/graphs/Graph.ts +33 -4
- package/src/graphs/__tests__/composition.smoke.test.ts +188 -0
- package/src/hitl/askUserQuestion.ts +72 -0
- package/src/hitl/index.ts +7 -0
- package/src/hooks/HookRegistry.ts +71 -0
- package/src/hooks/__tests__/createToolPolicyHook.test.ts +259 -0
- package/src/hooks/createToolPolicyHook.ts +184 -0
- package/src/hooks/executeHooks.ts +50 -1
- package/src/hooks/index.ts +6 -0
- package/src/hooks/types.ts +112 -0
- package/src/index.ts +22 -0
- package/src/langchain/google-common.ts +1 -0
- package/src/langchain/index.ts +8 -0
- package/src/langchain/language_models/chat_models.ts +1 -0
- package/src/langchain/messages/tool.ts +5 -0
- package/src/langchain/messages.ts +21 -0
- package/src/langchain/openai.ts +1 -0
- package/src/langchain/prompts.ts +1 -0
- package/src/langchain/runnables.ts +7 -0
- package/src/langchain/tools.ts +8 -0
- package/src/langchain/utils/env.ts +1 -0
- package/src/llm/anthropic/index.ts +252 -84
- package/src/llm/anthropic/llm.spec.ts +751 -102
- package/src/llm/anthropic/types.ts +9 -1
- package/src/llm/anthropic/utils/message_inputs.ts +37 -19
- package/src/llm/anthropic/utils/message_outputs.ts +119 -101
- package/src/llm/bedrock/index.ts +2 -2
- package/src/llm/bedrock/llm.spec.ts +341 -0
- package/src/llm/bedrock/utils/message_inputs.ts +303 -4
- package/src/llm/bedrock/utils/message_outputs.ts +2 -1
- package/src/llm/custom-chat-models.smoke.test.ts +836 -0
- package/src/llm/google/llm.spec.ts +339 -57
- package/src/llm/google/utils/common.ts +53 -48
- package/src/llm/openai/contentBlocks.test.ts +346 -0
- package/src/llm/openai/index.ts +856 -833
- package/src/llm/openai/utils/index.ts +107 -78
- package/src/llm/openai/utils/messages.test.ts +159 -0
- package/src/llm/openrouter/index.ts +124 -247
- package/src/llm/openrouter/reasoning.test.ts +8 -1
- package/src/llm/vertexai/index.ts +11 -5
- package/src/llm/vertexai/llm.spec.ts +28 -1
- package/src/messages/cache.test.ts +4 -3
- package/src/messages/cache.ts +3 -2
- package/src/messages/core.ts +16 -9
- package/src/messages/format.ts +96 -16
- package/src/messages/formatAgentMessages.test.ts +166 -1
- package/src/messages/langchain.ts +39 -0
- package/src/messages/prune.ts +12 -8
- package/src/run.ts +456 -47
- package/src/scripts/caching.ts +2 -3
- package/src/specs/summarization.test.ts +51 -58
- package/src/tools/ToolNode.ts +706 -63
- package/src/tools/__tests__/hitl.test.ts +3593 -0
- package/src/tools/search/search.ts +83 -73
- package/src/tools/search/tavily-scraper.ts +235 -0
- package/src/tools/search/tavily-search.ts +424 -0
- package/src/tools/search/tavily.test.ts +965 -0
- package/src/tools/search/tool.ts +36 -26
- package/src/tools/search/types.ts +133 -8
- package/src/tools/search/utils.ts +13 -5
- package/src/types/graph.ts +32 -87
- package/src/types/hitl.ts +303 -0
- package/src/types/index.ts +1 -0
- package/src/types/llm.ts +3 -3
- package/src/types/run.ts +33 -0
- package/src/types/stream.ts +1 -1
- package/src/types/tools.ts +19 -0
- package/src/utils/llmConfig.ts +1 -6
|
@@ -2,6 +2,7 @@ import axios from 'axios';
|
|
|
2
2
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
3
3
|
import type * as t from './types';
|
|
4
4
|
import { getAttribution, createDefaultLogger } from './utils';
|
|
5
|
+
import { createTavilyAPI } from './tavily-search';
|
|
5
6
|
import { BaseReranker } from './rerankers';
|
|
6
7
|
|
|
7
8
|
const chunker = {
|
|
@@ -418,15 +419,20 @@ export const createSearchAPI = (
|
|
|
418
419
|
serperApiKey,
|
|
419
420
|
searxngInstanceUrl,
|
|
420
421
|
searxngApiKey,
|
|
422
|
+
tavilyApiKey,
|
|
423
|
+
tavilySearchUrl,
|
|
424
|
+
tavilySearchOptions,
|
|
421
425
|
} = config;
|
|
422
426
|
|
|
423
427
|
if (searchProvider.toLowerCase() === 'serper') {
|
|
424
428
|
return createSerperAPI(serperApiKey);
|
|
425
429
|
} else if (searchProvider.toLowerCase() === 'searxng') {
|
|
426
430
|
return createSearXNGAPI(searxngInstanceUrl, searxngApiKey);
|
|
431
|
+
} else if (searchProvider.toLowerCase() === 'tavily') {
|
|
432
|
+
return createTavilyAPI(tavilyApiKey, tavilySearchUrl, tavilySearchOptions);
|
|
427
433
|
} else {
|
|
428
434
|
throw new Error(
|
|
429
|
-
`Invalid search provider: ${searchProvider}. Must be 'serper' or '
|
|
435
|
+
`Invalid search provider: ${searchProvider}. Must be 'serper', 'searxng', or 'tavily'`
|
|
430
436
|
);
|
|
431
437
|
}
|
|
432
438
|
};
|
|
@@ -454,6 +460,56 @@ export const createSourceProcessor = (
|
|
|
454
460
|
const logger_ = logger || createDefaultLogger();
|
|
455
461
|
const scraper = scraperInstance;
|
|
456
462
|
|
|
463
|
+
const processResponse = (
|
|
464
|
+
url: string,
|
|
465
|
+
response: t.AnyScraperResponse
|
|
466
|
+
): t.ScrapeResult => {
|
|
467
|
+
const rawMetadata = scraper.extractMetadata(response);
|
|
468
|
+
const metadata =
|
|
469
|
+
Object.keys(rawMetadata).length > 0 ? rawMetadata : undefined;
|
|
470
|
+
const attribution = getAttribution(url, metadata, logger_);
|
|
471
|
+
|
|
472
|
+
if (response.success && response.data) {
|
|
473
|
+
const [content, references] = scraper.extractContent(response);
|
|
474
|
+
return {
|
|
475
|
+
url,
|
|
476
|
+
references,
|
|
477
|
+
attribution,
|
|
478
|
+
content: chunker.cleanText(content),
|
|
479
|
+
};
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
logger_.error(
|
|
483
|
+
`Error scraping ${url}: ${response.error ?? 'Unknown error'}`
|
|
484
|
+
);
|
|
485
|
+
return { url, attribution, error: true, content: '' };
|
|
486
|
+
};
|
|
487
|
+
|
|
488
|
+
const addHighlights = async (
|
|
489
|
+
result: t.ScrapeResult,
|
|
490
|
+
query: string,
|
|
491
|
+
onGetHighlights: t.SearchToolConfig['onGetHighlights']
|
|
492
|
+
): Promise<t.ScrapeResult> => {
|
|
493
|
+
if (result.error != null) {
|
|
494
|
+
return result;
|
|
495
|
+
}
|
|
496
|
+
try {
|
|
497
|
+
const highlights = await getHighlights({
|
|
498
|
+
query,
|
|
499
|
+
reranker,
|
|
500
|
+
content: result.content,
|
|
501
|
+
logger: logger_,
|
|
502
|
+
});
|
|
503
|
+
if (onGetHighlights) {
|
|
504
|
+
onGetHighlights(result.url);
|
|
505
|
+
}
|
|
506
|
+
return { ...result, highlights };
|
|
507
|
+
} catch (error) {
|
|
508
|
+
logger_.error('Error processing scraped content:', error);
|
|
509
|
+
return result;
|
|
510
|
+
}
|
|
511
|
+
};
|
|
512
|
+
|
|
457
513
|
const webScraper = {
|
|
458
514
|
scrapeMany: async ({
|
|
459
515
|
query,
|
|
@@ -465,80 +521,34 @@ export const createSourceProcessor = (
|
|
|
465
521
|
onGetHighlights: t.SearchToolConfig['onGetHighlights'];
|
|
466
522
|
}): Promise<Array<t.ScrapeResult>> => {
|
|
467
523
|
logger_.debug(`Scraping ${links.length} links`);
|
|
468
|
-
const promises: Array<Promise<t.ScrapeResult>> = [];
|
|
469
524
|
try {
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
attribution,
|
|
486
|
-
content: chunker.cleanText(content),
|
|
487
|
-
} as t.ScrapeResult;
|
|
488
|
-
} else {
|
|
489
|
-
logger_.error(
|
|
490
|
-
`Error scraping ${url}: ${response.error ?? 'Unknown error'}`
|
|
491
|
-
);
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
return {
|
|
495
|
-
url,
|
|
496
|
-
attribution,
|
|
497
|
-
error: true,
|
|
498
|
-
content: '',
|
|
499
|
-
} as t.ScrapeResult;
|
|
500
|
-
})
|
|
501
|
-
.then(async (result) => {
|
|
502
|
-
try {
|
|
503
|
-
if (result.error != null) {
|
|
504
|
-
logger_.error(
|
|
505
|
-
`Error scraping ${result.url}: ${result.content}`
|
|
506
|
-
);
|
|
507
|
-
return {
|
|
508
|
-
...result,
|
|
509
|
-
};
|
|
510
|
-
}
|
|
511
|
-
const highlights = await getHighlights({
|
|
512
|
-
query,
|
|
513
|
-
reranker,
|
|
514
|
-
content: result.content,
|
|
515
|
-
logger: logger_,
|
|
516
|
-
});
|
|
517
|
-
if (onGetHighlights) {
|
|
518
|
-
onGetHighlights(result.url);
|
|
519
|
-
}
|
|
520
|
-
return {
|
|
521
|
-
...result,
|
|
522
|
-
highlights,
|
|
523
|
-
};
|
|
524
|
-
} catch (error) {
|
|
525
|
-
logger_.error('Error processing scraped content:', error);
|
|
526
|
-
return {
|
|
527
|
-
...result,
|
|
528
|
-
};
|
|
529
|
-
}
|
|
530
|
-
})
|
|
531
|
-
.catch((error) => {
|
|
532
|
-
logger_.error(`Error scraping ${currentLink}:`, error);
|
|
533
|
-
return {
|
|
534
|
-
url: currentLink,
|
|
535
|
-
error: true,
|
|
536
|
-
content: '',
|
|
537
|
-
};
|
|
538
|
-
});
|
|
539
|
-
promises.push(promise);
|
|
525
|
+
let responses: Array<[string, t.AnyScraperResponse]>;
|
|
526
|
+
|
|
527
|
+
if (scraper.scrapeUrls) {
|
|
528
|
+
responses = await scraper.scrapeUrls(links);
|
|
529
|
+
} else {
|
|
530
|
+
responses = await Promise.all(
|
|
531
|
+
links.map((link) =>
|
|
532
|
+
scraper
|
|
533
|
+
.scrapeUrl(link, {})
|
|
534
|
+
.catch((error): [string, t.AnyScraperResponse] => {
|
|
535
|
+
logger_.error(`Error scraping ${link}:`, error);
|
|
536
|
+
return [link, { success: false, error: String(error) }];
|
|
537
|
+
})
|
|
538
|
+
)
|
|
539
|
+
);
|
|
540
540
|
}
|
|
541
|
-
|
|
541
|
+
|
|
542
|
+
const withHighlights = await Promise.all(
|
|
543
|
+
responses.map(([url, response]) =>
|
|
544
|
+
addHighlights(
|
|
545
|
+
processResponse(url, response),
|
|
546
|
+
query,
|
|
547
|
+
onGetHighlights
|
|
548
|
+
)
|
|
549
|
+
)
|
|
550
|
+
);
|
|
551
|
+
return withHighlights;
|
|
542
552
|
} catch (error) {
|
|
543
553
|
logger_.error('Error in scrapeMany:', error);
|
|
544
554
|
return [];
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import type * as t from './types';
|
|
3
|
+
import { createDefaultLogger } from './utils';
|
|
4
|
+
|
|
5
|
+
const DEFAULT_BASIC_TIMEOUT = 15000;
|
|
6
|
+
const DEFAULT_ADVANCED_TIMEOUT = 30000;
|
|
7
|
+
const MAX_BATCH_SIZE = 20;
|
|
8
|
+
|
|
9
|
+
const getDefaultTimeout = (extractDepth: 'basic' | 'advanced'): number =>
|
|
10
|
+
extractDepth === 'advanced'
|
|
11
|
+
? DEFAULT_ADVANCED_TIMEOUT
|
|
12
|
+
: DEFAULT_BASIC_TIMEOUT;
|
|
13
|
+
|
|
14
|
+
const normalizeUrlKey = (url: string): string => {
|
|
15
|
+
try {
|
|
16
|
+
const parsedUrl = new URL(url);
|
|
17
|
+
parsedUrl.hash = '';
|
|
18
|
+
if (parsedUrl.pathname.length > 1) {
|
|
19
|
+
parsedUrl.pathname = parsedUrl.pathname.replace(/\/+$/, '');
|
|
20
|
+
}
|
|
21
|
+
return parsedUrl.toString();
|
|
22
|
+
} catch {
|
|
23
|
+
return url;
|
|
24
|
+
}
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
const setUrlResult = (
|
|
28
|
+
map: Map<string, t.TavilyExtractResult>,
|
|
29
|
+
result: t.TavilyExtractResult
|
|
30
|
+
): void => {
|
|
31
|
+
map.set(result.url, result);
|
|
32
|
+
const normalizedUrl = normalizeUrlKey(result.url);
|
|
33
|
+
if (!map.has(normalizedUrl)) {
|
|
34
|
+
map.set(normalizedUrl, result);
|
|
35
|
+
}
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
export class TavilyScraper implements t.BaseScraper {
|
|
39
|
+
private apiKey: string;
|
|
40
|
+
private apiUrl: string;
|
|
41
|
+
private timeout: number;
|
|
42
|
+
private payloadTimeout: number | undefined;
|
|
43
|
+
private logger: t.Logger;
|
|
44
|
+
private extractDepth: 'basic' | 'advanced';
|
|
45
|
+
private includeImages: boolean;
|
|
46
|
+
private includeFavicon: boolean;
|
|
47
|
+
private format: 'markdown' | 'text' | undefined;
|
|
48
|
+
|
|
49
|
+
constructor(config: t.TavilyScraperConfig = {}) {
|
|
50
|
+
this.apiKey = config.apiKey ?? process.env.TAVILY_API_KEY ?? '';
|
|
51
|
+
this.apiUrl =
|
|
52
|
+
config.apiUrl ??
|
|
53
|
+
process.env.TAVILY_EXTRACT_URL ??
|
|
54
|
+
'https://api.tavily.com/extract';
|
|
55
|
+
this.payloadTimeout = config.timeout;
|
|
56
|
+
this.extractDepth = config.extractDepth ?? 'basic';
|
|
57
|
+
this.timeout = config.timeout ?? getDefaultTimeout(this.extractDepth);
|
|
58
|
+
this.includeImages = config.includeImages ?? false;
|
|
59
|
+
this.includeFavicon = config.includeFavicon ?? false;
|
|
60
|
+
this.format = config.format;
|
|
61
|
+
this.logger = config.logger || createDefaultLogger();
|
|
62
|
+
|
|
63
|
+
if (!this.apiKey) {
|
|
64
|
+
this.logger.warn('TAVILY_API_KEY is not set. Scraping will not work.');
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
async scrapeUrl(
|
|
69
|
+
url: string,
|
|
70
|
+
options: t.TavilyScrapeOptions = {}
|
|
71
|
+
): Promise<[string, t.TavilyScrapeResponse]> {
|
|
72
|
+
const results = await this.scrapeUrls([url], options);
|
|
73
|
+
return results[0];
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
async scrapeUrls(
|
|
77
|
+
urls: string[],
|
|
78
|
+
options: t.TavilyScrapeOptions = {}
|
|
79
|
+
): Promise<Array<[string, t.TavilyScrapeResponse]>> {
|
|
80
|
+
if (!this.apiKey) {
|
|
81
|
+
return urls.map((url) => [
|
|
82
|
+
url,
|
|
83
|
+
{ success: false, error: 'TAVILY_API_KEY is not set' },
|
|
84
|
+
]);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
const batches: string[][] = [];
|
|
88
|
+
for (let i = 0; i < urls.length; i += MAX_BATCH_SIZE) {
|
|
89
|
+
batches.push(urls.slice(i, i + MAX_BATCH_SIZE));
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const allResults: Array<[string, t.TavilyScrapeResponse]> = [];
|
|
93
|
+
|
|
94
|
+
for (const batch of batches) {
|
|
95
|
+
const batchResults = await this.extractBatch(batch, options);
|
|
96
|
+
allResults.push(...batchResults);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
return allResults;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
private async extractBatch(
|
|
103
|
+
urls: string[],
|
|
104
|
+
options: t.TavilyScrapeOptions = {}
|
|
105
|
+
): Promise<Array<[string, t.TavilyScrapeResponse]>> {
|
|
106
|
+
try {
|
|
107
|
+
const includeFavicon = options.includeFavicon ?? this.includeFavicon;
|
|
108
|
+
const format = options.format ?? this.format;
|
|
109
|
+
const extractDepth = options.extractDepth ?? this.extractDepth;
|
|
110
|
+
const payload: t.TavilyExtractPayload = {
|
|
111
|
+
urls,
|
|
112
|
+
extract_depth: extractDepth,
|
|
113
|
+
include_images: options.includeImages ?? this.includeImages,
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
if (includeFavicon) {
|
|
117
|
+
payload.include_favicon = true;
|
|
118
|
+
}
|
|
119
|
+
if (format != null) {
|
|
120
|
+
payload.format = format;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
const effectiveTimeout =
|
|
124
|
+
options.timeout ??
|
|
125
|
+
this.payloadTimeout ??
|
|
126
|
+
(options.extractDepth != null
|
|
127
|
+
? getDefaultTimeout(extractDepth)
|
|
128
|
+
: this.timeout);
|
|
129
|
+
const payloadTimeout = options.timeout ?? this.payloadTimeout;
|
|
130
|
+
if (payloadTimeout != null) {
|
|
131
|
+
payload.timeout = Math.min(Math.max(payloadTimeout / 1000, 1), 60);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const response = await axios.post<{
|
|
135
|
+
results?: t.TavilyExtractResult[];
|
|
136
|
+
failed_results?: t.TavilyExtractResult[];
|
|
137
|
+
}>(this.apiUrl, payload, {
|
|
138
|
+
headers: {
|
|
139
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
140
|
+
'Content-Type': 'application/json',
|
|
141
|
+
},
|
|
142
|
+
timeout: effectiveTimeout,
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
const data = response.data;
|
|
146
|
+
const successMap = new Map<string, t.TavilyExtractResult>();
|
|
147
|
+
const failedMap = new Map<string, t.TavilyExtractResult>();
|
|
148
|
+
|
|
149
|
+
for (const result of data.results ?? []) {
|
|
150
|
+
setUrlResult(successMap, result);
|
|
151
|
+
}
|
|
152
|
+
for (const result of data.failed_results ?? []) {
|
|
153
|
+
setUrlResult(failedMap, result);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
return urls.map((url): [string, t.TavilyScrapeResponse] => {
|
|
157
|
+
const success =
|
|
158
|
+
successMap.get(url) ?? successMap.get(normalizeUrlKey(url));
|
|
159
|
+
if (success && success.error == null) {
|
|
160
|
+
return [
|
|
161
|
+
url,
|
|
162
|
+
{
|
|
163
|
+
success: true,
|
|
164
|
+
data: {
|
|
165
|
+
rawContent: success.raw_content ?? '',
|
|
166
|
+
images: success.images ?? [],
|
|
167
|
+
favicon: success.favicon,
|
|
168
|
+
},
|
|
169
|
+
},
|
|
170
|
+
];
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
const failed =
|
|
174
|
+
failedMap.get(url) ?? failedMap.get(normalizeUrlKey(url));
|
|
175
|
+
const error =
|
|
176
|
+
success?.error ??
|
|
177
|
+
failed?.error ??
|
|
178
|
+
'URL not found in Tavily Extract response';
|
|
179
|
+
return [url, { success: false, error }];
|
|
180
|
+
});
|
|
181
|
+
} catch (error) {
|
|
182
|
+
const errorMessage =
|
|
183
|
+
error instanceof Error ? error.message : String(error);
|
|
184
|
+
return urls.map((url) => [
|
|
185
|
+
url,
|
|
186
|
+
{
|
|
187
|
+
success: false,
|
|
188
|
+
error: `Tavily Extract API request failed: ${errorMessage}`,
|
|
189
|
+
},
|
|
190
|
+
]);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
extractContent(
|
|
195
|
+
response: t.TavilyScrapeResponse
|
|
196
|
+
): [string, undefined | t.References] {
|
|
197
|
+
if (!response.success || !response.data) {
|
|
198
|
+
return ['', undefined];
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
const content = response.data.rawContent ?? '';
|
|
202
|
+
const images = response.data.images ?? [];
|
|
203
|
+
|
|
204
|
+
const references: t.References | undefined =
|
|
205
|
+
images.length > 0
|
|
206
|
+
? {
|
|
207
|
+
links: [],
|
|
208
|
+
images: images.map((imageUrl) => ({ originalUrl: imageUrl })),
|
|
209
|
+
videos: [],
|
|
210
|
+
}
|
|
211
|
+
: undefined;
|
|
212
|
+
|
|
213
|
+
return [content, references];
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
extractMetadata(response: t.TavilyScrapeResponse): t.GenericScrapeMetadata {
|
|
217
|
+
if (!response.success || !response.data) {
|
|
218
|
+
return {};
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
const metadata: t.GenericScrapeMetadata = {
|
|
222
|
+
images_count: response.data.images?.length ?? 0,
|
|
223
|
+
};
|
|
224
|
+
if (response.data.favicon != null) {
|
|
225
|
+
metadata.favicon = response.data.favicon;
|
|
226
|
+
}
|
|
227
|
+
return metadata;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
export const createTavilyScraper = (
|
|
232
|
+
config: t.TavilyScraperConfig = {}
|
|
233
|
+
): TavilyScraper => {
|
|
234
|
+
return new TavilyScraper(config);
|
|
235
|
+
};
|