@librechat/agents 2.4.83 → 2.4.84
- package/dist/cjs/tools/search/firecrawl.cjs +3 -1
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +5 -5
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/serper-scraper.cjs +132 -0
- package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -0
- package/dist/cjs/tools/search/tool.cjs +45 -9
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs +3 -1
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +5 -5
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/serper-scraper.mjs +129 -0
- package/dist/esm/tools/search/serper-scraper.mjs.map +1 -0
- package/dist/esm/tools/search/tool.mjs +45 -9
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/types/tools/search/firecrawl.d.ts +2 -1
- package/dist/types/tools/search/search.d.ts +1 -2
- package/dist/types/tools/search/serper-scraper.d.ts +59 -0
- package/dist/types/tools/search/tool.d.ts +21 -0
- package/dist/types/tools/search/types.d.ts +30 -1
- package/package.json +1 -1
- package/src/scripts/search.ts +5 -1
- package/src/tools/search/firecrawl.ts +5 -2
- package/src/tools/search/search.ts +6 -8
- package/src/tools/search/serper-scraper.ts +155 -0
- package/src/tools/search/tool.ts +47 -8
- package/src/tools/search/types.ts +45 -0
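
The functional change in this release is a pluggable scraper layer: `createSearchTool` gains a `scraperProvider` option (`'firecrawl'` by default, or `'serper'`), along with `firecrawlVersion` and `serperScraperOptions`. A minimal sketch of the two configurations, assuming `createSearchTool` is re-exported from the package root (option names are taken from the diffs below):

```typescript
import { createSearchTool } from '@librechat/agents';

// Serper handles both search and scraping; the same serperApiKey is
// reused for the scrape endpoint (see serper-scraper.ts below).
const serperTool = createSearchTool({
  searchProvider: 'serper',
  scraperProvider: 'serper',
  serperApiKey: process.env.SERPER_API_KEY,
  serperScraperOptions: { includeMarkdown: true, timeout: 10000 },
});

// Firecrawl remains the default scraper; firecrawlVersion selects the
// API path segment ('v2' by default, see firecrawl.ts below).
const firecrawlTool = createSearchTool({
  searchProvider: 'serper',
  scraperProvider: 'firecrawl',
  firecrawlApiKey: process.env.FIRECRAWL_API_KEY,
  firecrawlVersion: 'v2',
});
```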
package/dist/esm/tools/search/tool.mjs
CHANGED

@@ -2,6 +2,7 @@ import { z } from 'zod';
 import { tool } from '@langchain/core/tools';
 import { newsSchema, videosSchema, imagesSchema, dateSchema, querySchema, countrySchema } from './schema.mjs';
 import { createSearchAPI, createSourceProcessor } from './search.mjs';
+import { createSerperScraper } from './serper-scraper.mjs';
 import { createFirecrawlScraper } from './firecrawl.mjs';
 import { expandHighlights } from './highlights.mjs';
 import { formatResultsForLLM } from './format.mjs';
@@ -230,11 +231,32 @@ Use anchor marker(s) immediately after the statement:
  * Creates a search tool with a schema that dynamically includes the country field
  * only when the searchProvider is 'serper'.
  *
+ * Supports multiple scraper providers:
+ * - Firecrawl (default): Full-featured web scraping with multiple formats
+ * - Serper: Lightweight scraping using Serper's scrape API
+ *
+ * @example
+ * ```typescript
+ * // Using Firecrawl scraper (default)
+ * const searchTool = createSearchTool({
+ *   searchProvider: 'serper',
+ *   scraperProvider: 'firecrawl',
+ *   firecrawlApiKey: 'your-firecrawl-key'
+ * });
+ *
+ * // Using Serper scraper
+ * const searchTool = createSearchTool({
+ *   searchProvider: 'serper',
+ *   scraperProvider: 'serper',
+ *   serperApiKey: 'your-serper-key'
+ * });
+ * ```
+ *
  * @param config - The search tool configuration
  * @returns A DynamicStructuredTool with a schema that depends on the searchProvider
  */
 const createSearchTool = (config = {}) => {
-    const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, safeSearch = 1, firecrawlApiKey, firecrawlApiUrl, firecrawlOptions, scraperTimeout, jinaApiKey, jinaApiUrl, cohereApiKey, onSearchResults: _onSearchResults, onGetHighlights, } = config;
+    const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, safeSearch = 1, scraperProvider = 'firecrawl', firecrawlApiKey, firecrawlApiUrl, firecrawlVersion, firecrawlOptions, serperScraperOptions, scraperTimeout, jinaApiKey, jinaApiUrl, cohereApiKey, onSearchResults: _onSearchResults, onGetHighlights, } = config;
     const logger = config.logger || createDefaultLogger();
     const schemaObject = {
         query: querySchema,

@@ -253,13 +275,27 @@ const createSearchTool = (config = {}) => {
         searxngInstanceUrl,
         searxngApiKey,
     });
-    const firecrawlScraper = createFirecrawlScraper({
-        ...firecrawlOptions,
-        apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
-        apiUrl: firecrawlApiUrl,
-        timeout: scraperTimeout ?? firecrawlOptions?.timeout,
-        formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
-    });
+    /** Create scraper based on scraperProvider */
+    let scraperInstance;
+    if (scraperProvider === 'serper') {
+        scraperInstance = createSerperScraper({
+            ...serperScraperOptions,
+            apiKey: serperApiKey,
+            timeout: scraperTimeout ?? serperScraperOptions?.timeout,
+            logger,
+        });
+    }
+    else {
+        scraperInstance = createFirecrawlScraper({
+            ...firecrawlOptions,
+            apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
+            apiUrl: firecrawlApiUrl,
+            version: firecrawlVersion,
+            timeout: scraperTimeout ?? firecrawlOptions?.timeout,
+            formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
+            logger,
+        });
+    }
     const selectedReranker = createReranker({
         rerankerType,
         jinaApiKey,

@@ -274,7 +310,7 @@ const createSearchTool = (config = {}) => {
         reranker: selectedReranker,
         topResults,
         logger,
-    }, firecrawlScraper);
+    }, scraperInstance);
     const search = createSearchProcessor({
         searchAPI,
         safeSearch,
package/dist/esm/tools/search/tool.mjs.map
CHANGED

@@ -1 +1 @@
(single-line sourcemap JSON for tool.mjs regenerated; its embedded sourcesContent mirrors the tool.mjs changes above)
package/dist/types/tools/search/firecrawl.d.ts
CHANGED

@@ -3,9 +3,10 @@ import type * as t from './types';
  * Firecrawl scraper implementation
  * Uses the Firecrawl API to scrape web pages
  */
-export declare class FirecrawlScraper {
+export declare class FirecrawlScraper implements t.BaseScraper {
     private apiKey;
     private apiUrl;
+    private version;
     private defaultFormats;
     private timeout;
     private logger;
package/dist/types/tools/search/search.d.ts
CHANGED

@@ -1,9 +1,8 @@
 import type * as t from './types';
-import { FirecrawlScraper } from './firecrawl';
 export declare const createSearchAPI: (config: t.SearchConfig) => {
     getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;
 };
-export declare const createSourceProcessor: (config?: t.ProcessSourcesConfig, scraperInstance?: FirecrawlScraper) => {
+export declare const createSourceProcessor: (config?: t.ProcessSourcesConfig, scraperInstance?: t.BaseScraper) => {
     processSources: (fields: t.ProcessSourcesFields) => Promise<t.SearchResultData>;
     topResults: number;
 };
package/dist/types/tools/search/serper-scraper.d.ts
ADDED

@@ -0,0 +1,59 @@
+import type * as t from './types';
+/**
+ * Serper scraper implementation
+ * Uses the Serper Scrape API (https://scrape.serper.dev) to scrape web pages
+ *
+ * Features:
+ * - Simple API with single endpoint
+ * - Returns both text and markdown content
+ * - Includes metadata from scraped pages
+ * - Credits-based pricing model
+ *
+ * @example
+ * ```typescript
+ * const scraper = createSerperScraper({
+ *   apiKey: 'your-serper-api-key',
+ *   includeMarkdown: true,
+ *   timeout: 10000
+ * });
+ *
+ * const [url, response] = await scraper.scrapeUrl('https://example.com');
+ * if (response.success) {
+ *   const [content] = scraper.extractContent(response);
+ *   console.log(content);
+ * }
+ * ```
+ */
+export declare class SerperScraper implements t.BaseScraper {
+    private apiKey;
+    private apiUrl;
+    private timeout;
+    private logger;
+    private includeMarkdown;
+    constructor(config?: t.SerperScraperConfig);
+    /**
+     * Scrape a single URL
+     * @param url URL to scrape
+     * @param options Scrape options
+     * @returns Scrape response
+     */
+    scrapeUrl(url: string, options?: t.SerperScrapeOptions): Promise<[string, t.SerperScrapeResponse]>;
+    /**
+     * Extract content from scrape response
+     * @param response Scrape response
+     * @returns Extracted content or empty string if not available
+     */
+    extractContent(response: t.SerperScrapeResponse): [string, undefined | t.References];
+    /**
+     * Extract metadata from scrape response
+     * @param response Scrape response
+     * @returns Metadata object
+     */
+    extractMetadata(response: t.SerperScrapeResponse): Record<string, string | number | boolean | null | undefined>;
+}
+/**
+ * Create a Serper scraper instance
+ * @param config Scraper configuration
+ * @returns Serper scraper instance
+ */
+export declare const createSerperScraper: (config?: t.SerperScraperConfig) => SerperScraper;
package/dist/types/tools/search/tool.d.ts
CHANGED

@@ -6,6 +6,27 @@ import { DATE_RANGE } from './schema';
  * Creates a search tool with a schema that dynamically includes the country field
  * only when the searchProvider is 'serper'.
  *
+ * Supports multiple scraper providers:
+ * - Firecrawl (default): Full-featured web scraping with multiple formats
+ * - Serper: Lightweight scraping using Serper's scrape API
+ *
+ * @example
+ * ```typescript
+ * // Using Firecrawl scraper (default)
+ * const searchTool = createSearchTool({
+ *   searchProvider: 'serper',
+ *   scraperProvider: 'firecrawl',
+ *   firecrawlApiKey: 'your-firecrawl-key'
+ * });
+ *
+ * // Using Serper scraper
+ * const searchTool = createSearchTool({
+ *   searchProvider: 'serper',
+ *   scraperProvider: 'serper',
+ *   serperApiKey: 'your-serper-key'
+ * });
+ * ```
+ *
  * @param config - The search tool configuration
  * @returns A DynamicStructuredTool with a schema that depends on the searchProvider
  */
package/dist/types/tools/search/types.d.ts
CHANGED

@@ -4,6 +4,7 @@ import type { RunnableConfig } from '@langchain/core/runnables';
 import type { BaseReranker } from './rerankers';
 import { DATE_RANGE } from './schema';
 export type SearchProvider = 'serper' | 'searxng';
+export type ScraperProvider = 'firecrawl' | 'serper';
 export type RerankerType = 'infinity' | 'jina' | 'cohere' | 'none';
 export interface Highlight {
     score: number;

@@ -85,8 +86,16 @@ export interface ProcessSourcesConfig {
 export interface FirecrawlConfig {
     firecrawlApiKey?: string;
     firecrawlApiUrl?: string;
+    firecrawlVersion?: string;
     firecrawlOptions?: FirecrawlScraperConfig;
 }
+export interface SerperScraperConfig {
+    apiKey?: string;
+    apiUrl?: string;
+    timeout?: number;
+    logger?: Logger;
+    includeMarkdown?: boolean;
+}
 export interface ScraperContentResult {
     content: string;
 }

@@ -133,7 +142,9 @@ export interface SearchToolConfig extends SearchConfig, ProcessSourcesConfig, Fi
     jinaApiUrl?: string;
     cohereApiKey?: string;
     rerankerType?: RerankerType;
+    scraperProvider?: ScraperProvider;
     scraperTimeout?: number;
+    serperScraperOptions?: SerperScraperConfig;
     onSearchResults?: (results: SearchResult, runnableConfig?: RunnableConfig) => void;
     onGetHighlights?: (link: string) => void;
 }

@@ -147,8 +158,15 @@ export type UsedReferences = {
     originalIndex: number;
     reference: MediaReference;
 }[];
+/** Base Scraper Interface */
+export interface BaseScraper {
+    scrapeUrl(url: string, options?: unknown): Promise<[string, FirecrawlScrapeResponse | SerperScrapeResponse]>;
+    extractContent(response: FirecrawlScrapeResponse | SerperScrapeResponse): [string, undefined | References];
+    extractMetadata(response: FirecrawlScrapeResponse | SerperScrapeResponse): ScrapeMetadata | Record<string, string | number | boolean | null | undefined>;
+}
 /** Firecrawl */
-export type FirecrawlScrapeOptions = Omit<FirecrawlScraperConfig, 'apiKey' | 'apiUrl' | 'logger'>;
+export type FirecrawlScrapeOptions = Omit<FirecrawlScraperConfig, 'apiKey' | 'apiUrl' | 'version' | 'logger'>;
+export type SerperScrapeOptions = Omit<SerperScraperConfig, 'apiKey' | 'apiUrl' | 'logger'>;
 export interface ScrapeMetadata {
     sourceURL?: string;
     url?: string;

@@ -214,9 +232,20 @@ export interface FirecrawlScrapeResponse {
     };
     error?: string;
 }
+export interface SerperScrapeResponse {
+    success: boolean;
+    data?: {
+        text?: string;
+        markdown?: string;
+        metadata?: Record<string, string | number | boolean | null | undefined>;
+        credits?: number;
+    };
+    error?: string;
+}
 export interface FirecrawlScraperConfig {
     apiKey?: string;
     apiUrl?: string;
+    version?: string;
     formats?: string[];
     timeout?: number;
     logger?: Logger;
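
`createSourceProcessor` now depends only on this `BaseScraper` interface, so any object with these three methods can stand in for the built-in Firecrawl and Serper scrapers. A hypothetical sketch of a conforming implementation, typed against the `SerperScrapeResponse` variant as `SerperScraper` itself is (the `fetch`-based class is illustrative, not part of the package):

```typescript
import type * as t from './types';

// Hypothetical scraper satisfying the declared t.BaseScraper shape.
class PlainFetchScraper implements t.BaseScraper {
  async scrapeUrl(url: string): Promise<[string, t.SerperScrapeResponse]> {
    try {
      const res = await fetch(url);
      const text = await res.text();
      return [url, { success: true, data: { text } }];
    } catch (error) {
      return [url, { success: false, error: String(error) }];
    }
  }

  extractContent(
    response: t.SerperScrapeResponse
  ): [string, undefined | t.References] {
    // Plain text only; no reference extraction.
    return [response.data?.text ?? '', undefined];
  }

  extractMetadata(
    response: t.SerperScrapeResponse
  ): Record<string, string | number | boolean | null | undefined> {
    return response.data?.metadata ?? {};
  }
}
```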
package/package.json
CHANGED
package/src/scripts/search.ts
CHANGED

@@ -83,7 +83,11 @@ async function testStandardStreaming(): Promise<void> {
     graphConfig: {
       type: 'standard',
       llmConfig,
-      tools: [
+      tools: [
+        createSearchTool({
+          scraperProvider: 'serper',
+        }),
+      ],
       instructions:
         'You are a friendly AI assistant. Always address the user by their name.',
       // additional_instructions: `The user's name is ${userName} and they are located in ${location}.`,
package/src/tools/search/firecrawl.ts
CHANGED

@@ -7,9 +7,10 @@ import { createDefaultLogger } from './utils';
  * Firecrawl scraper implementation
  * Uses the Firecrawl API to scrape web pages
  */
-export class FirecrawlScraper {
+export class FirecrawlScraper implements t.BaseScraper {
   private apiKey: string;
   private apiUrl: string;
+  private version: string;
   private defaultFormats: string[];
   private timeout: number;
   private logger: t.Logger;

@@ -32,11 +33,13 @@ export class FirecrawlScraper {
   constructor(config: t.FirecrawlScraperConfig = {}) {
     this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
 
+    this.version = config.version ?? 'v2';
+
     const baseUrl =
       config.apiUrl ??
       process.env.FIRECRAWL_BASE_URL ??
       'https://api.firecrawl.dev';
-    this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/
+    this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/${this.version}/scrape`;
 
     this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
     this.timeout = config.timeout ?? 7500;
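
With the constructor change above, the scrape endpoint is assembled from a configurable version segment instead of a fixed path. A small sketch of the resulting URL resolution, mirroring the constructor's fallback chain (values are the defaults visible in the diff):

```typescript
// Mirrors FirecrawlScraper's constructor: version defaults to 'v2',
// and the base URL falls back from config to env to the public host.
const config: { apiUrl?: string; version?: string } = {};

const version = config.version ?? 'v2';
const baseUrl =
  config.apiUrl ??
  process.env.FIRECRAWL_BASE_URL ??
  'https://api.firecrawl.dev';

// Trailing slashes are stripped before the version segment is appended.
const apiUrl = `${baseUrl.replace(/\/+$/, '')}/${version}/scrape`;
// => 'https://api.firecrawl.dev/v2/scrape'
```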
package/src/tools/search/search.ts
CHANGED

@@ -2,7 +2,6 @@ import axios from 'axios';
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 import type * as t from './types';
 import { getAttribution, createDefaultLogger } from './utils';
-import { FirecrawlScraper } from './firecrawl';
 import { BaseReranker } from './rerankers';
 
 const chunker = {

@@ -434,7 +433,7 @@ export const createSearchAPI = (
 
 export const createSourceProcessor = (
   config: t.ProcessSourcesConfig = {},
-  scraperInstance?: FirecrawlScraper
+  scraperInstance?: t.BaseScraper
 ): {
   processSources: (
     fields: t.ProcessSourcesFields

@@ -442,7 +441,7 @@ export const createSourceProcessor = (
   topResults: number;
 } => {
   if (!scraperInstance) {
-    throw new Error('
+    throw new Error('Scraper instance is required');
   }
   const {
     topResults = 5,

@@ -453,7 +452,7 @@ export const createSourceProcessor = (
   } = config;
 
   const logger_ = logger || createDefaultLogger();
-  const firecrawlScraper = scraperInstance;
+  const scraper = scraperInstance;
 
   const webScraper = {
     scrapeMany: async ({

@@ -465,12 +464,12 @@ export const createSourceProcessor = (
       links: string[];
       onGetHighlights: t.SearchToolConfig['onGetHighlights'];
     }): Promise<Array<t.ScrapeResult>> => {
-      logger_.debug(`Scraping ${links.length} links
+      logger_.debug(`Scraping ${links.length} links`);
       const promises: Array<Promise<t.ScrapeResult>> = [];
       try {
         for (let i = 0; i < links.length; i++) {
           const currentLink = links[i];
-          const promise: Promise<t.ScrapeResult> = firecrawlScraper
+          const promise: Promise<t.ScrapeResult> = scraper
             .scrapeUrl(currentLink, {})
             .then(([url, response]) => {
               const attribution = getAttribution(

@@ -479,8 +478,7 @@ export const createSourceProcessor = (
                 logger_
               );
               if (response.success && response.data) {
-                const [content, references] =
-                  firecrawlScraper.extractContent(response);
+                const [content, references] = scraper.extractContent(response);
                 return {
                   url,
                   references,
package/src/tools/search/serper-scraper.ts
ADDED

@@ -0,0 +1,155 @@
+import axios from 'axios';
+import type * as t from './types';
+import { createDefaultLogger } from './utils';
+
+/**
+ * Serper scraper implementation
+ * Uses the Serper Scrape API (https://scrape.serper.dev) to scrape web pages
+ *
+ * Features:
+ * - Simple API with single endpoint
+ * - Returns both text and markdown content
+ * - Includes metadata from scraped pages
+ * - Credits-based pricing model
+ *
+ * @example
+ * ```typescript
+ * const scraper = createSerperScraper({
+ *   apiKey: 'your-serper-api-key',
+ *   includeMarkdown: true,
+ *   timeout: 10000
+ * });
+ *
+ * const [url, response] = await scraper.scrapeUrl('https://example.com');
+ * if (response.success) {
+ *   const [content] = scraper.extractContent(response);
+ *   console.log(content);
+ * }
+ * ```
+ */
+export class SerperScraper implements t.BaseScraper {
+  private apiKey: string;
+  private apiUrl: string;
+  private timeout: number;
+  private logger: t.Logger;
+  private includeMarkdown: boolean;
+
+  constructor(config: t.SerperScraperConfig = {}) {
+    this.apiKey = config.apiKey ?? process.env.SERPER_API_KEY ?? '';
+
+    this.apiUrl =
+      config.apiUrl ??
+      process.env.SERPER_SCRAPE_URL ??
+      'https://scrape.serper.dev';
+
+    this.timeout = config.timeout ?? 7500;
+    this.includeMarkdown = config.includeMarkdown ?? true;
+
+    this.logger = config.logger || createDefaultLogger();
+
+    if (!this.apiKey) {
+      this.logger.warn('SERPER_API_KEY is not set. Scraping will not work.');
+    }
+
+    this.logger.debug(
+      `Serper scraper initialized with API URL: ${this.apiUrl}`
+    );
+  }
+
+  /**
+   * Scrape a single URL
+   * @param url URL to scrape
+   * @param options Scrape options
+   * @returns Scrape response
+   */
+  async scrapeUrl(
+    url: string,
+    options: t.SerperScrapeOptions = {}
+  ): Promise<[string, t.SerperScrapeResponse]> {
+    if (!this.apiKey) {
+      return [
+        url,
+        {
+          success: false,
+          error: 'SERPER_API_KEY is not set',
+        },
+      ];
+    }
+
+    try {
+      const payload = {
+        url,
+        includeMarkdown: options.includeMarkdown ?? this.includeMarkdown,
+      };
+
+      const response = await axios.post(this.apiUrl, payload, {
+        headers: {
+          'X-API-KEY': this.apiKey,
+          'Content-Type': 'application/json',
+        },
+        timeout: options.timeout ?? this.timeout,
+      });
+
+      return [url, { success: true, data: response.data }];
+    } catch (error) {
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+      return [
+        url,
+        {
+          success: false,
+          error: `Serper Scrape API request failed: ${errorMessage}`,
+        },
+      ];
+    }
+  }
+
+  /**
+   * Extract content from scrape response
+   * @param response Scrape response
+   * @returns Extracted content or empty string if not available
+   */
+  extractContent(
+    response: t.SerperScrapeResponse
+  ): [string, undefined | t.References] {
+    if (!response.success || !response.data) {
+      return ['', undefined];
+    }
+
+    if (response.data.markdown != null) {
+      return [response.data.markdown, undefined];
+    }
+
+    if (response.data.text != null) {
+      return [response.data.text, undefined];
+    }
+
+    return ['', undefined];
+  }
+
+  /**
+   * Extract metadata from scrape response
+   * @param response Scrape response
+   * @returns Metadata object
+   */
+  extractMetadata(
+    response: t.SerperScrapeResponse
+  ): Record<string, string | number | boolean | null | undefined> {
+    if (!response.success || !response.data || !response.data.metadata) {
+      return {};
+    }
+
+    return response.data.metadata;
+  }
+}
+
+/**
+ * Create a Serper scraper instance
+ * @param config Scraper configuration
+ * @returns Serper scraper instance
+ */
+export const createSerperScraper = (
+  config: t.SerperScraperConfig = {}
+): SerperScraper => {
+  return new SerperScraper(config);
+};
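
Since `scrapeUrl` never throws (failures come back as `{ success: false, error }` tuples), callers can fan out over many URLs without per-request try/catch. A usage sketch under that contract:

```typescript
import { createSerperScraper } from './serper-scraper';

async function scrapeAll(urls: string[]): Promise<void> {
  const scraper = createSerperScraper({ apiKey: process.env.SERPER_API_KEY });

  // Each promise resolves to a [url, response] tuple, even on failure.
  const results = await Promise.all(urls.map((u) => scraper.scrapeUrl(u)));

  for (const [url, response] of results) {
    if (!response.success) {
      console.warn(`${url}: ${response.error}`);
      continue;
    }
    // extractContent prefers markdown, falls back to text, else ''.
    const [content] = scraper.extractContent(response);
    console.log(url, content.slice(0, 80));
  }
}
```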