@librechat/agents 2.4.83 → 2.4.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import { z } from 'zod';
2
2
  import { tool } from '@langchain/core/tools';
3
3
  import { newsSchema, videosSchema, imagesSchema, dateSchema, querySchema, countrySchema } from './schema.mjs';
4
4
  import { createSearchAPI, createSourceProcessor } from './search.mjs';
5
+ import { createSerperScraper } from './serper-scraper.mjs';
5
6
  import { createFirecrawlScraper } from './firecrawl.mjs';
6
7
  import { expandHighlights } from './highlights.mjs';
7
8
  import { formatResultsForLLM } from './format.mjs';
@@ -230,11 +231,32 @@ Use anchor marker(s) immediately after the statement:
230
231
  * Creates a search tool with a schema that dynamically includes the country field
231
232
  * only when the searchProvider is 'serper'.
232
233
  *
234
+ * Supports multiple scraper providers:
235
+ * - Firecrawl (default): Full-featured web scraping with multiple formats
236
+ * - Serper: Lightweight scraping using Serper's scrape API
237
+ *
238
+ * @example
239
+ * ```typescript
240
+ * // Using Firecrawl scraper (default)
241
+ * const searchTool = createSearchTool({
242
+ * searchProvider: 'serper',
243
+ * scraperProvider: 'firecrawl',
244
+ * firecrawlApiKey: 'your-firecrawl-key'
245
+ * });
246
+ *
247
+ * // Using Serper scraper
248
+ * const searchTool = createSearchTool({
249
+ * searchProvider: 'serper',
250
+ * scraperProvider: 'serper',
251
+ * serperApiKey: 'your-serper-key'
252
+ * });
253
+ * ```
254
+ *
233
255
  * @param config - The search tool configuration
234
256
  * @returns A DynamicStructuredTool with a schema that depends on the searchProvider
235
257
  */
236
258
  const createSearchTool = (config = {}) => {
237
- const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, safeSearch = 1, firecrawlApiKey, firecrawlApiUrl, firecrawlOptions, scraperTimeout, jinaApiKey, jinaApiUrl, cohereApiKey, onSearchResults: _onSearchResults, onGetHighlights, } = config;
259
+ const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, safeSearch = 1, scraperProvider = 'firecrawl', firecrawlApiKey, firecrawlApiUrl, firecrawlVersion, firecrawlOptions, serperScraperOptions, scraperTimeout, jinaApiKey, jinaApiUrl, cohereApiKey, onSearchResults: _onSearchResults, onGetHighlights, } = config;
238
260
  const logger = config.logger || createDefaultLogger();
239
261
  const schemaObject = {
240
262
  query: querySchema,
@@ -253,13 +275,27 @@ const createSearchTool = (config = {}) => {
253
275
  searxngInstanceUrl,
254
276
  searxngApiKey,
255
277
  });
256
- const firecrawlScraper = createFirecrawlScraper({
257
- ...firecrawlOptions,
258
- apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
259
- apiUrl: firecrawlApiUrl,
260
- timeout: scraperTimeout ?? firecrawlOptions?.timeout,
261
- formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
262
- });
278
+ /** Create scraper based on scraperProvider */
279
+ let scraperInstance;
280
+ if (scraperProvider === 'serper') {
281
+ scraperInstance = createSerperScraper({
282
+ ...serperScraperOptions,
283
+ apiKey: serperApiKey,
284
+ timeout: scraperTimeout ?? serperScraperOptions?.timeout,
285
+ logger,
286
+ });
287
+ }
288
+ else {
289
+ scraperInstance = createFirecrawlScraper({
290
+ ...firecrawlOptions,
291
+ apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
292
+ apiUrl: firecrawlApiUrl,
293
+ version: firecrawlVersion,
294
+ timeout: scraperTimeout ?? firecrawlOptions?.timeout,
295
+ formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
296
+ logger,
297
+ });
298
+ }
263
299
  const selectedReranker = createReranker({
264
300
  rerankerType,
265
301
  jinaApiKey,
@@ -274,7 +310,7 @@ const createSearchTool = (config = {}) => {
274
310
  reranker: selectedReranker,
275
311
  topResults,
276
312
  logger,
277
- }, firecrawlScraper);
313
+ }, scraperInstance);
278
314
  const search = createSearchProcessor({
279
315
  searchAPI,
280
316
  safeSearch,
@@ -1 +1 @@
1
- {"version":3,"file":"tool.mjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type { RunnableConfig } from '@langchain/core/runnables';\nimport type * as t from './types';\nimport {\n DATE_RANGE,\n querySchema,\n dateSchema,\n countrySchema,\n imagesSchema,\n videosSchema,\n newsSchema,\n} from './schema';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createDefaultLogger } from './utils';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\n/**\n * Executes parallel searches and merges the results\n */\nasync function executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n}: {\n searchAPI: ReturnType<typeof createSearchAPI>;\n query: string;\n date?: DATE_RANGE;\n country?: string;\n safeSearch: t.SearchToolConfig['safeSearch'];\n images: boolean;\n videos: boolean;\n news: boolean;\n logger: t.Logger;\n}): Promise<t.SearchResult> {\n // Prepare all search tasks to run in parallel\n const searchTasks: Promise<t.SearchResult>[] = [\n // Main search\n searchAPI.getSources({\n query,\n date,\n country,\n safeSearch,\n }),\n ];\n\n if (images) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'images',\n })\n .catch((error) => {\n logger.error('Error fetching images:', error);\n return {\n success: false,\n error: `Images search failed: ${error instanceof Error ? 
error.message : String(error)}`,\n };\n })\n );\n }\n if (videos) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'videos',\n })\n .catch((error) => {\n logger.error('Error fetching videos:', error);\n return {\n success: false,\n error: `Videos search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (news) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'news',\n })\n .catch((error) => {\n logger.error('Error fetching news:', error);\n return {\n success: false,\n error: `News search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n\n // Run all searches in parallel\n const results = await Promise.all(searchTasks);\n\n // Get the main search result (first result)\n const mainResult = results[0];\n if (!mainResult.success) {\n throw new Error(mainResult.error ?? 'Search failed');\n }\n\n // Merge additional results with the main results\n const mergedResults = { ...mainResult.data };\n\n // Convert existing news to topStories if present\n if (mergedResults.news !== undefined && mergedResults.news.length > 0) {\n const existingNewsAsTopStories = mergedResults.news\n .filter((newsItem) => newsItem.link !== undefined && newsItem.link !== '')\n .map((newsItem) => ({\n title: newsItem.title ?? '',\n link: newsItem.link ?? '',\n source: newsItem.source ?? '',\n date: newsItem.date ?? '',\n imageUrl: newsItem.imageUrl ?? '',\n processed: false,\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...existingNewsAsTopStories,\n ];\n delete mergedResults.news;\n }\n\n results.slice(1).forEach((result) => {\n if (result.success && result.data !== undefined) {\n if (result.data.images !== undefined && result.data.images.length > 0) {\n mergedResults.images = [\n ...(mergedResults.images ?? 
[]),\n ...result.data.images,\n ];\n }\n if (result.data.videos !== undefined && result.data.videos.length > 0) {\n mergedResults.videos = [\n ...(mergedResults.videos ?? []),\n ...result.data.videos,\n ];\n }\n if (result.data.news !== undefined && result.data.news.length > 0) {\n const newsAsTopStories = result.data.news.map((newsItem) => ({\n ...newsItem,\n link: newsItem.link ?? '',\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...newsAsTopStories,\n ];\n }\n }\n });\n\n return { success: true, data: mergedResults };\n}\n\nfunction createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n}: {\n safeSearch: t.SearchToolConfig['safeSearch'];\n searchAPI: ReturnType<typeof createSearchAPI>;\n sourceProcessor: ReturnType<typeof createSourceProcessor>;\n onGetHighlights: t.SearchToolConfig['onGetHighlights'];\n logger: t.Logger;\n}) {\n return async function ({\n query,\n date,\n country,\n proMode = true,\n maxSources = 5,\n onSearchResults,\n images = false,\n videos = false,\n news = false,\n }: {\n query: string;\n country?: string;\n date?: DATE_RANGE;\n proMode?: boolean;\n maxSources?: number;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n images?: boolean;\n videos?: boolean;\n news?: boolean;\n }): Promise<t.SearchResultData> {\n try {\n // Execute parallel searches and merge results\n const searchResult = await executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n });\n\n onSearchResults?.(searchResult);\n\n const processedSources = await sourceProcessor.processSources({\n query,\n news,\n result: searchResult,\n proMode,\n onGetHighlights,\n numElements: maxSources,\n });\n\n return expandHighlights(processedSources);\n } catch (error) {\n logger.error('Error in search:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n videos: [],\n news: [],\n relatedSearches: [],\n error: error 
instanceof Error ? error.message : String(error),\n };\n }\n };\n}\n\nfunction createOnSearchResults({\n runnableConfig,\n onSearchResults,\n}: {\n runnableConfig: RunnableConfig;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}) {\n return function (results: t.SearchResult): void {\n if (!onSearchResults) {\n return;\n }\n onSearchResults(results, runnableConfig);\n };\n}\n\nfunction createTool({\n schema,\n search,\n onSearchResults: _onSearchResults,\n}: {\n schema: t.SearchToolSchema;\n search: ReturnType<typeof createSearchProcessor>;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}): DynamicStructuredTool<typeof schema> {\n return tool<typeof schema>(\n async (params, runnableConfig) => {\n const { query, date, country: _c, images, videos, news } = params;\n const country = typeof _c === 'string' && _c ? _c : undefined;\n const searchResult = await search({\n query,\n date,\n country,\n images,\n videos,\n news,\n onSearchResults: createOnSearchResults({\n runnableConfig,\n onSearchResults: _onSearchResults,\n }),\n });\n const turn = runnableConfig.toolCall?.turn ?? 0;\n const { output, references } = formatResultsForLLM(turn, searchResult);\n const data: t.SearchResultData = { turn, ...searchResult, references };\n return [output, { [Constants.WEB_SEARCH]: data }];\n },\n {\n name: Constants.WEB_SEARCH,\n description: `Real-time search. Results have required citation anchors.\n\nNote: Use ONCE per reply unless instructed otherwise.\n\nAnchors:\n- \\\\ue202turnXtypeY\n- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx\n\nSpecial Markers:\n- \\\\ue203...\\\\ue204 — highlight start/end of cited text (for Standalone or Group citations)\n- \\\\ue200...\\\\ue201 — group block (e.g. \\\\ue200\\\\ue202turn0search1\\\\ue202turn0news2\\\\ue201)\n\n**CITE EVERY NON-OBVIOUS FACT/QUOTE:**\nUse anchor marker(s) immediately after the statement:\n- Standalone: \"Pure functions produce same output. 
\\\\ue202turn0search0\"\n- Standalone (multiple): \"Today's News \\\\ue202turn0search0\\\\ue202turn0news0\"\n- Highlight: \"\\\\ue203Highlight text.\\\\ue204\\\\ue202turn0news1\"\n- Group: \"Sources. \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Group Highlight: \"\\\\ue203Highlight for group.\\\\ue204 \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Image: \"See photo \\\\ue202turn0image0.\"\n\n**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**\n`.trim(),\n schema: schema,\n responseFormat: Constants.CONTENT_AND_ARTIFACT,\n }\n );\n}\n\n/**\n * Creates a search tool with a schema that dynamically includes the country field\n * only when the searchProvider is 'serper'.\n *\n * @param config - The search tool configuration\n * @returns A DynamicStructuredTool with a schema that depends on the searchProvider\n */\nexport const createSearchTool = (\n config: t.SearchToolConfig = {}\n): DynamicStructuredTool<typeof toolSchema> => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n rerankerType = 'cohere',\n topResults = 5,\n strategies = ['no_extraction'],\n filterContent = true,\n safeSearch = 1,\n firecrawlApiKey,\n firecrawlApiUrl,\n firecrawlOptions,\n scraperTimeout,\n jinaApiKey,\n jinaApiUrl,\n cohereApiKey,\n onSearchResults: _onSearchResults,\n onGetHighlights,\n } = config;\n\n const logger = config.logger || createDefaultLogger();\n\n const schemaObject: {\n query: z.ZodString;\n date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;\n country?: z.ZodOptional<z.ZodString>;\n images: z.ZodOptional<z.ZodBoolean>;\n videos: z.ZodOptional<z.ZodBoolean>;\n news: z.ZodOptional<z.ZodBoolean>;\n } = {\n query: querySchema,\n date: dateSchema,\n images: imagesSchema,\n videos: videosSchema,\n news: newsSchema,\n };\n\n if (searchProvider === 'serper') {\n schemaObject.country = countrySchema;\n }\n\n const toolSchema = z.object(schemaObject);\n\n const 
searchAPI = createSearchAPI({\n searchProvider,\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n });\n\n const firecrawlScraper = createFirecrawlScraper({\n ...firecrawlOptions,\n apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,\n apiUrl: firecrawlApiUrl,\n timeout: scraperTimeout ?? firecrawlOptions?.timeout,\n formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],\n });\n\n const selectedReranker = createReranker({\n rerankerType,\n jinaApiKey,\n jinaApiUrl,\n cohereApiKey,\n logger,\n });\n\n if (!selectedReranker) {\n logger.warn('No reranker selected. Using default ranking.');\n }\n\n const sourceProcessor = createSourceProcessor(\n {\n reranker: selectedReranker,\n topResults,\n strategies,\n filterContent,\n logger,\n },\n firecrawlScraper\n );\n\n const search = createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n });\n\n return createTool({\n search,\n schema: toolSchema,\n onSearchResults: _onSearchResults,\n 
});\n};\n"],"names":[],"mappings":";;;;;;;;;;;AAqBA;;AAEG;AACH,eAAe,uBAAuB,CAAC,EACrC,SAAS,EACT,KAAK,EACL,IAAI,EACJ,OAAO,EACP,UAAU,EACV,MAAM,EACN,MAAM,EACN,IAAI,EACJ,MAAM,GAWP,EAAA;;AAEC,IAAA,MAAM,WAAW,GAA8B;;QAE7C,SAAS,CAAC,UAAU,CAAC;YACnB,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;SACX,CAAC;KACH;IAED,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,IAAI,EAAE;QACR,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,MAAM;SACb;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;YAC3C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAuB,oBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACvF;SACF,CAAC,CACL;;;IAIH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;;AAG9C,IAAA,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAC;AAC7B,IAAA,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE;QACvB,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,eAAe,CAAC;;;IAItD,MAAM,aAAa,GAAG,EAAE,GAAG,UAAU,CAAC,IAAI,EAAE;;AAG5C,IAAA,IAAI,aAAa,CAAC,IAAI,KAAK,SAAS,IAAI,aAAa,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACrE,QAAA,MAAM,wBAAwB,GAAG,aAAa,CAAC;AAC5C,aAAA,MAAM,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,KAAK,SAAS,IAAI,QAAQ,CAAC,IAAI,KAAK,EAAE;AACxE,aAAA,GAAG,CAAC,CAAC,QAAQ,MAAM;AAClB,YAAA,KAAK,EAAE,QAAQ,CAAC,KAAK,I
AAI,EAAE;AAC3B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,MAAM,EAAE,QAAQ,CAAC,MAAM,IAAI,EAAE;AAC7B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,QAAQ,EAAE,QAAQ,CAAC,QAAQ,IAAI,EAAE;AACjC,YAAA,SAAS,EAAE,KAAK;AACjB,SAAA,CAAC,CAAC;QACL,aAAa,CAAC,UAAU,GAAG;AACzB,YAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,YAAA,GAAG,wBAAwB;SAC5B;QACD,OAAO,aAAa,CAAC,IAAI;;IAG3B,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,KAAI;QAClC,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE;AAC/C,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,gBAAA,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,MAAM;AAC3D,oBAAA,GAAG,QAAQ;AACX,oBAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AAC1B,iBAAA,CAAC,CAAC;gBACH,aAAa,CAAC,UAAU,GAAG;AACzB,oBAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,oBAAA,GAAG,gBAAgB;iBACpB;;;AAGP,KAAC,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE;AAC/C;AAEA,SAAS,qBAAqB,CAAC,EAC7B,SAAS,EACT,UAAU,EACV,eAAe,EACf,eAAe,EACf,MAAM,GAOP,EAAA;AACC,IAAA,OAAO,gBAAgB,EACrB,KAAK,EACL,IAAI,EACJ,OAAO,EACP,OAAO,GAAG,IAAI,EACd,UAAU,GAAG,CAAC,EACd,eAAe,EACf,MAAM,GAAG,KAAK,EACd,MAAM,GAAG,KAAK,EACd,IAAI,GAAG,KAAK,GAWb,EAAA;AACC,QAAA,IAAI;;AAEF,YAAA,MAAM,YAAY,GAAG,MAAM,uBAAuB,CAAC;gBACjD,SAAS;gBACT,KAAK;gBACL,IAAI;gBACJ,OAAO;gBACP,UAAU;gBACV,MAAM;gBACN,MAAM;gBACN,IAAI;gBACJ,MAAM;AACP,aAAA,CAAC;AAEF,YAAA,eAAe,GAAG,YAAY,CAAC;AAE/B,YAAA,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAAC,cAAc,CAAC;gBAC5D,KAAK;gBACL,IAAI;AACJ,gBAAA,MAAM,EAAE,YAAY;gBACpB,OAAO;gBACP,eAAe;AAC
f,gBAAA,WAAW,EAAE,UAAU;AACxB,aAAA,CAAC;AAEF,YAAA,OAAO,gBAAgB,CAAC,gBAAgB,CAAC;;QACzC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,CAAC,KAAK,CAAC,kBAAkB,EAAE,KAAK,CAAC;YACvC,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,IAAI,EAAE,EAAE;AACR,gBAAA,eAAe,EAAE,EAAE;AACnB,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;AACH;AAEA,SAAS,qBAAqB,CAAC,EAC7B,cAAc,EACd,eAAe,GAIhB,EAAA;AACC,IAAA,OAAO,UAAU,OAAuB,EAAA;QACtC,IAAI,CAAC,eAAe,EAAE;YACpB;;AAEF,QAAA,eAAe,CAAC,OAAO,EAAE,cAAc,CAAC;AAC1C,KAAC;AACH;AAEA,SAAS,UAAU,CAAC,EAClB,MAAM,EACN,MAAM,EACN,eAAe,EAAE,gBAAgB,GAKlC,EAAA;IACC,OAAO,IAAI,CACT,OAAO,MAAM,EAAE,cAAc,KAAI;AAC/B,QAAA,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM;AACjE,QAAA,MAAM,OAAO,GAAG,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,GAAG,EAAE,GAAG,SAAS;AAC7D,QAAA,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC;YAChC,KAAK;YACL,IAAI;YACJ,OAAO;YACP,MAAM;YACN,MAAM;YACN,IAAI;YACJ,eAAe,EAAE,qBAAqB,CAAC;gBACrC,cAAc;AACd,gBAAA,eAAe,EAAE,gBAAgB;aAClC,CAAC;AACH,SAAA,CAAC;QACF,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC;AAC/C,QAAA,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,mBAAmB,CAAC,IAAI,EAAE,YAAY,CAAC;QACtE,MAAM,IAAI,GAAuB,EAAE,IAAI,EAAE,GAAG,YAAY,EAAE,UAAU,EAAE;AACtE,QAAA,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,SAAS,CAAC,UAAU,GAAG,IAAI,EAAE,CAAC;AACnD,KAAC,EACD;QACE,IAAI,EAAE,SAAS,CAAC,UAAU;AAC1B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;;;;;;;AAsBlB,CAAA,CAAC,IAAI,EAAE;AACF,QAAA,MAAM,EAAE,MAAM;QACd,cAAc,EAAE,SAAS,CAAC,oBAAoB;AAC/C,KAAA,CACF;AACH;AAEA;;;;;;AAMG;MACU,gBAAgB,GAAG,CAC9B,MAA6B,GAAA,EAAE,KACa;IAC5C,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,EACb,YAAY,GAAG,QAAQ,EACvB,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,eAAe,CAAC,EAC9B,aAAa,GAAG,IAAI,EACpB,UAAU,GAAG,CAAC,EACd,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,cAAc,EACd,UAAU,EACV,UAAU,EACV,YAAY,EACZ,eAAe,EAAE,gBAAgB,EACjC,eAAe,GAChB,GAAG,MAAM;IAEV,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,mBAAmB,EAAE;AAErD,IAAA,MAAM,YAAY,GAOd;AACF,Q
AAA,KAAK,EAAE,WAAW;AAClB,QAAA,IAAI,EAAE,UAAU;AAChB,QAAA,MAAM,EAAE,YAAY;AACpB,QAAA,MAAM,EAAE,YAAY;AACpB,QAAA,IAAI,EAAE,UAAU;KACjB;AAED,IAAA,IAAI,cAAc,KAAK,QAAQ,EAAE;AAC/B,QAAA,YAAY,CAAC,OAAO,GAAG,aAAa;;IAGtC,MAAM,UAAU,GAAG,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;IAEzC,MAAM,SAAS,GAAG,eAAe,CAAC;QAChC,cAAc;QACd,YAAY;QACZ,kBAAkB;QAClB,aAAa;AACd,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAG,sBAAsB,CAAC;AAC9C,QAAA,GAAG,gBAAgB;AACnB,QAAA,MAAM,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB;AACxD,QAAA,MAAM,EAAE,eAAe;AACvB,QAAA,OAAO,EAAE,cAAc,IAAI,gBAAgB,EAAE,OAAO;QACpD,OAAO,EAAE,gBAAgB,EAAE,OAAO,IAAI,CAAC,UAAU,EAAE,SAAS,CAAC;AAC9D,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAG,cAAc,CAAC;QACtC,YAAY;QACZ,UAAU;QACV,UAAU;QACV,YAAY;QACZ,MAAM;AACP,KAAA,CAAC;IAEF,IAAI,CAAC,gBAAgB,EAAE;AACrB,QAAA,MAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC;;IAG7D,MAAM,eAAe,GAAG,qBAAqB,CAC3C;AACE,QAAA,QAAQ,EAAE,gBAAgB;QAC1B,UAAU;QAGV,MAAM;KACP,EACD,gBAAgB,CACjB;IAED,MAAM,MAAM,GAAG,qBAAqB,CAAC;QACnC,SAAS;QACT,UAAU;QACV,eAAe;QACf,eAAe;QACf,MAAM;AACP,KAAA,CAAC;AAEF,IAAA,OAAO,UAAU,CAAC;QAChB,MAAM;AACN,QAAA,MAAM,EAAE,UAAU;AAClB,QAAA,eAAe,EAAE,gBAAgB;AAClC,KAAA,CAAC;AACJ;;;;"}
1
+ {"version":3,"file":"tool.mjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type { RunnableConfig } from '@langchain/core/runnables';\nimport type * as t from './types';\nimport {\n DATE_RANGE,\n querySchema,\n dateSchema,\n countrySchema,\n imagesSchema,\n videosSchema,\n newsSchema,\n} from './schema';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createSerperScraper } from './serper-scraper';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createDefaultLogger } from './utils';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\n/**\n * Executes parallel searches and merges the results\n */\nasync function executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n}: {\n searchAPI: ReturnType<typeof createSearchAPI>;\n query: string;\n date?: DATE_RANGE;\n country?: string;\n safeSearch: t.SearchToolConfig['safeSearch'];\n images: boolean;\n videos: boolean;\n news: boolean;\n logger: t.Logger;\n}): Promise<t.SearchResult> {\n // Prepare all search tasks to run in parallel\n const searchTasks: Promise<t.SearchResult>[] = [\n // Main search\n searchAPI.getSources({\n query,\n date,\n country,\n safeSearch,\n }),\n ];\n\n if (images) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'images',\n })\n .catch((error) => {\n logger.error('Error fetching images:', error);\n return {\n success: false,\n error: `Images search failed: ${error instanceof Error ? 
error.message : String(error)}`,\n };\n })\n );\n }\n if (videos) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'videos',\n })\n .catch((error) => {\n logger.error('Error fetching videos:', error);\n return {\n success: false,\n error: `Videos search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (news) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'news',\n })\n .catch((error) => {\n logger.error('Error fetching news:', error);\n return {\n success: false,\n error: `News search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n\n // Run all searches in parallel\n const results = await Promise.all(searchTasks);\n\n // Get the main search result (first result)\n const mainResult = results[0];\n if (!mainResult.success) {\n throw new Error(mainResult.error ?? 'Search failed');\n }\n\n // Merge additional results with the main results\n const mergedResults = { ...mainResult.data };\n\n // Convert existing news to topStories if present\n if (mergedResults.news !== undefined && mergedResults.news.length > 0) {\n const existingNewsAsTopStories = mergedResults.news\n .filter((newsItem) => newsItem.link !== undefined && newsItem.link !== '')\n .map((newsItem) => ({\n title: newsItem.title ?? '',\n link: newsItem.link ?? '',\n source: newsItem.source ?? '',\n date: newsItem.date ?? '',\n imageUrl: newsItem.imageUrl ?? '',\n processed: false,\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...existingNewsAsTopStories,\n ];\n delete mergedResults.news;\n }\n\n results.slice(1).forEach((result) => {\n if (result.success && result.data !== undefined) {\n if (result.data.images !== undefined && result.data.images.length > 0) {\n mergedResults.images = [\n ...(mergedResults.images ?? 
[]),\n ...result.data.images,\n ];\n }\n if (result.data.videos !== undefined && result.data.videos.length > 0) {\n mergedResults.videos = [\n ...(mergedResults.videos ?? []),\n ...result.data.videos,\n ];\n }\n if (result.data.news !== undefined && result.data.news.length > 0) {\n const newsAsTopStories = result.data.news.map((newsItem) => ({\n ...newsItem,\n link: newsItem.link ?? '',\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...newsAsTopStories,\n ];\n }\n }\n });\n\n return { success: true, data: mergedResults };\n}\n\nfunction createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n}: {\n safeSearch: t.SearchToolConfig['safeSearch'];\n searchAPI: ReturnType<typeof createSearchAPI>;\n sourceProcessor: ReturnType<typeof createSourceProcessor>;\n onGetHighlights: t.SearchToolConfig['onGetHighlights'];\n logger: t.Logger;\n}) {\n return async function ({\n query,\n date,\n country,\n proMode = true,\n maxSources = 5,\n onSearchResults,\n images = false,\n videos = false,\n news = false,\n }: {\n query: string;\n country?: string;\n date?: DATE_RANGE;\n proMode?: boolean;\n maxSources?: number;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n images?: boolean;\n videos?: boolean;\n news?: boolean;\n }): Promise<t.SearchResultData> {\n try {\n // Execute parallel searches and merge results\n const searchResult = await executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n });\n\n onSearchResults?.(searchResult);\n\n const processedSources = await sourceProcessor.processSources({\n query,\n news,\n result: searchResult,\n proMode,\n onGetHighlights,\n numElements: maxSources,\n });\n\n return expandHighlights(processedSources);\n } catch (error) {\n logger.error('Error in search:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n videos: [],\n news: [],\n relatedSearches: [],\n error: error 
instanceof Error ? error.message : String(error),\n };\n }\n };\n}\n\nfunction createOnSearchResults({\n runnableConfig,\n onSearchResults,\n}: {\n runnableConfig: RunnableConfig;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}) {\n return function (results: t.SearchResult): void {\n if (!onSearchResults) {\n return;\n }\n onSearchResults(results, runnableConfig);\n };\n}\n\nfunction createTool({\n schema,\n search,\n onSearchResults: _onSearchResults,\n}: {\n schema: t.SearchToolSchema;\n search: ReturnType<typeof createSearchProcessor>;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}): DynamicStructuredTool<typeof schema> {\n return tool<typeof schema>(\n async (params, runnableConfig) => {\n const { query, date, country: _c, images, videos, news } = params;\n const country = typeof _c === 'string' && _c ? _c : undefined;\n const searchResult = await search({\n query,\n date,\n country,\n images,\n videos,\n news,\n onSearchResults: createOnSearchResults({\n runnableConfig,\n onSearchResults: _onSearchResults,\n }),\n });\n const turn = runnableConfig.toolCall?.turn ?? 0;\n const { output, references } = formatResultsForLLM(turn, searchResult);\n const data: t.SearchResultData = { turn, ...searchResult, references };\n return [output, { [Constants.WEB_SEARCH]: data }];\n },\n {\n name: Constants.WEB_SEARCH,\n description: `Real-time search. Results have required citation anchors.\n\nNote: Use ONCE per reply unless instructed otherwise.\n\nAnchors:\n- \\\\ue202turnXtypeY\n- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx\n\nSpecial Markers:\n- \\\\ue203...\\\\ue204 — highlight start/end of cited text (for Standalone or Group citations)\n- \\\\ue200...\\\\ue201 — group block (e.g. \\\\ue200\\\\ue202turn0search1\\\\ue202turn0news2\\\\ue201)\n\n**CITE EVERY NON-OBVIOUS FACT/QUOTE:**\nUse anchor marker(s) immediately after the statement:\n- Standalone: \"Pure functions produce same output. 
\\\\ue202turn0search0\"\n- Standalone (multiple): \"Today's News \\\\ue202turn0search0\\\\ue202turn0news0\"\n- Highlight: \"\\\\ue203Highlight text.\\\\ue204\\\\ue202turn0news1\"\n- Group: \"Sources. \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Group Highlight: \"\\\\ue203Highlight for group.\\\\ue204 \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Image: \"See photo \\\\ue202turn0image0.\"\n\n**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**\n`.trim(),\n schema: schema,\n responseFormat: Constants.CONTENT_AND_ARTIFACT,\n }\n );\n}\n\n/**\n * Creates a search tool with a schema that dynamically includes the country field\n * only when the searchProvider is 'serper'.\n *\n * Supports multiple scraper providers:\n * - Firecrawl (default): Full-featured web scraping with multiple formats\n * - Serper: Lightweight scraping using Serper's scrape API\n *\n * @example\n * ```typescript\n * // Using Firecrawl scraper (default)\n * const searchTool = createSearchTool({\n * searchProvider: 'serper',\n * scraperProvider: 'firecrawl',\n * firecrawlApiKey: 'your-firecrawl-key'\n * });\n *\n * // Using Serper scraper\n * const searchTool = createSearchTool({\n * searchProvider: 'serper',\n * scraperProvider: 'serper',\n * serperApiKey: 'your-serper-key'\n * });\n * ```\n *\n * @param config - The search tool configuration\n * @returns A DynamicStructuredTool with a schema that depends on the searchProvider\n */\nexport const createSearchTool = (\n config: t.SearchToolConfig = {}\n): DynamicStructuredTool<typeof toolSchema> => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n rerankerType = 'cohere',\n topResults = 5,\n strategies = ['no_extraction'],\n filterContent = true,\n safeSearch = 1,\n scraperProvider = 'firecrawl',\n firecrawlApiKey,\n firecrawlApiUrl,\n firecrawlVersion,\n firecrawlOptions,\n serperScraperOptions,\n scraperTimeout,\n jinaApiKey,\n 
jinaApiUrl,\n cohereApiKey,\n onSearchResults: _onSearchResults,\n onGetHighlights,\n } = config;\n\n const logger = config.logger || createDefaultLogger();\n\n const schemaObject: {\n query: z.ZodString;\n date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;\n country?: z.ZodOptional<z.ZodString>;\n images: z.ZodOptional<z.ZodBoolean>;\n videos: z.ZodOptional<z.ZodBoolean>;\n news: z.ZodOptional<z.ZodBoolean>;\n } = {\n query: querySchema,\n date: dateSchema,\n images: imagesSchema,\n videos: videosSchema,\n news: newsSchema,\n };\n\n if (searchProvider === 'serper') {\n schemaObject.country = countrySchema;\n }\n\n const toolSchema = z.object(schemaObject);\n\n const searchAPI = createSearchAPI({\n searchProvider,\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n });\n\n /** Create scraper based on scraperProvider */\n let scraperInstance: t.BaseScraper;\n\n if (scraperProvider === 'serper') {\n scraperInstance = createSerperScraper({\n ...serperScraperOptions,\n apiKey: serperApiKey,\n timeout: scraperTimeout ?? serperScraperOptions?.timeout,\n logger,\n });\n } else {\n scraperInstance = createFirecrawlScraper({\n ...firecrawlOptions,\n apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,\n apiUrl: firecrawlApiUrl,\n version: firecrawlVersion,\n timeout: scraperTimeout ?? firecrawlOptions?.timeout,\n formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],\n logger,\n });\n }\n\n const selectedReranker = createReranker({\n rerankerType,\n jinaApiKey,\n jinaApiUrl,\n cohereApiKey,\n logger,\n });\n\n if (!selectedReranker) {\n logger.warn('No reranker selected. 
Using default ranking.');\n }\n\n const sourceProcessor = createSourceProcessor(\n {\n reranker: selectedReranker,\n topResults,\n strategies,\n filterContent,\n logger,\n },\n scraperInstance\n );\n\n const search = createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n });\n\n return createTool({\n search,\n schema: toolSchema,\n onSearchResults: _onSearchResults,\n });\n};\n"],"names":[],"mappings":";;;;;;;;;;;;AAsBA;;AAEG;AACH,eAAe,uBAAuB,CAAC,EACrC,SAAS,EACT,KAAK,EACL,IAAI,EACJ,OAAO,EACP,UAAU,EACV,MAAM,EACN,MAAM,EACN,IAAI,EACJ,MAAM,GAWP,EAAA;;AAEC,IAAA,MAAM,WAAW,GAA8B;;QAE7C,SAAS,CAAC,UAAU,CAAC;YACnB,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;SACX,CAAC;KACH;IAED,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,IAAI,EAAE;QACR,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,MAAM;SACb;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;YAC3C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAuB,oBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACvF;SACF,CAAC,CACL;;;IAIH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;;AAG9C,IAAA,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAC;AAC7B,IAAA,IAAI,CAAC,
UAAU,CAAC,OAAO,EAAE;QACvB,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,eAAe,CAAC;;;IAItD,MAAM,aAAa,GAAG,EAAE,GAAG,UAAU,CAAC,IAAI,EAAE;;AAG5C,IAAA,IAAI,aAAa,CAAC,IAAI,KAAK,SAAS,IAAI,aAAa,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACrE,QAAA,MAAM,wBAAwB,GAAG,aAAa,CAAC;AAC5C,aAAA,MAAM,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,KAAK,SAAS,IAAI,QAAQ,CAAC,IAAI,KAAK,EAAE;AACxE,aAAA,GAAG,CAAC,CAAC,QAAQ,MAAM;AAClB,YAAA,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,EAAE;AAC3B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,MAAM,EAAE,QAAQ,CAAC,MAAM,IAAI,EAAE;AAC7B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,QAAQ,EAAE,QAAQ,CAAC,QAAQ,IAAI,EAAE;AACjC,YAAA,SAAS,EAAE,KAAK;AACjB,SAAA,CAAC,CAAC;QACL,aAAa,CAAC,UAAU,GAAG;AACzB,YAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,YAAA,GAAG,wBAAwB;SAC5B;QACD,OAAO,aAAa,CAAC,IAAI;;IAG3B,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,KAAI;QAClC,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE;AAC/C,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,gBAAA,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,MAAM;AAC3D,oBAAA,GAAG,QAAQ;AACX,oBAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AAC1B,iBAAA,CAAC,CAAC;gBACH,aAAa,CAAC,UAAU,GAAG;AACzB,oBAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,oBAAA,GAAG,gBAAgB;iBACpB;;;AAGP,KAAC,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE;AAC/C;AAEA,SAAS,qBAAqB,CAAC,EAC7B,SAAS,EACT,UAAU,EACV,eAAe,EACf,eAAe,EACf,MAAM,GAOP,EAAA;AACC,IAAA,OAAO,gBAAgB,EACrB,KAAK,EACL,IAAI,EACJ,OAAO,EACP,OAAO,GAAG,IAAI,EACd,UAAU,GAA
G,CAAC,EACd,eAAe,EACf,MAAM,GAAG,KAAK,EACd,MAAM,GAAG,KAAK,EACd,IAAI,GAAG,KAAK,GAWb,EAAA;AACC,QAAA,IAAI;;AAEF,YAAA,MAAM,YAAY,GAAG,MAAM,uBAAuB,CAAC;gBACjD,SAAS;gBACT,KAAK;gBACL,IAAI;gBACJ,OAAO;gBACP,UAAU;gBACV,MAAM;gBACN,MAAM;gBACN,IAAI;gBACJ,MAAM;AACP,aAAA,CAAC;AAEF,YAAA,eAAe,GAAG,YAAY,CAAC;AAE/B,YAAA,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAAC,cAAc,CAAC;gBAC5D,KAAK;gBACL,IAAI;AACJ,gBAAA,MAAM,EAAE,YAAY;gBACpB,OAAO;gBACP,eAAe;AACf,gBAAA,WAAW,EAAE,UAAU;AACxB,aAAA,CAAC;AAEF,YAAA,OAAO,gBAAgB,CAAC,gBAAgB,CAAC;;QACzC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,CAAC,KAAK,CAAC,kBAAkB,EAAE,KAAK,CAAC;YACvC,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,IAAI,EAAE,EAAE;AACR,gBAAA,eAAe,EAAE,EAAE;AACnB,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;AACH;AAEA,SAAS,qBAAqB,CAAC,EAC7B,cAAc,EACd,eAAe,GAIhB,EAAA;AACC,IAAA,OAAO,UAAU,OAAuB,EAAA;QACtC,IAAI,CAAC,eAAe,EAAE;YACpB;;AAEF,QAAA,eAAe,CAAC,OAAO,EAAE,cAAc,CAAC;AAC1C,KAAC;AACH;AAEA,SAAS,UAAU,CAAC,EAClB,MAAM,EACN,MAAM,EACN,eAAe,EAAE,gBAAgB,GAKlC,EAAA;IACC,OAAO,IAAI,CACT,OAAO,MAAM,EAAE,cAAc,KAAI;AAC/B,QAAA,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM;AACjE,QAAA,MAAM,OAAO,GAAG,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,GAAG,EAAE,GAAG,SAAS;AAC7D,QAAA,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC;YAChC,KAAK;YACL,IAAI;YACJ,OAAO;YACP,MAAM;YACN,MAAM;YACN,IAAI;YACJ,eAAe,EAAE,qBAAqB,CAAC;gBACrC,cAAc;AACd,gBAAA,eAAe,EAAE,gBAAgB;aAClC,CAAC;AACH,SAAA,CAAC;QACF,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC;AAC/C,QAAA,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,mBAAmB,CAAC,IAAI,EAAE,YAAY,CAAC;QACtE,MAAM,IAAI,GAAuB,EAAE,IAAI,EAAE,GAAG,YAAY,EAAE,UAAU,EAAE;AACtE,QAAA,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,SAAS,CAAC,UAAU,GAAG,IAAI,EAAE,CAAC;AACnD,KAAC,EACD;QACE,IAAI,EAAE,SAAS,CAAC,UAAU;AAC1B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;;;;;;;AAsBlB,CAAA,CAAC,IAAI,EAAE;AACF,QAAA,MAAM,EAAE,MAAM;QACd,cAAc,EAAE,SAAS,CAAC,oBAAoB;AAC/C,KAAA,CACF;AACH;AAEA;;;;;;;;;;;;;;;;;;;;;
;;;;;;AA2BG;MACU,gBAAgB,GAAG,CAC9B,MAA6B,GAAA,EAAE,KACa;AAC5C,IAAA,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,EACb,YAAY,GAAG,QAAQ,EACvB,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,eAAe,CAAC,EAC9B,aAAa,GAAG,IAAI,EACpB,UAAU,GAAG,CAAC,EACd,eAAe,GAAG,WAAW,EAC7B,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,gBAAgB,EAChB,oBAAoB,EACpB,cAAc,EACd,UAAU,EACV,UAAU,EACV,YAAY,EACZ,eAAe,EAAE,gBAAgB,EACjC,eAAe,GAChB,GAAG,MAAM;IAEV,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,mBAAmB,EAAE;AAErD,IAAA,MAAM,YAAY,GAOd;AACF,QAAA,KAAK,EAAE,WAAW;AAClB,QAAA,IAAI,EAAE,UAAU;AAChB,QAAA,MAAM,EAAE,YAAY;AACpB,QAAA,MAAM,EAAE,YAAY;AACpB,QAAA,IAAI,EAAE,UAAU;KACjB;AAED,IAAA,IAAI,cAAc,KAAK,QAAQ,EAAE;AAC/B,QAAA,YAAY,CAAC,OAAO,GAAG,aAAa;;IAGtC,MAAM,UAAU,GAAG,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;IAEzC,MAAM,SAAS,GAAG,eAAe,CAAC;QAChC,cAAc;QACd,YAAY;QACZ,kBAAkB;QAClB,aAAa;AACd,KAAA,CAAC;;AAGF,IAAA,IAAI,eAA8B;AAElC,IAAA,IAAI,eAAe,KAAK,QAAQ,EAAE;QAChC,eAAe,GAAG,mBAAmB,CAAC;AACpC,YAAA,GAAG,oBAAoB;AACvB,YAAA,MAAM,EAAE,YAAY;AACpB,YAAA,OAAO,EAAE,cAAc,IAAI,oBAAoB,EAAE,OAAO;YACxD,MAAM;AACP,SAAA,CAAC;;SACG;QACL,eAAe,GAAG,sBAAsB,CAAC;AACvC,YAAA,GAAG,gBAAgB;AACnB,YAAA,MAAM,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB;AACxD,YAAA,MAAM,EAAE,eAAe;AACvB,YAAA,OAAO,EAAE,gBAAgB;AACzB,YAAA,OAAO,EAAE,cAAc,IAAI,gBAAgB,EAAE,OAAO;YACpD,OAAO,EAAE,gBAAgB,EAAE,OAAO,IAAI,CAAC,UAAU,EAAE,SAAS,CAAC;YAC7D,MAAM;AACP,SAAA,CAAC;;IAGJ,MAAM,gBAAgB,GAAG,cAAc,CAAC;QACtC,YAAY;QACZ,UAAU;QACV,UAAU;QACV,YAAY;QACZ,MAAM;AACP,KAAA,CAAC;IAEF,IAAI,CAAC,gBAAgB,EAAE;AACrB,QAAA,MAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC;;IAG7D,MAAM,eAAe,GAAG,qBAAqB,CAC3C;AACE,QAAA,QAAQ,EAAE,gBAAgB;QAC1B,UAAU;QAGV,MAAM;KACP,EACD,eAAe,CAChB;IAED,MAAM,MAAM,GAAG,qBAAqB,CAAC;QACnC,SAAS;QACT,UAAU;QACV,eAAe;QACf,eAAe;QACf,MAAM;AACP,KAAA,CAAC;AAEF,IAAA,OAAO,UAAU,CAAC;QAChB,MAAM;AACN,QAAA,MAAM,EAAE,UAAU;AAClB,QAAA,eAAe,EAAE,gBAAgB;AAClC,KAAA,CAAC;AACJ;;;;"}
@@ -3,9 +3,10 @@ import type * as t from './types';
3
3
  * Firecrawl scraper implementation
4
4
  * Uses the Firecrawl API to scrape web pages
5
5
  */
6
- export declare class FirecrawlScraper {
6
+ export declare class FirecrawlScraper implements t.BaseScraper {
7
7
  private apiKey;
8
8
  private apiUrl;
9
+ private version;
9
10
  private defaultFormats;
10
11
  private timeout;
11
12
  private logger;
@@ -1,9 +1,8 @@
1
1
  import type * as t from './types';
2
- import { FirecrawlScraper } from './firecrawl';
3
2
  export declare const createSearchAPI: (config: t.SearchConfig) => {
4
3
  getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;
5
4
  };
6
- export declare const createSourceProcessor: (config?: t.ProcessSourcesConfig, scraperInstance?: FirecrawlScraper) => {
5
+ export declare const createSourceProcessor: (config?: t.ProcessSourcesConfig, scraperInstance?: t.BaseScraper) => {
7
6
  processSources: (fields: t.ProcessSourcesFields) => Promise<t.SearchResultData>;
8
7
  topResults: number;
9
8
  };
@@ -0,0 +1,59 @@
1
+ import type * as t from './types';
2
+ /**
3
+ * Serper scraper implementation
4
+ * Uses the Serper Scrape API (https://scrape.serper.dev) to scrape web pages
5
+ *
6
+ * Features:
7
+ * - Simple API with single endpoint
8
+ * - Returns both text and markdown content
9
+ * - Includes metadata from scraped pages
10
+ * - Credits-based pricing model
11
+ *
12
+ * @example
13
+ * ```typescript
14
+ * const scraper = createSerperScraper({
15
+ * apiKey: 'your-serper-api-key',
16
+ * includeMarkdown: true,
17
+ * timeout: 10000
18
+ * });
19
+ *
20
+ * const [url, response] = await scraper.scrapeUrl('https://example.com');
21
+ * if (response.success) {
22
+ * const [content] = scraper.extractContent(response);
23
+ * console.log(content);
24
+ * }
25
+ * ```
26
+ */
27
+ export declare class SerperScraper implements t.BaseScraper {
28
+ private apiKey;
29
+ private apiUrl;
30
+ private timeout;
31
+ private logger;
32
+ private includeMarkdown;
33
+ constructor(config?: t.SerperScraperConfig);
34
+ /**
35
+ * Scrape a single URL
36
+ * @param url URL to scrape
37
+ * @param options Scrape options
38
+ * @returns Scrape response
39
+ */
40
+ scrapeUrl(url: string, options?: t.SerperScrapeOptions): Promise<[string, t.SerperScrapeResponse]>;
41
+ /**
42
+ * Extract content from scrape response
43
+ * @param response Scrape response
44
+ * @returns Extracted content or empty string if not available
45
+ */
46
+ extractContent(response: t.SerperScrapeResponse): [string, undefined | t.References];
47
+ /**
48
+ * Extract metadata from scrape response
49
+ * @param response Scrape response
50
+ * @returns Metadata object
51
+ */
52
+ extractMetadata(response: t.SerperScrapeResponse): Record<string, string | number | boolean | null | undefined>;
53
+ }
54
+ /**
55
+ * Create a Serper scraper instance
56
+ * @param config Scraper configuration
57
+ * @returns Serper scraper instance
58
+ */
59
+ export declare const createSerperScraper: (config?: t.SerperScraperConfig) => SerperScraper;
@@ -6,6 +6,27 @@ import { DATE_RANGE } from './schema';
6
6
  * Creates a search tool with a schema that dynamically includes the country field
7
7
  * only when the searchProvider is 'serper'.
8
8
  *
9
+ * Supports multiple scraper providers:
10
+ * - Firecrawl (default): Full-featured web scraping with multiple formats
11
+ * - Serper: Lightweight scraping using Serper's scrape API
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * // Using Firecrawl scraper (default)
16
+ * const searchTool = createSearchTool({
17
+ * searchProvider: 'serper',
18
+ * scraperProvider: 'firecrawl',
19
+ * firecrawlApiKey: 'your-firecrawl-key'
20
+ * });
21
+ *
22
+ * // Using Serper scraper
23
+ * const searchTool = createSearchTool({
24
+ * searchProvider: 'serper',
25
+ * scraperProvider: 'serper',
26
+ * serperApiKey: 'your-serper-key'
27
+ * });
28
+ * ```
29
+ *
9
30
  * @param config - The search tool configuration
10
31
  * @returns A DynamicStructuredTool with a schema that depends on the searchProvider
11
32
  */
@@ -4,6 +4,7 @@ import type { RunnableConfig } from '@langchain/core/runnables';
4
4
  import type { BaseReranker } from './rerankers';
5
5
  import { DATE_RANGE } from './schema';
6
6
  export type SearchProvider = 'serper' | 'searxng';
7
+ export type ScraperProvider = 'firecrawl' | 'serper';
7
8
  export type RerankerType = 'infinity' | 'jina' | 'cohere' | 'none';
8
9
  export interface Highlight {
9
10
  score: number;
@@ -85,8 +86,16 @@ export interface ProcessSourcesConfig {
85
86
  export interface FirecrawlConfig {
86
87
  firecrawlApiKey?: string;
87
88
  firecrawlApiUrl?: string;
89
+ firecrawlVersion?: string;
88
90
  firecrawlOptions?: FirecrawlScraperConfig;
89
91
  }
92
+ export interface SerperScraperConfig {
93
+ apiKey?: string;
94
+ apiUrl?: string;
95
+ timeout?: number;
96
+ logger?: Logger;
97
+ includeMarkdown?: boolean;
98
+ }
90
99
  export interface ScraperContentResult {
91
100
  content: string;
92
101
  }
@@ -133,7 +142,9 @@ export interface SearchToolConfig extends SearchConfig, ProcessSourcesConfig, Fi
133
142
  jinaApiUrl?: string;
134
143
  cohereApiKey?: string;
135
144
  rerankerType?: RerankerType;
145
+ scraperProvider?: ScraperProvider;
136
146
  scraperTimeout?: number;
147
+ serperScraperOptions?: SerperScraperConfig;
137
148
  onSearchResults?: (results: SearchResult, runnableConfig?: RunnableConfig) => void;
138
149
  onGetHighlights?: (link: string) => void;
139
150
  }
@@ -147,8 +158,15 @@ export type UsedReferences = {
147
158
  originalIndex: number;
148
159
  reference: MediaReference;
149
160
  }[];
161
+ /** Base Scraper Interface */
162
+ export interface BaseScraper {
163
+ scrapeUrl(url: string, options?: unknown): Promise<[string, FirecrawlScrapeResponse | SerperScrapeResponse]>;
164
+ extractContent(response: FirecrawlScrapeResponse | SerperScrapeResponse): [string, undefined | References];
165
+ extractMetadata(response: FirecrawlScrapeResponse | SerperScrapeResponse): ScrapeMetadata | Record<string, string | number | boolean | null | undefined>;
166
+ }
150
167
  /** Firecrawl */
151
- export type FirecrawlScrapeOptions = Omit<FirecrawlScraperConfig, 'apiKey' | 'apiUrl' | 'logger'>;
168
+ export type FirecrawlScrapeOptions = Omit<FirecrawlScraperConfig, 'apiKey' | 'apiUrl' | 'version' | 'logger'>;
169
+ export type SerperScrapeOptions = Omit<SerperScraperConfig, 'apiKey' | 'apiUrl' | 'logger'>;
152
170
  export interface ScrapeMetadata {
153
171
  sourceURL?: string;
154
172
  url?: string;
@@ -214,9 +232,20 @@ export interface FirecrawlScrapeResponse {
214
232
  };
215
233
  error?: string;
216
234
  }
235
+ export interface SerperScrapeResponse {
236
+ success: boolean;
237
+ data?: {
238
+ text?: string;
239
+ markdown?: string;
240
+ metadata?: Record<string, string | number | boolean | null | undefined>;
241
+ credits?: number;
242
+ };
243
+ error?: string;
244
+ }
217
245
  export interface FirecrawlScraperConfig {
218
246
  apiKey?: string;
219
247
  apiUrl?: string;
248
+ version?: string;
220
249
  formats?: string[];
221
250
  timeout?: number;
222
251
  logger?: Logger;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@librechat/agents",
3
- "version": "2.4.83",
3
+ "version": "2.4.84",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -83,7 +83,11 @@ async function testStandardStreaming(): Promise<void> {
83
83
  graphConfig: {
84
84
  type: 'standard',
85
85
  llmConfig,
86
- tools: [createSearchTool()],
86
+ tools: [
87
+ createSearchTool({
88
+ scraperProvider: 'serper',
89
+ }),
90
+ ],
87
91
  instructions:
88
92
  'You are a friendly AI assistant. Always address the user by their name.',
89
93
  // additional_instructions: `The user's name is ${userName} and they are located in ${location}.`,
@@ -7,9 +7,10 @@ import { createDefaultLogger } from './utils';
7
7
  * Firecrawl scraper implementation
8
8
  * Uses the Firecrawl API to scrape web pages
9
9
  */
10
- export class FirecrawlScraper {
10
+ export class FirecrawlScraper implements t.BaseScraper {
11
11
  private apiKey: string;
12
12
  private apiUrl: string;
13
+ private version: string;
13
14
  private defaultFormats: string[];
14
15
  private timeout: number;
15
16
  private logger: t.Logger;
@@ -32,11 +33,13 @@ export class FirecrawlScraper {
32
33
  constructor(config: t.FirecrawlScraperConfig = {}) {
33
34
  this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
34
35
 
36
+ this.version = config.version ?? 'v2';
37
+
35
38
  const baseUrl =
36
39
  config.apiUrl ??
37
40
  process.env.FIRECRAWL_BASE_URL ??
38
41
  'https://api.firecrawl.dev';
39
- this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;
42
+ this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/${this.version}/scrape`;
40
43
 
41
44
  this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
42
45
  this.timeout = config.timeout ?? 7500;
@@ -2,7 +2,6 @@ import axios from 'axios';
2
2
  import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
3
3
  import type * as t from './types';
4
4
  import { getAttribution, createDefaultLogger } from './utils';
5
- import { FirecrawlScraper } from './firecrawl';
6
5
  import { BaseReranker } from './rerankers';
7
6
 
8
7
  const chunker = {
@@ -434,7 +433,7 @@ export const createSearchAPI = (
434
433
 
435
434
  export const createSourceProcessor = (
436
435
  config: t.ProcessSourcesConfig = {},
437
- scraperInstance?: FirecrawlScraper
436
+ scraperInstance?: t.BaseScraper
438
437
  ): {
439
438
  processSources: (
440
439
  fields: t.ProcessSourcesFields
@@ -442,7 +441,7 @@ export const createSourceProcessor = (
442
441
  topResults: number;
443
442
  } => {
444
443
  if (!scraperInstance) {
445
- throw new Error('Firecrawl scraper instance is required');
444
+ throw new Error('Scraper instance is required');
446
445
  }
447
446
  const {
448
447
  topResults = 5,
@@ -453,7 +452,7 @@ export const createSourceProcessor = (
453
452
  } = config;
454
453
 
455
454
  const logger_ = logger || createDefaultLogger();
456
- const firecrawlScraper = scraperInstance;
455
+ const scraper = scraperInstance;
457
456
 
458
457
  const webScraper = {
459
458
  scrapeMany: async ({
@@ -465,12 +464,12 @@ export const createSourceProcessor = (
465
464
  links: string[];
466
465
  onGetHighlights: t.SearchToolConfig['onGetHighlights'];
467
466
  }): Promise<Array<t.ScrapeResult>> => {
468
- logger_.debug(`Scraping ${links.length} links with Firecrawl`);
467
+ logger_.debug(`Scraping ${links.length} links`);
469
468
  const promises: Array<Promise<t.ScrapeResult>> = [];
470
469
  try {
471
470
  for (let i = 0; i < links.length; i++) {
472
471
  const currentLink = links[i];
473
- const promise: Promise<t.ScrapeResult> = firecrawlScraper
472
+ const promise: Promise<t.ScrapeResult> = scraper
474
473
  .scrapeUrl(currentLink, {})
475
474
  .then(([url, response]) => {
476
475
  const attribution = getAttribution(
@@ -479,8 +478,7 @@ export const createSourceProcessor = (
479
478
  logger_
480
479
  );
481
480
  if (response.success && response.data) {
482
- const [content, references] =
483
- firecrawlScraper.extractContent(response);
481
+ const [content, references] = scraper.extractContent(response);
484
482
  return {
485
483
  url,
486
484
  references,
@@ -0,0 +1,155 @@
1
+ import axios from 'axios';
2
+ import type * as t from './types';
3
+ import { createDefaultLogger } from './utils';
4
+
/**
 * Serper scraper implementation
 * Uses the Serper Scrape API (https://scrape.serper.dev) to scrape web pages
 *
 * Features:
 * - Simple API with single endpoint
 * - Returns both text and markdown content
 * - Includes metadata from scraped pages
 * - Credits-based pricing model
 *
 * Failures (missing API key, HTTP or network errors) are reported through the
 * `success: false` / `error` fields of the response; `scrapeUrl` does not throw.
 *
 * @example
 * ```typescript
 * const scraper = createSerperScraper({
 *   apiKey: 'your-serper-api-key',
 *   includeMarkdown: true,
 *   timeout: 10000
 * });
 *
 * const [url, response] = await scraper.scrapeUrl('https://example.com');
 * if (response.success) {
 *   const [content] = scraper.extractContent(response);
 *   console.log(content);
 * }
 * ```
 */
export class SerperScraper implements t.BaseScraper {
  private readonly apiKey: string;
  private readonly apiUrl: string;
  private readonly timeout: number;
  private readonly logger: t.Logger;
  private readonly includeMarkdown: boolean;

  /**
   * @param config Scraper configuration. `apiKey` falls back to the
   * `SERPER_API_KEY` environment variable, `apiUrl` to `SERPER_SCRAPE_URL` and
   * then to `https://scrape.serper.dev`; `timeout` defaults to 7500 and
   * `includeMarkdown` to `true`.
   */
  constructor(config: t.SerperScraperConfig = {}) {
    this.apiKey = config.apiKey ?? process.env.SERPER_API_KEY ?? '';

    this.apiUrl =
      config.apiUrl ??
      process.env.SERPER_SCRAPE_URL ??
      'https://scrape.serper.dev';

    this.timeout = config.timeout ?? 7500;
    this.includeMarkdown = config.includeMarkdown ?? true;

    this.logger = config.logger || createDefaultLogger();

    // Construction never fails on a missing key; scrapeUrl reports it per call.
    if (!this.apiKey) {
      this.logger.warn('SERPER_API_KEY is not set. Scraping will not work.');
    }

    this.logger.debug(
      `Serper scraper initialized with API URL: ${this.apiUrl}`
    );
  }

  /**
   * Scrape a single URL
   * @param url URL to scrape
   * @param options Per-call overrides for `includeMarkdown` and `timeout`;
   * values not supplied fall back to the instance configuration
   * @returns Tuple of the requested URL and the scrape response. On any
   * failure the response has `success: false` and an `error` message.
   */
  async scrapeUrl(
    url: string,
    options: t.SerperScrapeOptions = {}
  ): Promise<[string, t.SerperScrapeResponse]> {
    if (!this.apiKey) {
      return [
        url,
        {
          success: false,
          error: 'SERPER_API_KEY is not set',
        },
      ];
    }

    try {
      const payload = {
        url,
        includeMarkdown: options.includeMarkdown ?? this.includeMarkdown,
      };

      const response = await axios.post(this.apiUrl, payload, {
        headers: {
          'X-API-KEY': this.apiKey,
          'Content-Type': 'application/json',
        },
        timeout: options.timeout ?? this.timeout,
      });

      return [url, { success: true, data: response.data }];
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      return [
        url,
        {
          success: false,
          error: `Serper Scrape API request failed: ${errorMessage}`,
        },
      ];
    }
  }

  /**
   * Extract content from scrape response
   *
   * Non-empty markdown is preferred. When markdown is missing, empty, or not
   * a string, the plain `text` field is used instead so that an empty
   * markdown payload does not hide available text.
   *
   * @param response Scrape response
   * @returns Tuple of the extracted content (empty string if not available)
   * and references, which this scraper never produces (always `undefined`)
   */
  extractContent(
    response: t.SerperScrapeResponse
  ): [string, undefined | t.References] {
    if (!response.success || !response.data) {
      return ['', undefined];
    }

    // `data` is the raw API payload, so verify the field types at runtime.
    const { markdown, text } = response.data;

    if (typeof markdown === 'string' && markdown !== '') {
      return [markdown, undefined];
    }

    if (typeof text === 'string') {
      return [text, undefined];
    }

    return ['', undefined];
  }

  /**
   * Extract metadata from scrape response
   * @param response Scrape response
   * @returns The response's `data.metadata` object, or an empty object when
   * the response failed or carries no metadata
   */
  extractMetadata(
    response: t.SerperScrapeResponse
  ): Record<string, string | number | boolean | null | undefined> {
    if (!response.success || !response.data || !response.data.metadata) {
      return {};
    }

    return response.data.metadata;
  }
}
145
+
/**
 * Factory for {@link SerperScraper}.
 * @param config Scraper configuration; an empty object is used when omitted
 * @returns A new Serper scraper instance built from `config`
 */
export const createSerperScraper = (
  config: t.SerperScraperConfig = {}
): SerperScraper => new SerperScraper(config);