@librechat/agents 2.4.62 → 2.4.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/firecrawl.cjs +54 -8
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +4 -3
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs +54 -8
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +4 -3
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/types/tools/search/firecrawl.d.ts +15 -0
- package/dist/types/tools/search/types.d.ts +20 -9
- package/package.json +3 -3
- package/src/tools/search/firecrawl.ts +68 -19
- package/src/tools/search/tool.ts +4 -3
- package/src/tools/search/types.ts +20 -10
|
@@ -14,15 +14,45 @@ class FirecrawlScraper {
|
|
|
14
14
|
defaultFormats;
|
|
15
15
|
timeout;
|
|
16
16
|
logger;
|
|
17
|
+
includeTags;
|
|
18
|
+
excludeTags;
|
|
19
|
+
waitFor;
|
|
20
|
+
maxAge;
|
|
21
|
+
mobile;
|
|
22
|
+
skipTlsVerification;
|
|
23
|
+
blockAds;
|
|
24
|
+
removeBase64Images;
|
|
25
|
+
parsePDF;
|
|
26
|
+
storeInCache;
|
|
27
|
+
zeroDataRetention;
|
|
28
|
+
headers;
|
|
29
|
+
location;
|
|
30
|
+
onlyMainContent;
|
|
31
|
+
changeTrackingOptions;
|
|
17
32
|
constructor(config = {}) {
|
|
18
33
|
this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
|
|
19
34
|
const baseUrl = config.apiUrl ??
|
|
20
35
|
process.env.FIRECRAWL_BASE_URL ??
|
|
21
36
|
'https://api.firecrawl.dev';
|
|
22
37
|
this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;
|
|
23
|
-
this.defaultFormats = config.formats ?? ['markdown', '
|
|
38
|
+
this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
|
|
24
39
|
this.timeout = config.timeout ?? 7500;
|
|
25
40
|
this.logger = config.logger || utils.createDefaultLogger();
|
|
41
|
+
this.includeTags = config.includeTags;
|
|
42
|
+
this.excludeTags = config.excludeTags;
|
|
43
|
+
this.waitFor = config.waitFor;
|
|
44
|
+
this.maxAge = config.maxAge;
|
|
45
|
+
this.mobile = config.mobile;
|
|
46
|
+
this.skipTlsVerification = config.skipTlsVerification;
|
|
47
|
+
this.blockAds = config.blockAds;
|
|
48
|
+
this.removeBase64Images = config.removeBase64Images;
|
|
49
|
+
this.parsePDF = config.parsePDF;
|
|
50
|
+
this.storeInCache = config.storeInCache;
|
|
51
|
+
this.zeroDataRetention = config.zeroDataRetention;
|
|
52
|
+
this.headers = config.headers;
|
|
53
|
+
this.location = config.location;
|
|
54
|
+
this.onlyMainContent = config.onlyMainContent;
|
|
55
|
+
this.changeTrackingOptions = config.changeTrackingOptions;
|
|
26
56
|
if (!this.apiKey) {
|
|
27
57
|
this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
|
|
28
58
|
}
|
|
@@ -45,15 +75,27 @@ class FirecrawlScraper {
|
|
|
45
75
|
];
|
|
46
76
|
}
|
|
47
77
|
try {
|
|
48
|
-
const
|
|
78
|
+
const payload = omitUndefined({
|
|
49
79
|
url,
|
|
50
|
-
formats: options.formats
|
|
51
|
-
includeTags: options.includeTags,
|
|
52
|
-
excludeTags: options.excludeTags,
|
|
53
|
-
headers: options.headers,
|
|
54
|
-
waitFor: options.waitFor,
|
|
80
|
+
formats: options.formats ?? this.defaultFormats,
|
|
81
|
+
includeTags: options.includeTags ?? this.includeTags,
|
|
82
|
+
excludeTags: options.excludeTags ?? this.excludeTags,
|
|
83
|
+
headers: options.headers ?? this.headers,
|
|
84
|
+
waitFor: options.waitFor ?? this.waitFor,
|
|
55
85
|
timeout: options.timeout ?? this.timeout,
|
|
56
|
-
|
|
86
|
+
onlyMainContent: options.onlyMainContent ?? this.onlyMainContent,
|
|
87
|
+
maxAge: options.maxAge ?? this.maxAge,
|
|
88
|
+
mobile: options.mobile ?? this.mobile,
|
|
89
|
+
skipTlsVerification: options.skipTlsVerification ?? this.skipTlsVerification,
|
|
90
|
+
parsePDF: options.parsePDF ?? this.parsePDF,
|
|
91
|
+
location: options.location ?? this.location,
|
|
92
|
+
removeBase64Images: options.removeBase64Images ?? this.removeBase64Images,
|
|
93
|
+
blockAds: options.blockAds ?? this.blockAds,
|
|
94
|
+
storeInCache: options.storeInCache ?? this.storeInCache,
|
|
95
|
+
zeroDataRetention: options.zeroDataRetention ?? this.zeroDataRetention,
|
|
96
|
+
changeTrackingOptions: options.changeTrackingOptions ?? this.changeTrackingOptions,
|
|
97
|
+
});
|
|
98
|
+
const response = await axios.post(this.apiUrl, payload, {
|
|
57
99
|
headers: {
|
|
58
100
|
'Content-Type': 'application/json',
|
|
59
101
|
Authorization: `Bearer ${this.apiKey}`,
|
|
@@ -125,6 +167,10 @@ class FirecrawlScraper {
|
|
|
125
167
|
const createFirecrawlScraper = (config = {}) => {
|
|
126
168
|
return new FirecrawlScraper(config);
|
|
127
169
|
};
|
|
170
|
+
// Helper function to clean up payload for firecrawl
|
|
171
|
+
function omitUndefined(obj) {
|
|
172
|
+
return Object.fromEntries(Object.entries(obj).filter(([, v]) => v !== undefined));
|
|
173
|
+
}
|
|
128
174
|
|
|
129
175
|
exports.FirecrawlScraper = FirecrawlScraper;
|
|
130
176
|
exports.createFirecrawlScraper = createFirecrawlScraper;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"firecrawl.cjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["import axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n private logger: t.Logger;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 7500;\n\n this.logger = config.logger || createDefaultLogger();\n\n if (!this.apiKey) {\n this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n this.logger.debug(\n `Firecrawl scraper initialized with API URL: ${this.apiUrl}`\n );\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n this.logger.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":["createDefaultLogger","processContent"],"mappings":";;;;;;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AACP,IAAA,MAAM;AAEd,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,IAAI;QAErC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAIA,yBAAmB,EAAE;AAEpD,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAG3E,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAA+C,4CAAA,EAAA,IAAI,CAAC,MAAM,CAAE,CAAA,CAC7D;;AAGH;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAGC,sBAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;gBACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACrD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;;"}
|
|
1
|
+
{"version":3,"file":"firecrawl.cjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["import axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n private logger: t.Logger;\n private includeTags?: string[];\n private excludeTags?: string[];\n private waitFor?: number;\n private maxAge?: number;\n private mobile?: boolean;\n private skipTlsVerification?: boolean;\n private blockAds?: boolean;\n private removeBase64Images?: boolean;\n private parsePDF?: boolean;\n private storeInCache?: boolean;\n private zeroDataRetention?: boolean;\n private headers?: Record<string, string>;\n private location?: { country?: string; languages?: string[] };\n private onlyMainContent?: boolean;\n private changeTrackingOptions?: object;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];\n this.timeout = config.timeout ?? 7500;\n\n this.logger = config.logger || createDefaultLogger();\n\n this.includeTags = config.includeTags;\n this.excludeTags = config.excludeTags;\n this.waitFor = config.waitFor;\n this.maxAge = config.maxAge;\n this.mobile = config.mobile;\n this.skipTlsVerification = config.skipTlsVerification;\n this.blockAds = config.blockAds;\n this.removeBase64Images = config.removeBase64Images;\n this.parsePDF = config.parsePDF;\n this.storeInCache = config.storeInCache;\n this.zeroDataRetention = config.zeroDataRetention;\n this.headers = config.headers;\n this.location = config.location;\n this.onlyMainContent = config.onlyMainContent;\n this.changeTrackingOptions = config.changeTrackingOptions;\n\n if (!this.apiKey) {\n this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n this.logger.debug(\n `Firecrawl scraper initialized with API URL: ${this.apiUrl}`\n );\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const payload = omitUndefined({\n url,\n formats: options.formats ?? this.defaultFormats,\n includeTags: options.includeTags ?? this.includeTags,\n excludeTags: options.excludeTags ?? this.excludeTags,\n headers: options.headers ?? this.headers,\n waitFor: options.waitFor ?? this.waitFor,\n timeout: options.timeout ?? this.timeout,\n onlyMainContent: options.onlyMainContent ?? this.onlyMainContent,\n maxAge: options.maxAge ?? this.maxAge,\n mobile: options.mobile ?? this.mobile,\n skipTlsVerification:\n options.skipTlsVerification ?? this.skipTlsVerification,\n parsePDF: options.parsePDF ?? this.parsePDF,\n location: options.location ?? this.location,\n removeBase64Images:\n options.removeBase64Images ?? this.removeBase64Images,\n blockAds: options.blockAds ?? this.blockAds,\n storeInCache: options.storeInCache ?? this.storeInCache,\n zeroDataRetention: options.zeroDataRetention ?? this.zeroDataRetention,\n changeTrackingOptions:\n options.changeTrackingOptions ?? this.changeTrackingOptions,\n });\n const response = await axios.post(this.apiUrl, payload, {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n });\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n this.logger.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n\n// Helper function to clean up payload for firecrawl\nfunction omitUndefined<T extends object>(obj: T): Partial<T> {\n return Object.fromEntries(\n Object.entries(obj).filter(([, v]) => v !== undefined)\n ) as Partial<T>;\n}\n"],"names":["createDefaultLogger","processContent"],"mappings":";;;;;;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AACP,IAAA,MAAM;AACN,IAAA,WAAW;AACX,IAAA,WAAW;AACX,IAAA,OAAO;AACP,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,mBAAmB;AACnB,IAAA,QAAQ;AACR,IAAA,kBAAkB;AAClB,IAAA,QAAQ;AACR,IAAA,YAAY;AACZ,IAAA,iBAAiB;AACjB,IAAA,OAAO;AACP,IAAA,QAAQ;AACR,IAAA,eAAe;AACf,IAAA,qBAAqB;AAE7B,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,SAAS,CAAC;QAC/D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,IAAI;QAErC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAIA,yBAAmB,EAAE;AAEpD,QAAA,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW;AACrC,QAAA,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW;AACrC,QAAA,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM;AAC3B,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM;AAC3B,QAAA,IAAI,CAAC,mBAAmB,GAAG,MAAM,CAAC,mBAAmB;AACrD,QAAA,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ;AAC/B,QAAA,IAAI,CAAC,kBAAkB,GAAG,MAAM,CAAC,kBAAkB;AACnD,QAAA,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ;AAC/B,QAAA,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY;AACvC,QAAA,IAAI,CAAC,iBAAiB,GAAG,MAAM,CAAC,iBAAiB;AACjD,QAAA,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO;AAC7B,QAAA,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ;AAC/B,QAAA,IAAI,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe;AAC7C,QAAA,IAAI,CAAC,qBAAqB,GAAG,MAAM,CAAC,qBAAqB;AAEzD,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAG3E,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAA+C,4CAAA,EAAA,IAAI,CAAC,MAAM,CAAE,CAAA,CAC7D;;AAGH;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;AAC/C,gBAAA,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW;AACpD,gBAAA,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW;AACpD,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;AACxC,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;AACxC,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;AACxC,gBAAA,eAAe,EAAE,OAAO,CAAC,eAAe,IAAI,IAAI,CAAC,eAAe;AAChE,gBAAA,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM;AACrC,gBAAA,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM;AACrC,gBAAA,mBAAmB,EACjB,OAAO,CAAC,mBAAmB,IAAI,IAAI,CAAC,mBAAmB;AACzD,gBAAA,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;AAC3C,gBAAA,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;AAC3C,gBAAA,kBAAkB,EAChB,OAAO,CAAC,kBAAkB,IAAI,IAAI,CAAC,kBAAkB;AACvD,gBAAA,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;AAC3C,gBAAA,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY;AACvD,gBAAA,iBAAiB,EAAE,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC,iBAAiB;AACtE,gBAAA,qBAAqB,EACnB,OAAO,CAAC,qBAAqB,IAAI,IAAI,CAAC,qBAAqB;AAC9D,aAAA,CAAC;AACF,YAAA,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE;AACtD,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CAAC;AAEF,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAGC,sBAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;gBACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACrD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;AAEA;AACA,SAAS,aAAa,CAAmB,GAAM,EAAA;IAC7C,OAAO,MAAM,CAAC,WAAW,CACvB,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,SAAS,CAAC,CACzC;AACjB;;;;;"}
|
|
@@ -236,7 +236,7 @@ Use anchor marker(s) immediately after the statement:
|
|
|
236
236
|
* @returns A DynamicStructuredTool with a schema that depends on the searchProvider
|
|
237
237
|
*/
|
|
238
238
|
const createSearchTool = (config = {}) => {
|
|
239
|
-
const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, safeSearch = 1, firecrawlApiKey, firecrawlApiUrl,
|
|
239
|
+
const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, safeSearch = 1, firecrawlApiKey, firecrawlApiUrl, firecrawlOptions, scraperTimeout, jinaApiKey, cohereApiKey, onSearchResults: _onSearchResults, onGetHighlights, } = config;
|
|
240
240
|
const logger = config.logger || utils.createDefaultLogger();
|
|
241
241
|
const schemaObject = {
|
|
242
242
|
query: schema.querySchema,
|
|
@@ -256,10 +256,11 @@ const createSearchTool = (config = {}) => {
|
|
|
256
256
|
searxngApiKey,
|
|
257
257
|
});
|
|
258
258
|
const firecrawlScraper = firecrawl.createFirecrawlScraper({
|
|
259
|
+
...firecrawlOptions,
|
|
259
260
|
apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
|
|
260
261
|
apiUrl: firecrawlApiUrl,
|
|
261
|
-
timeout: scraperTimeout,
|
|
262
|
-
formats:
|
|
262
|
+
timeout: scraperTimeout ?? firecrawlOptions?.timeout,
|
|
263
|
+
formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
|
|
263
264
|
});
|
|
264
265
|
const selectedReranker = rerankers.createReranker({
|
|
265
266
|
rerankerType,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool.cjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type { RunnableConfig } from '@langchain/core/runnables';\nimport type * as t from './types';\nimport {\n DATE_RANGE,\n querySchema,\n dateSchema,\n countrySchema,\n imagesSchema,\n videosSchema,\n newsSchema,\n} from './schema';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createDefaultLogger } from './utils';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\n/**\n * Executes parallel searches and merges the results\n */\nasync function executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n}: {\n searchAPI: ReturnType<typeof createSearchAPI>;\n query: string;\n date?: DATE_RANGE;\n country?: string;\n safeSearch: t.SearchToolConfig['safeSearch'];\n images: boolean;\n videos: boolean;\n news: boolean;\n logger: t.Logger;\n}): Promise<t.SearchResult> {\n // Prepare all search tasks to run in parallel\n const searchTasks: Promise<t.SearchResult>[] = [\n // Main search\n searchAPI.getSources({\n query,\n date,\n country,\n safeSearch,\n }),\n ];\n\n if (images) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'images',\n })\n .catch((error) => {\n logger.error('Error fetching images:', error);\n return {\n success: false,\n error: `Images search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (videos) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'videos',\n })\n .catch((error) => {\n logger.error('Error fetching videos:', error);\n return {\n success: false,\n error: `Videos search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (news) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'news',\n })\n .catch((error) => {\n logger.error('Error fetching news:', error);\n return {\n success: false,\n error: `News search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n\n // Run all searches in parallel\n const results = await Promise.all(searchTasks);\n\n // Get the main search result (first result)\n const mainResult = results[0];\n if (!mainResult.success) {\n throw new Error(mainResult.error ?? 'Search failed');\n }\n\n // Merge additional results with the main results\n const mergedResults = { ...mainResult.data };\n\n // Convert existing news to topStories if present\n if (mergedResults.news !== undefined && mergedResults.news.length > 0) {\n const existingNewsAsTopStories = mergedResults.news\n .filter((newsItem) => newsItem.link !== undefined && newsItem.link !== '')\n .map((newsItem) => ({\n title: newsItem.title ?? '',\n link: newsItem.link ?? '',\n source: newsItem.source ?? '',\n date: newsItem.date ?? '',\n imageUrl: newsItem.imageUrl ?? '',\n processed: false,\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...existingNewsAsTopStories,\n ];\n delete mergedResults.news;\n }\n\n results.slice(1).forEach((result) => {\n if (result.success && result.data !== undefined) {\n if (result.data.images !== undefined && result.data.images.length > 0) {\n mergedResults.images = [\n ...(mergedResults.images ?? []),\n ...result.data.images,\n ];\n }\n if (result.data.videos !== undefined && result.data.videos.length > 0) {\n mergedResults.videos = [\n ...(mergedResults.videos ?? []),\n ...result.data.videos,\n ];\n }\n if (result.data.news !== undefined && result.data.news.length > 0) {\n const newsAsTopStories = result.data.news.map((newsItem) => ({\n ...newsItem,\n link: newsItem.link ?? '',\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...newsAsTopStories,\n ];\n }\n }\n });\n\n return { success: true, data: mergedResults };\n}\n\nfunction createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n}: {\n safeSearch: t.SearchToolConfig['safeSearch'];\n searchAPI: ReturnType<typeof createSearchAPI>;\n sourceProcessor: ReturnType<typeof createSourceProcessor>;\n onGetHighlights: t.SearchToolConfig['onGetHighlights'];\n logger: t.Logger;\n}) {\n return async function ({\n query,\n date,\n country,\n proMode = true,\n maxSources = 5,\n onSearchResults,\n images = false,\n videos = false,\n news = false,\n }: {\n query: string;\n country?: string;\n date?: DATE_RANGE;\n proMode?: boolean;\n maxSources?: number;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n images?: boolean;\n videos?: boolean;\n news?: boolean;\n }): Promise<t.SearchResultData> {\n try {\n // Execute parallel searches and merge results\n const searchResult = await executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n });\n\n onSearchResults?.(searchResult);\n\n const processedSources = await sourceProcessor.processSources({\n query,\n news,\n result: searchResult,\n proMode,\n onGetHighlights,\n numElements: maxSources,\n });\n\n return expandHighlights(processedSources);\n } catch (error) {\n logger.error('Error in search:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n videos: [],\n news: [],\n relatedSearches: [],\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n}\n\nfunction createOnSearchResults({\n runnableConfig,\n onSearchResults,\n}: {\n runnableConfig: RunnableConfig;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}) {\n return function (results: t.SearchResult): void {\n if (!onSearchResults) {\n return;\n }\n onSearchResults(results, runnableConfig);\n };\n}\n\nfunction createTool({\n schema,\n search,\n onSearchResults: _onSearchResults,\n}: {\n schema: t.SearchToolSchema;\n search: ReturnType<typeof createSearchProcessor>;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}): DynamicStructuredTool<typeof schema> {\n return tool<typeof schema>(\n async (params, runnableConfig) => {\n const { query, date, country: _c, images, videos, news } = params;\n const country = typeof _c === 'string' && _c ? _c : undefined;\n const searchResult = await search({\n query,\n date,\n country,\n images,\n videos,\n news,\n onSearchResults: createOnSearchResults({\n runnableConfig,\n onSearchResults: _onSearchResults,\n }),\n });\n const turn = runnableConfig.toolCall?.turn ?? 0;\n const { output, references } = formatResultsForLLM(turn, searchResult);\n const data: t.SearchResultData = { turn, ...searchResult, references };\n return [output, { [Constants.WEB_SEARCH]: data }];\n },\n {\n name: Constants.WEB_SEARCH,\n description: `Real-time search. Results have required citation anchors.\n\nNote: Use ONCE per reply unless instructed otherwise.\n\nAnchors:\n- \\\\ue202turnXtypeY\n- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx\n\nSpecial Markers:\n- \\\\ue203...\\\\ue204 — highlight start/end of cited text (for Standalone or Group citations)\n- \\\\ue200...\\\\ue201 — group block (e.g. \\\\ue200\\\\ue202turn0search1\\\\ue202turn0news2\\\\ue201)\n\n**CITE EVERY NON-OBVIOUS FACT/QUOTE:**\nUse anchor marker(s) immediately after the statement:\n- Standalone: \"Pure functions produce same output. \\\\ue202turn0search0\"\n- Standalone (multiple): \"Today's News \\\\ue202turn0search0\\\\ue202turn0news0\"\n- Highlight: \"\\\\ue203Highlight text.\\\\ue204\\\\ue202turn0news1\"\n- Group: \"Sources. \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Group Highlight: \"\\\\ue203Highlight for group.\\\\ue204 \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Image: \"See photo \\\\ue202turn0image0.\"\n\n**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**\n`.trim(),\n schema: schema,\n responseFormat: Constants.CONTENT_AND_ARTIFACT,\n }\n );\n}\n\n/**\n * Creates a search tool with a schema that dynamically includes the country field\n * only when the searchProvider is 'serper'.\n *\n * @param config - The search tool configuration\n * @returns A DynamicStructuredTool with a schema that depends on the searchProvider\n */\nexport const createSearchTool = (\n config: t.SearchToolConfig = {}\n): DynamicStructuredTool<typeof toolSchema> => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n rerankerType = 'cohere',\n topResults = 5,\n strategies = ['no_extraction'],\n filterContent = true,\n safeSearch = 1,\n firecrawlApiKey,\n firecrawlApiUrl,\n firecrawlFormats = ['markdown', 'html'],\n scraperTimeout,\n jinaApiKey,\n cohereApiKey,\n onSearchResults: _onSearchResults,\n onGetHighlights,\n } = config;\n\n const logger = config.logger || createDefaultLogger();\n\n const schemaObject: {\n query: z.ZodString;\n date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;\n country?: z.ZodOptional<z.ZodString>;\n images: z.ZodOptional<z.ZodBoolean>;\n videos: z.ZodOptional<z.ZodBoolean>;\n news: z.ZodOptional<z.ZodBoolean>;\n } = {\n query: querySchema,\n date: dateSchema,\n images: imagesSchema,\n videos: videosSchema,\n news: newsSchema,\n };\n\n if (searchProvider === 'serper') {\n schemaObject.country = countrySchema;\n }\n\n const toolSchema = z.object(schemaObject);\n\n const searchAPI = createSearchAPI({\n searchProvider,\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n });\n\n const firecrawlScraper = createFirecrawlScraper({\n apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,\n apiUrl: firecrawlApiUrl,\n timeout: scraperTimeout,\n formats: firecrawlFormats,\n });\n\n const selectedReranker = createReranker({\n rerankerType,\n jinaApiKey,\n cohereApiKey,\n logger,\n });\n\n if (!selectedReranker) {\n logger.warn('No reranker selected. Using default ranking.');\n }\n\n const sourceProcessor = createSourceProcessor(\n {\n reranker: selectedReranker,\n topResults,\n strategies,\n filterContent,\n logger,\n },\n firecrawlScraper\n );\n\n const search = createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n });\n\n return createTool({\n search,\n schema: toolSchema,\n onSearchResults: _onSearchResults,\n });\n};\n"],"names":["expandHighlights","tool","formatResultsForLLM","Constants","createDefaultLogger","querySchema","dateSchema","imagesSchema","videosSchema","newsSchema","countrySchema","z","createSearchAPI","createFirecrawlScraper","createReranker","createSourceProcessor","search"],"mappings":";;;;;;;;;;;;;AAqBA;;AAEG;AACH,eAAe,uBAAuB,CAAC,EACrC,SAAS,EACT,KAAK,EACL,IAAI,EACJ,OAAO,EACP,UAAU,EACV,MAAM,EACN,MAAM,EACN,IAAI,EACJ,MAAM,GAWP,EAAA;;AAEC,IAAA,MAAM,WAAW,GAA8B;;QAE7C,SAAS,CAAC,UAAU,CAAC;YACnB,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;SACX,CAAC;KACH;IAED,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,IAAI,EAAE;QACR,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,MAAM;SACb;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;YAC3C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAuB,oBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACvF;SACF,CAAC,CACL;;;IAIH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;;AAG9C,IAAA,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAC;AAC7B,IAAA,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE;QACvB,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,eAAe,CAAC;;;IAItD,MAAM,aAAa,GAAG,EAAE,GAAG,UAAU,CAAC,IAAI,EAAE;;AAG5C,IAAA,IAAI,aAAa,CAAC,IAAI,KAAK,SAAS,IAAI,aAAa,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACrE,QAAA,MAAM,wBAAwB,GAAG,aAAa,CAAC;AAC5C,aAAA,MAAM,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,KAAK,SAAS,IAAI,QAAQ,CAAC,IAAI,KAAK,EAAE;AACxE,aAAA,GAAG,CAAC,CAAC,QAAQ,MAAM;AAClB,YAAA,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,EAAE;AAC3B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,MAAM,EAAE,QAAQ,CAAC,MAAM,IAAI,EAAE;AAC7B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,QAAQ,EAAE,QAAQ,CAAC,QAAQ,IAAI,EAAE;AACjC,YAAA,SAAS,EAAE,KAAK;AACjB,SAAA,CAAC,CAAC;QACL,aAAa,CAAC,UAAU,GAAG;AACzB,YAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,YAAA,GAAG,wBAAwB;SAC5B;QACD,OAAO,aAAa,CAAC,IAAI;;IAG3B,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,KAAI;QAClC,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE;AAC/C,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,gBAAA,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,MAAM;AAC3D,oBAAA,GAAG,QAAQ;AACX,oBAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AAC1B,iBAAA,CAAC,CAAC;gBACH,aAAa,CAAC,UAAU,GAAG;AACzB,oBAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,oBAAA,GAAG,gBAAgB;iBACpB;;;AAGP,KAAC,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE;AAC/C;AAEA,SAAS,qBAAqB,CAAC,EAC7B,SAAS,EACT,UAAU,EACV,eAAe,EACf,eAAe,EACf,MAAM,GAOP,EAAA;AACC,IAAA,OAAO,gBAAgB,EACrB,KAAK,EACL,IAAI,EACJ,OAAO,EACP,OAAO,GAAG,IAAI,EACd,UAAU,GAAG,CAAC,EACd,eAAe,EACf,MAAM,GAAG,KAAK,EACd,MAAM,GAAG,KAAK,EACd,IAAI,GAAG,KAAK,GAWb,EAAA;AACC,QAAA,IAAI;;AAEF,YAAA,MAAM,YAAY,GAAG,MAAM,uBAAuB,CAAC;gBACjD,SAAS;gBACT,KAAK;gBACL,IAAI;gBACJ,OAAO;gBACP,UAAU;gBACV,MAAM;gBACN,MAAM;gBACN,IAAI;gBACJ,MAAM;AACP,aAAA,CAAC;AAEF,YAAA,eAAe,GAAG,YAAY,CAAC;AAE/B,YAAA,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAAC,cAAc,CAAC;gBAC5D,KAAK;gBACL,IAAI;AACJ,gBAAA,MAAM,EAAE,YAAY;gBACpB,OAAO;gBACP,eAAe;AACf,gBAAA,WAAW,EAAE,UAAU;AACxB,aAAA,CAAC;AAEF,YAAA,OAAOA,2BAAgB,CAAC,gBAAgB,CAAC;;QACzC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,CAAC,KAAK,CAAC,kBAAkB,EAAE,KAAK,CAAC;YACvC,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,IAAI,EAAE,EAAE;AACR,gBAAA,eAAe,EAAE,EAAE;AACnB,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;AACH;AAEA,SAAS,qBAAqB,CAAC,EAC7B,cAAc,EACd,eAAe,GAIhB,EAAA;AACC,IAAA,OAAO,UAAU,OAAuB,EAAA;QACtC,IAAI,CAAC,eAAe,EAAE;YACpB;;AAEF,QAAA,eAAe,CAAC,OAAO,EAAE,cAAc,CAAC;AAC1C,KAAC;AACH;AAEA,SAAS,UAAU,CAAC,EAClB,MAAM,EACN,MAAM,EACN,eAAe,EAAE,gBAAgB,GAKlC,EAAA;IACC,OAAOC,UAAI,CACT,OAAO,MAAM,EAAE,cAAc,KAAI;AAC/B,QAAA,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM;AACjE,QAAA,MAAM,OAAO,GAAG,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,GAAG,EAAE,GAAG,SAAS;AAC7D,QAAA,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC;YAChC,KAAK;YACL,IAAI;YACJ,OAAO;YACP,MAAM;YACN,MAAM;YACN,IAAI;YACJ,eAAe,EAAE,qBAAqB,CAAC;gBACrC,cAAc;AACd,gBAAA,eAAe,EAAE,gBAAgB;aAClC,CAAC;AACH,SAAA,CAAC;QACF,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC;AAC/C,QAAA,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAGC,0BAAmB,CAAC,IAAI,EAAE,YAAY,CAAC;QACtE,MAAM,IAAI,GAAuB,EAAE,IAAI,EAAE,GAAG,YAAY,EAAE,UAAU,EAAE;AACtE,QAAA,OAAO,CAAC,MAAM,EAAE,EAAE,CAACC,eAAS,CAAC,UAAU,GAAG,IAAI,EAAE,CAAC;AACnD,KAAC,EACD;QACE,IAAI,EAAEA,eAAS,CAAC,UAAU;AAC1B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;;;;;;;AAsBlB,CAAA,CAAC,IAAI,EAAE;AACF,QAAA,MAAM,EAAE,MAAM;QACd,cAAc,EAAEA,eAAS,CAAC,oBAAoB;AAC/C,KAAA,CACF;AACH;AAEA;;;;;;AAMG;MACU,gBAAgB,GAAG,CAC9B,MAA6B,GAAA,EAAE,KACa;IAC5C,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,EACb,YAAY,GAAG,QAAQ,EACvB,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,eAAe,CAAC,EAC9B,aAAa,GAAG,IAAI,EACpB,UAAU,GAAG,CAAC,EACd,eAAe,EACf,eAAe,EACf,gBAAgB,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,EACvC,cAAc,EACd,UAAU,EACV,YAAY,EACZ,eAAe,EAAE,gBAAgB,EACjC,eAAe,GAChB,GAAG,MAAM;IAEV,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAIC,yBAAmB,EAAE;AAErD,IAAA,MAAM,YAAY,GAOd;AACF,QAAA,KAAK,EAAEC,kBAAW;AAClB,QAAA,IAAI,EAAEC,iBAAU;AAChB,QAAA,MAAM,EAAEC,mBAAY;AACpB,QAAA,MAAM,EAAEC,mBAAY;AACpB,QAAA,IAAI,EAAEC,iBAAU;KACjB;AAED,IAAA,IAAI,cAAc,KAAK,QAAQ,EAAE;AAC/B,QAAA,YAAY,CAAC,OAAO,GAAGC,oBAAa;;IAGtC,MAAM,UAAU,GAAGC,KAAC,CAAC,MAAM,CAAC,YAAY,CAAC;IAEzC,MAAM,SAAS,GAAGC,sBAAe,CAAC;QAChC,cAAc;QACd,YAAY;QACZ,kBAAkB;QAClB,aAAa;AACd,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAGC,gCAAsB,CAAC;AAC9C,QAAA,MAAM,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB;AACxD,QAAA,MAAM,EAAE,eAAe;AACvB,QAAA,OAAO,EAAE,cAAc;AACvB,QAAA,OAAO,EAAE,gBAAgB;AAC1B,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAGC,wBAAc,CAAC;QACtC,YAAY;QACZ,UAAU;QACV,YAAY;QACZ,MAAM;AACP,KAAA,CAAC;IAEF,IAAI,CAAC,gBAAgB,EAAE;AACrB,QAAA,MAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC;;IAG7D,MAAM,eAAe,GAAGC,4BAAqB,CAC3C;AACE,QAAA,QAAQ,EAAE,gBAAgB;QAC1B,UAAU;QAGV,MAAM;KACP,EACD,gBAAgB,CACjB;IAED,MAAMC,QAAM,GAAG,qBAAqB,CAAC;QACnC,SAAS;QACT,UAAU;QACV,eAAe;QACf,eAAe;QACf,MAAM;AACP,KAAA,CAAC;AAEF,IAAA,OAAO,UAAU,CAAC;gBAChBA,QAAM;AACN,QAAA,MAAM,EAAE,UAAU;AAClB,QAAA,eAAe,EAAE,gBAAgB;AAClC,KAAA,CAAC;AACJ;;;;"}
|
|
1
|
+
{"version":3,"file":"tool.cjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type { RunnableConfig } from '@langchain/core/runnables';\nimport type * as t from './types';\nimport {\n DATE_RANGE,\n querySchema,\n dateSchema,\n countrySchema,\n imagesSchema,\n videosSchema,\n newsSchema,\n} from './schema';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createDefaultLogger } from './utils';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\n/**\n * Executes parallel searches and merges the results\n */\nasync function executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n}: {\n searchAPI: ReturnType<typeof createSearchAPI>;\n query: string;\n date?: DATE_RANGE;\n country?: string;\n safeSearch: t.SearchToolConfig['safeSearch'];\n images: boolean;\n videos: boolean;\n news: boolean;\n logger: t.Logger;\n}): Promise<t.SearchResult> {\n // Prepare all search tasks to run in parallel\n const searchTasks: Promise<t.SearchResult>[] = [\n // Main search\n searchAPI.getSources({\n query,\n date,\n country,\n safeSearch,\n }),\n ];\n\n if (images) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'images',\n })\n .catch((error) => {\n logger.error('Error fetching images:', error);\n return {\n success: false,\n error: `Images search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (videos) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'videos',\n })\n .catch((error) => {\n logger.error('Error fetching videos:', error);\n return {\n success: false,\n error: `Videos search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (news) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'news',\n })\n .catch((error) => {\n logger.error('Error fetching news:', error);\n return {\n success: false,\n error: `News search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n\n // Run all searches in parallel\n const results = await Promise.all(searchTasks);\n\n // Get the main search result (first result)\n const mainResult = results[0];\n if (!mainResult.success) {\n throw new Error(mainResult.error ?? 'Search failed');\n }\n\n // Merge additional results with the main results\n const mergedResults = { ...mainResult.data };\n\n // Convert existing news to topStories if present\n if (mergedResults.news !== undefined && mergedResults.news.length > 0) {\n const existingNewsAsTopStories = mergedResults.news\n .filter((newsItem) => newsItem.link !== undefined && newsItem.link !== '')\n .map((newsItem) => ({\n title: newsItem.title ?? '',\n link: newsItem.link ?? '',\n source: newsItem.source ?? '',\n date: newsItem.date ?? '',\n imageUrl: newsItem.imageUrl ?? '',\n processed: false,\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...existingNewsAsTopStories,\n ];\n delete mergedResults.news;\n }\n\n results.slice(1).forEach((result) => {\n if (result.success && result.data !== undefined) {\n if (result.data.images !== undefined && result.data.images.length > 0) {\n mergedResults.images = [\n ...(mergedResults.images ?? []),\n ...result.data.images,\n ];\n }\n if (result.data.videos !== undefined && result.data.videos.length > 0) {\n mergedResults.videos = [\n ...(mergedResults.videos ?? []),\n ...result.data.videos,\n ];\n }\n if (result.data.news !== undefined && result.data.news.length > 0) {\n const newsAsTopStories = result.data.news.map((newsItem) => ({\n ...newsItem,\n link: newsItem.link ?? '',\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...newsAsTopStories,\n ];\n }\n }\n });\n\n return { success: true, data: mergedResults };\n}\n\nfunction createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n}: {\n safeSearch: t.SearchToolConfig['safeSearch'];\n searchAPI: ReturnType<typeof createSearchAPI>;\n sourceProcessor: ReturnType<typeof createSourceProcessor>;\n onGetHighlights: t.SearchToolConfig['onGetHighlights'];\n logger: t.Logger;\n}) {\n return async function ({\n query,\n date,\n country,\n proMode = true,\n maxSources = 5,\n onSearchResults,\n images = false,\n videos = false,\n news = false,\n }: {\n query: string;\n country?: string;\n date?: DATE_RANGE;\n proMode?: boolean;\n maxSources?: number;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n images?: boolean;\n videos?: boolean;\n news?: boolean;\n }): Promise<t.SearchResultData> {\n try {\n // Execute parallel searches and merge results\n const searchResult = await executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n });\n\n onSearchResults?.(searchResult);\n\n const processedSources = await sourceProcessor.processSources({\n query,\n news,\n result: searchResult,\n proMode,\n onGetHighlights,\n numElements: maxSources,\n });\n\n return expandHighlights(processedSources);\n } catch (error) {\n logger.error('Error in search:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n videos: [],\n news: [],\n relatedSearches: [],\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n}\n\nfunction createOnSearchResults({\n runnableConfig,\n onSearchResults,\n}: {\n runnableConfig: RunnableConfig;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}) {\n return function (results: t.SearchResult): void {\n if (!onSearchResults) {\n return;\n }\n onSearchResults(results, runnableConfig);\n };\n}\n\nfunction createTool({\n schema,\n search,\n onSearchResults: _onSearchResults,\n}: {\n schema: t.SearchToolSchema;\n search: ReturnType<typeof createSearchProcessor>;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}): DynamicStructuredTool<typeof schema> {\n return tool<typeof schema>(\n async (params, runnableConfig) => {\n const { query, date, country: _c, images, videos, news } = params;\n const country = typeof _c === 'string' && _c ? _c : undefined;\n const searchResult = await search({\n query,\n date,\n country,\n images,\n videos,\n news,\n onSearchResults: createOnSearchResults({\n runnableConfig,\n onSearchResults: _onSearchResults,\n }),\n });\n const turn = runnableConfig.toolCall?.turn ?? 0;\n const { output, references } = formatResultsForLLM(turn, searchResult);\n const data: t.SearchResultData = { turn, ...searchResult, references };\n return [output, { [Constants.WEB_SEARCH]: data }];\n },\n {\n name: Constants.WEB_SEARCH,\n description: `Real-time search. Results have required citation anchors.\n\nNote: Use ONCE per reply unless instructed otherwise.\n\nAnchors:\n- \\\\ue202turnXtypeY\n- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx\n\nSpecial Markers:\n- \\\\ue203...\\\\ue204 — highlight start/end of cited text (for Standalone or Group citations)\n- \\\\ue200...\\\\ue201 — group block (e.g. \\\\ue200\\\\ue202turn0search1\\\\ue202turn0news2\\\\ue201)\n\n**CITE EVERY NON-OBVIOUS FACT/QUOTE:**\nUse anchor marker(s) immediately after the statement:\n- Standalone: \"Pure functions produce same output. \\\\ue202turn0search0\"\n- Standalone (multiple): \"Today's News \\\\ue202turn0search0\\\\ue202turn0news0\"\n- Highlight: \"\\\\ue203Highlight text.\\\\ue204\\\\ue202turn0news1\"\n- Group: \"Sources. \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Group Highlight: \"\\\\ue203Highlight for group.\\\\ue204 \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Image: \"See photo \\\\ue202turn0image0.\"\n\n**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**\n`.trim(),\n schema: schema,\n responseFormat: Constants.CONTENT_AND_ARTIFACT,\n }\n );\n}\n\n/**\n * Creates a search tool with a schema that dynamically includes the country field\n * only when the searchProvider is 'serper'.\n *\n * @param config - The search tool configuration\n * @returns A DynamicStructuredTool with a schema that depends on the searchProvider\n */\nexport const createSearchTool = (\n config: t.SearchToolConfig = {}\n): DynamicStructuredTool<typeof toolSchema> => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n rerankerType = 'cohere',\n topResults = 5,\n strategies = ['no_extraction'],\n filterContent = true,\n safeSearch = 1,\n firecrawlApiKey,\n firecrawlApiUrl,\n firecrawlOptions,\n scraperTimeout,\n jinaApiKey,\n cohereApiKey,\n onSearchResults: _onSearchResults,\n onGetHighlights,\n } = config;\n\n const logger = config.logger || createDefaultLogger();\n\n const schemaObject: {\n query: z.ZodString;\n date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;\n country?: z.ZodOptional<z.ZodString>;\n images: z.ZodOptional<z.ZodBoolean>;\n videos: z.ZodOptional<z.ZodBoolean>;\n news: z.ZodOptional<z.ZodBoolean>;\n } = {\n query: querySchema,\n date: dateSchema,\n images: imagesSchema,\n videos: videosSchema,\n news: newsSchema,\n };\n\n if (searchProvider === 'serper') {\n schemaObject.country = countrySchema;\n }\n\n const toolSchema = z.object(schemaObject);\n\n const searchAPI = createSearchAPI({\n searchProvider,\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n });\n\n const firecrawlScraper = createFirecrawlScraper({\n ...firecrawlOptions,\n apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,\n apiUrl: firecrawlApiUrl,\n timeout: scraperTimeout ?? firecrawlOptions?.timeout,\n formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],\n });\n\n const selectedReranker = createReranker({\n rerankerType,\n jinaApiKey,\n cohereApiKey,\n logger,\n });\n\n if (!selectedReranker) {\n logger.warn('No reranker selected. Using default ranking.');\n }\n\n const sourceProcessor = createSourceProcessor(\n {\n reranker: selectedReranker,\n topResults,\n strategies,\n filterContent,\n logger,\n },\n firecrawlScraper\n );\n\n const search = createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n });\n\n return createTool({\n search,\n schema: toolSchema,\n onSearchResults: _onSearchResults,\n });\n};\n"],"names":["expandHighlights","tool","formatResultsForLLM","Constants","createDefaultLogger","querySchema","dateSchema","imagesSchema","videosSchema","newsSchema","countrySchema","z","createSearchAPI","createFirecrawlScraper","createReranker","createSourceProcessor","search"],"mappings":";;;;;;;;;;;;;AAqBA;;AAEG;AACH,eAAe,uBAAuB,CAAC,EACrC,SAAS,EACT,KAAK,EACL,IAAI,EACJ,OAAO,EACP,UAAU,EACV,MAAM,EACN,MAAM,EACN,IAAI,EACJ,MAAM,GAWP,EAAA;;AAEC,IAAA,MAAM,WAAW,GAA8B;;QAE7C,SAAS,CAAC,UAAU,CAAC;YACnB,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;SACX,CAAC;KACH;IAED,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,IAAI,EAAE;QACR,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,MAAM;SACb;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;YAC3C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAuB,oBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACvF;SACF,CAAC,CACL;;;IAIH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;;AAG9C,IAAA,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAC;AAC7B,IAAA,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE;QACvB,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,eAAe,CAAC;;;IAItD,MAAM,aAAa,GAAG,EAAE,GAAG,UAAU,CAAC,IAAI,EAAE;;AAG5C,IAAA,IAAI,aAAa,CAAC,IAAI,KAAK,SAAS,IAAI,aAAa,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACrE,QAAA,MAAM,wBAAwB,GAAG,aAAa,CAAC;AAC5C,aAAA,MAAM,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,KAAK,SAAS,IAAI,QAAQ,CAAC,IAAI,KAAK,EAAE;AACxE,aAAA,GAAG,CAAC,CAAC,QAAQ,MAAM;AAClB,YAAA,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,EAAE;AAC3B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,MAAM,EAAE,QAAQ,CAAC,MAAM,IAAI,EAAE;AAC7B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,QAAQ,EAAE,QAAQ,CAAC,QAAQ,IAAI,EAAE;AACjC,YAAA,SAAS,EAAE,KAAK;AACjB,SAAA,CAAC,CAAC;QACL,aAAa,CAAC,UAAU,GAAG;AACzB,YAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,YAAA,GAAG,wBAAwB;SAC5B;QACD,OAAO,aAAa,CAAC,IAAI;;IAG3B,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,KAAI;QAClC,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE;AAC/C,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,gBAAA,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,MAAM;AAC3D,oBAAA,GAAG,QAAQ;AACX,oBAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AAC1B,iBAAA,CAAC,CAAC;gBACH,aAAa,CAAC,UAAU,GAAG;AACzB,oBAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,oBAAA,GAAG,gBAAgB;iBACpB;;;AAGP,KAAC,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE;AAC/C;AAEA,SAAS,qBAAqB,CAAC,EAC7B,SAAS,EACT,UAAU,EACV,eAAe,EACf,eAAe,EACf,MAAM,GAOP,EAAA;AACC,IAAA,OAAO,gBAAgB,EACrB,KAAK,EACL,IAAI,EACJ,OAAO,EACP,OAAO,GAAG,IAAI,EACd,UAAU,GAAG,CAAC,EACd,eAAe,EACf,MAAM,GAAG,KAAK,EACd,MAAM,GAAG,KAAK,EACd,IAAI,GAAG,KAAK,GAWb,EAAA;AACC,QAAA,IAAI;;AAEF,YAAA,MAAM,YAAY,GAAG,MAAM,uBAAuB,CAAC;gBACjD,SAAS;gBACT,KAAK;gBACL,IAAI;gBACJ,OAAO;gBACP,UAAU;gBACV,MAAM;gBACN,MAAM;gBACN,IAAI;gBACJ,MAAM;AACP,aAAA,CAAC;AAEF,YAAA,eAAe,GAAG,YAAY,CAAC;AAE/B,YAAA,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAAC,cAAc,CAAC;gBAC5D,KAAK;gBACL,IAAI;AACJ,gBAAA,MAAM,EAAE,YAAY;gBACpB,OAAO;gBACP,eAAe;AACf,gBAAA,WAAW,EAAE,UAAU;AACxB,aAAA,CAAC;AAEF,YAAA,OAAOA,2BAAgB,CAAC,gBAAgB,CAAC;;QACzC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,CAAC,KAAK,CAAC,kBAAkB,EAAE,KAAK,CAAC;YACvC,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,IAAI,EAAE,EAAE;AACR,gBAAA,eAAe,EAAE,EAAE;AACnB,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;AACH;AAEA,SAAS,qBAAqB,CAAC,EAC7B,cAAc,EACd,eAAe,GAIhB,EAAA;AACC,IAAA,OAAO,UAAU,OAAuB,EAAA;QACtC,IAAI,CAAC,eAAe,EAAE;YACpB;;AAEF,QAAA,eAAe,CAAC,OAAO,EAAE,cAAc,CAAC;AAC1C,KAAC;AACH;AAEA,SAAS,UAAU,CAAC,EAClB,MAAM,EACN,MAAM,EACN,eAAe,EAAE,gBAAgB,GAKlC,EAAA;IACC,OAAOC,UAAI,CACT,OAAO,MAAM,EAAE,cAAc,KAAI;AAC/B,QAAA,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM;AACjE,QAAA,MAAM,OAAO,GAAG,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,GAAG,EAAE,GAAG,SAAS;AAC7D,QAAA,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC;YAChC,KAAK;YACL,IAAI;YACJ,OAAO;YACP,MAAM;YACN,MAAM;YACN,IAAI;YACJ,eAAe,EAAE,qBAAqB,CAAC;gBACrC,cAAc;AACd,gBAAA,eAAe,EAAE,gBAAgB;aAClC,CAAC;AACH,SAAA,CAAC;QACF,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC;AAC/C,QAAA,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAGC,0BAAmB,CAAC,IAAI,EAAE,YAAY,CAAC;QACtE,MAAM,IAAI,GAAuB,EAAE,IAAI,EAAE,GAAG,YAAY,EAAE,UAAU,EAAE;AACtE,QAAA,OAAO,CAAC,MAAM,EAAE,EAAE,CAACC,eAAS,CAAC,UAAU,GAAG,IAAI,EAAE,CAAC;AACnD,KAAC,EACD;QACE,IAAI,EAAEA,eAAS,CAAC,UAAU;AAC1B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;;;;;;;AAsBlB,CAAA,CAAC,IAAI,EAAE;AACF,QAAA,MAAM,EAAE,MAAM;QACd,cAAc,EAAEA,eAAS,CAAC,oBAAoB;AAC/C,KAAA,CACF;AACH;AAEA;;;;;;AAMG;MACU,gBAAgB,GAAG,CAC9B,MAA6B,GAAA,EAAE,KACa;IAC5C,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,EACb,YAAY,GAAG,QAAQ,EACvB,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,eAAe,CAAC,EAC9B,aAAa,GAAG,IAAI,EACpB,UAAU,GAAG,CAAC,EACd,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,cAAc,EACd,UAAU,EACV,YAAY,EACZ,eAAe,EAAE,gBAAgB,EACjC,eAAe,GAChB,GAAG,MAAM;IAEV,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAIC,yBAAmB,EAAE;AAErD,IAAA,MAAM,YAAY,GAOd;AACF,QAAA,KAAK,EAAEC,kBAAW;AAClB,QAAA,IAAI,EAAEC,iBAAU;AAChB,QAAA,MAAM,EAAEC,mBAAY;AACpB,QAAA,MAAM,EAAEC,mBAAY;AACpB,QAAA,IAAI,EAAEC,iBAAU;KACjB;AAED,IAAA,IAAI,cAAc,KAAK,QAAQ,EAAE;AAC/B,QAAA,YAAY,CAAC,OAAO,GAAGC,oBAAa;;IAGtC,MAAM,UAAU,GAAGC,KAAC,CAAC,MAAM,CAAC,YAAY,CAAC;IAEzC,MAAM,SAAS,GAAGC,sBAAe,CAAC;QAChC,cAAc;QACd,YAAY;QACZ,kBAAkB;QAClB,aAAa;AACd,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAGC,gCAAsB,CAAC;AAC9C,QAAA,GAAG,gBAAgB;AACnB,QAAA,MAAM,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB;AACxD,QAAA,MAAM,EAAE,eAAe;AACvB,QAAA,OAAO,EAAE,cAAc,IAAI,gBAAgB,EAAE,OAAO;QACpD,OAAO,EAAE,gBAAgB,EAAE,OAAO,IAAI,CAAC,UAAU,EAAE,SAAS,CAAC;AAC9D,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAGC,wBAAc,CAAC;QACtC,YAAY;QACZ,UAAU;QACV,YAAY;QACZ,MAAM;AACP,KAAA,CAAC;IAEF,IAAI,CAAC,gBAAgB,EAAE;AACrB,QAAA,MAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC;;IAG7D,MAAM,eAAe,GAAGC,4BAAqB,CAC3C;AACE,QAAA,QAAQ,EAAE,gBAAgB;QAC1B,UAAU;QAGV,MAAM;KACP,EACD,gBAAgB,CACjB;IAED,MAAMC,QAAM,GAAG,qBAAqB,CAAC;QACnC,SAAS;QACT,UAAU;QACV,eAAe;QACf,eAAe;QACf,MAAM;AACP,KAAA,CAAC;AAEF,IAAA,OAAO,UAAU,CAAC;gBAChBA,QAAM;AACN,QAAA,MAAM,EAAE,UAAU;AAClB,QAAA,eAAe,EAAE,gBAAgB;AAClC,KAAA,CAAC;AACJ;;;;"}
|
|
@@ -12,15 +12,45 @@ class FirecrawlScraper {
|
|
|
12
12
|
defaultFormats;
|
|
13
13
|
timeout;
|
|
14
14
|
logger;
|
|
15
|
+
includeTags;
|
|
16
|
+
excludeTags;
|
|
17
|
+
waitFor;
|
|
18
|
+
maxAge;
|
|
19
|
+
mobile;
|
|
20
|
+
skipTlsVerification;
|
|
21
|
+
blockAds;
|
|
22
|
+
removeBase64Images;
|
|
23
|
+
parsePDF;
|
|
24
|
+
storeInCache;
|
|
25
|
+
zeroDataRetention;
|
|
26
|
+
headers;
|
|
27
|
+
location;
|
|
28
|
+
onlyMainContent;
|
|
29
|
+
changeTrackingOptions;
|
|
15
30
|
constructor(config = {}) {
|
|
16
31
|
this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
|
|
17
32
|
const baseUrl = config.apiUrl ??
|
|
18
33
|
process.env.FIRECRAWL_BASE_URL ??
|
|
19
34
|
'https://api.firecrawl.dev';
|
|
20
35
|
this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;
|
|
21
|
-
this.defaultFormats = config.formats ?? ['markdown', '
|
|
36
|
+
this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
|
|
22
37
|
this.timeout = config.timeout ?? 7500;
|
|
23
38
|
this.logger = config.logger || createDefaultLogger();
|
|
39
|
+
this.includeTags = config.includeTags;
|
|
40
|
+
this.excludeTags = config.excludeTags;
|
|
41
|
+
this.waitFor = config.waitFor;
|
|
42
|
+
this.maxAge = config.maxAge;
|
|
43
|
+
this.mobile = config.mobile;
|
|
44
|
+
this.skipTlsVerification = config.skipTlsVerification;
|
|
45
|
+
this.blockAds = config.blockAds;
|
|
46
|
+
this.removeBase64Images = config.removeBase64Images;
|
|
47
|
+
this.parsePDF = config.parsePDF;
|
|
48
|
+
this.storeInCache = config.storeInCache;
|
|
49
|
+
this.zeroDataRetention = config.zeroDataRetention;
|
|
50
|
+
this.headers = config.headers;
|
|
51
|
+
this.location = config.location;
|
|
52
|
+
this.onlyMainContent = config.onlyMainContent;
|
|
53
|
+
this.changeTrackingOptions = config.changeTrackingOptions;
|
|
24
54
|
if (!this.apiKey) {
|
|
25
55
|
this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
|
|
26
56
|
}
|
|
@@ -43,15 +73,27 @@ class FirecrawlScraper {
|
|
|
43
73
|
];
|
|
44
74
|
}
|
|
45
75
|
try {
|
|
46
|
-
const
|
|
76
|
+
const payload = omitUndefined({
|
|
47
77
|
url,
|
|
48
|
-
formats: options.formats
|
|
49
|
-
includeTags: options.includeTags,
|
|
50
|
-
excludeTags: options.excludeTags,
|
|
51
|
-
headers: options.headers,
|
|
52
|
-
waitFor: options.waitFor,
|
|
78
|
+
formats: options.formats ?? this.defaultFormats,
|
|
79
|
+
includeTags: options.includeTags ?? this.includeTags,
|
|
80
|
+
excludeTags: options.excludeTags ?? this.excludeTags,
|
|
81
|
+
headers: options.headers ?? this.headers,
|
|
82
|
+
waitFor: options.waitFor ?? this.waitFor,
|
|
53
83
|
timeout: options.timeout ?? this.timeout,
|
|
54
|
-
|
|
84
|
+
onlyMainContent: options.onlyMainContent ?? this.onlyMainContent,
|
|
85
|
+
maxAge: options.maxAge ?? this.maxAge,
|
|
86
|
+
mobile: options.mobile ?? this.mobile,
|
|
87
|
+
skipTlsVerification: options.skipTlsVerification ?? this.skipTlsVerification,
|
|
88
|
+
parsePDF: options.parsePDF ?? this.parsePDF,
|
|
89
|
+
location: options.location ?? this.location,
|
|
90
|
+
removeBase64Images: options.removeBase64Images ?? this.removeBase64Images,
|
|
91
|
+
blockAds: options.blockAds ?? this.blockAds,
|
|
92
|
+
storeInCache: options.storeInCache ?? this.storeInCache,
|
|
93
|
+
zeroDataRetention: options.zeroDataRetention ?? this.zeroDataRetention,
|
|
94
|
+
changeTrackingOptions: options.changeTrackingOptions ?? this.changeTrackingOptions,
|
|
95
|
+
});
|
|
96
|
+
const response = await axios.post(this.apiUrl, payload, {
|
|
55
97
|
headers: {
|
|
56
98
|
'Content-Type': 'application/json',
|
|
57
99
|
Authorization: `Bearer ${this.apiKey}`,
|
|
@@ -123,6 +165,10 @@ class FirecrawlScraper {
|
|
|
123
165
|
const createFirecrawlScraper = (config = {}) => {
|
|
124
166
|
return new FirecrawlScraper(config);
|
|
125
167
|
};
|
|
168
|
+
// Helper function to clean up payload for firecrawl
|
|
169
|
+
function omitUndefined(obj) {
|
|
170
|
+
return Object.fromEntries(Object.entries(obj).filter(([, v]) => v !== undefined));
|
|
171
|
+
}
|
|
126
172
|
|
|
127
173
|
export { FirecrawlScraper, createFirecrawlScraper };
|
|
128
174
|
//# sourceMappingURL=firecrawl.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"firecrawl.mjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["import axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n private logger: t.Logger;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 7500;\n\n this.logger = config.logger || createDefaultLogger();\n\n if (!this.apiKey) {\n this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n this.logger.debug(\n `Firecrawl scraper initialized with API URL: ${this.apiUrl}`\n );\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n this.logger.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":[],"mappings":";;;;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AACP,IAAA,MAAM;AAEd,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,IAAI;QAErC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,mBAAmB,EAAE;AAEpD,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAG3E,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAA+C,4CAAA,EAAA,IAAI,CAAC,MAAM,CAAE,CAAA,CAC7D;;AAGH;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAG,cAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;gBACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACrD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;"}
|
|
1
|
+
{"version":3,"file":"firecrawl.mjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["import axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n private logger: t.Logger;\n private includeTags?: string[];\n private excludeTags?: string[];\n private waitFor?: number;\n private maxAge?: number;\n private mobile?: boolean;\n private skipTlsVerification?: boolean;\n private blockAds?: boolean;\n private removeBase64Images?: boolean;\n private parsePDF?: boolean;\n private storeInCache?: boolean;\n private zeroDataRetention?: boolean;\n private headers?: Record<string, string>;\n private location?: { country?: string; languages?: string[] };\n private onlyMainContent?: boolean;\n private changeTrackingOptions?: object;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];\n this.timeout = config.timeout ?? 7500;\n\n this.logger = config.logger || createDefaultLogger();\n\n this.includeTags = config.includeTags;\n this.excludeTags = config.excludeTags;\n this.waitFor = config.waitFor;\n this.maxAge = config.maxAge;\n this.mobile = config.mobile;\n this.skipTlsVerification = config.skipTlsVerification;\n this.blockAds = config.blockAds;\n this.removeBase64Images = config.removeBase64Images;\n this.parsePDF = config.parsePDF;\n this.storeInCache = config.storeInCache;\n this.zeroDataRetention = config.zeroDataRetention;\n this.headers = config.headers;\n this.location = config.location;\n this.onlyMainContent = config.onlyMainContent;\n this.changeTrackingOptions = config.changeTrackingOptions;\n\n if (!this.apiKey) {\n this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n this.logger.debug(\n `Firecrawl scraper initialized with API URL: ${this.apiUrl}`\n );\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const payload = omitUndefined({\n url,\n formats: options.formats ?? this.defaultFormats,\n includeTags: options.includeTags ?? this.includeTags,\n excludeTags: options.excludeTags ?? this.excludeTags,\n headers: options.headers ?? this.headers,\n waitFor: options.waitFor ?? this.waitFor,\n timeout: options.timeout ?? this.timeout,\n onlyMainContent: options.onlyMainContent ?? this.onlyMainContent,\n maxAge: options.maxAge ?? this.maxAge,\n mobile: options.mobile ?? this.mobile,\n skipTlsVerification:\n options.skipTlsVerification ?? this.skipTlsVerification,\n parsePDF: options.parsePDF ?? this.parsePDF,\n location: options.location ?? this.location,\n removeBase64Images:\n options.removeBase64Images ?? this.removeBase64Images,\n blockAds: options.blockAds ?? this.blockAds,\n storeInCache: options.storeInCache ?? this.storeInCache,\n zeroDataRetention: options.zeroDataRetention ?? this.zeroDataRetention,\n changeTrackingOptions:\n options.changeTrackingOptions ?? this.changeTrackingOptions,\n });\n const response = await axios.post(this.apiUrl, payload, {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n });\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n this.logger.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n\n// Helper function to clean up payload for firecrawl\nfunction omitUndefined<T extends object>(obj: T): Partial<T> {\n return Object.fromEntries(\n Object.entries(obj).filter(([, v]) => v !== undefined)\n ) as Partial<T>;\n}\n"],"names":[],"mappings":";;;;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AACP,IAAA,MAAM;AACN,IAAA,WAAW;AACX,IAAA,WAAW;AACX,IAAA,OAAO;AACP,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,mBAAmB;AACnB,IAAA,QAAQ;AACR,IAAA,kBAAkB;AAClB,IAAA,QAAQ;AACR,IAAA,YAAY;AACZ,IAAA,iBAAiB;AACjB,IAAA,OAAO;AACP,IAAA,QAAQ;AACR,IAAA,eAAe;AACf,IAAA,qBAAqB;AAE7B,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,SAAS,CAAC;QAC/D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,IAAI;QAErC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,mBAAmB,EAAE;AAEpD,QAAA,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW;AACrC,QAAA,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW;AACrC,QAAA,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM;AAC3B,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM;AAC3B,QAAA,IAAI,CAAC,mBAAmB,GAAG,MAAM,CAAC,mBAAmB;AACrD,QAAA,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ;AAC/B,QAAA,IAAI,CAAC,kBAAkB,GAAG,MAAM,CAAC,kBAAkB;AACnD,QAAA,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ;AAC/B,QAAA,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY;AACvC,QAAA,IAAI,CAAC,iBAAiB,GAAG,MAAM,CAAC,iBAAiB;AACjD,QAAA,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO;AAC7B,QAAA,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ;AAC/B,QAAA,IAAI,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe;AAC7C,QAAA,IAAI,CAAC,qBAAqB,GAAG,MAAM,CAAC,qBAAqB;AAEzD,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAG3E,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAA+C,4CAAA,EAAA,IAAI,CAAC,MAAM,CAAE,CAAA,CAC7D;;AAGH;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,OAAO,GAAG,aAAa,CAAC;gBAC5B,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;AAC/C,gBAAA,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW;AACpD,gBAAA,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW;AACpD,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;AACxC,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;AACxC,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;AACxC,gBAAA,eAAe,EAAE,OAAO,CAAC,eAAe,IAAI,IAAI,CAAC,eAAe;AAChE,gBAAA,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM;AACrC,gBAAA,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM;AACrC,gBAAA,mBAAmB,EACjB,OAAO,CAAC,mBAAmB,IAAI,IAAI,CAAC,mBAAmB;AACzD,gBAAA,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;AAC3C,gBAAA,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;AAC3C,gBAAA,kBAAkB,EAChB,OAAO,CAAC,kBAAkB,IAAI,IAAI,CAAC,kBAAkB;AACvD,gBAAA,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;AAC3C,gBAAA,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY;AACvD,gBAAA,iBAAiB,EAAE,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC,iBAAiB;AACtE,gBAAA,qBAAqB,EACnB,OAAO,CAAC,qBAAqB,IAAI,IAAI,CAAC,qBAAqB;AAC9D,aAAA,CAAC;AACF,YAAA,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE;AACtD,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CAAC;AAEF,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAG,cAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;gBACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACrD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;AAEA;AACA,SAAS,aAAa,CAAmB,GAAM,EAAA;IAC7C,OAAO,MAAM,CAAC,WAAW,CACvB,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,SAAS,CAAC,CACzC;AACjB;;;;"}
|
|
@@ -234,7 +234,7 @@ Use anchor marker(s) immediately after the statement:
|
|
|
234
234
|
* @returns A DynamicStructuredTool with a schema that depends on the searchProvider
|
|
235
235
|
*/
|
|
236
236
|
const createSearchTool = (config = {}) => {
|
|
237
|
-
const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, safeSearch = 1, firecrawlApiKey, firecrawlApiUrl,
|
|
237
|
+
const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, safeSearch = 1, firecrawlApiKey, firecrawlApiUrl, firecrawlOptions, scraperTimeout, jinaApiKey, cohereApiKey, onSearchResults: _onSearchResults, onGetHighlights, } = config;
|
|
238
238
|
const logger = config.logger || createDefaultLogger();
|
|
239
239
|
const schemaObject = {
|
|
240
240
|
query: querySchema,
|
|
@@ -254,10 +254,11 @@ const createSearchTool = (config = {}) => {
|
|
|
254
254
|
searxngApiKey,
|
|
255
255
|
});
|
|
256
256
|
const firecrawlScraper = createFirecrawlScraper({
|
|
257
|
+
...firecrawlOptions,
|
|
257
258
|
apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
|
|
258
259
|
apiUrl: firecrawlApiUrl,
|
|
259
|
-
timeout: scraperTimeout,
|
|
260
|
-
formats:
|
|
260
|
+
timeout: scraperTimeout ?? firecrawlOptions?.timeout,
|
|
261
|
+
formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
|
|
261
262
|
});
|
|
262
263
|
const selectedReranker = createReranker({
|
|
263
264
|
rerankerType,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool.mjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type { RunnableConfig } from '@langchain/core/runnables';\nimport type * as t from './types';\nimport {\n DATE_RANGE,\n querySchema,\n dateSchema,\n countrySchema,\n imagesSchema,\n videosSchema,\n newsSchema,\n} from './schema';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createDefaultLogger } from './utils';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\n/**\n * Executes parallel searches and merges the results\n */\nasync function executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n}: {\n searchAPI: ReturnType<typeof createSearchAPI>;\n query: string;\n date?: DATE_RANGE;\n country?: string;\n safeSearch: t.SearchToolConfig['safeSearch'];\n images: boolean;\n videos: boolean;\n news: boolean;\n logger: t.Logger;\n}): Promise<t.SearchResult> {\n // Prepare all search tasks to run in parallel\n const searchTasks: Promise<t.SearchResult>[] = [\n // Main search\n searchAPI.getSources({\n query,\n date,\n country,\n safeSearch,\n }),\n ];\n\n if (images) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'images',\n })\n .catch((error) => {\n logger.error('Error fetching images:', error);\n return {\n success: false,\n error: `Images search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (videos) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'videos',\n })\n .catch((error) => {\n logger.error('Error fetching videos:', error);\n return {\n success: false,\n error: `Videos search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (news) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'news',\n })\n .catch((error) => {\n logger.error('Error fetching news:', error);\n return {\n success: false,\n error: `News search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n\n // Run all searches in parallel\n const results = await Promise.all(searchTasks);\n\n // Get the main search result (first result)\n const mainResult = results[0];\n if (!mainResult.success) {\n throw new Error(mainResult.error ?? 'Search failed');\n }\n\n // Merge additional results with the main results\n const mergedResults = { ...mainResult.data };\n\n // Convert existing news to topStories if present\n if (mergedResults.news !== undefined && mergedResults.news.length > 0) {\n const existingNewsAsTopStories = mergedResults.news\n .filter((newsItem) => newsItem.link !== undefined && newsItem.link !== '')\n .map((newsItem) => ({\n title: newsItem.title ?? '',\n link: newsItem.link ?? '',\n source: newsItem.source ?? '',\n date: newsItem.date ?? '',\n imageUrl: newsItem.imageUrl ?? '',\n processed: false,\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...existingNewsAsTopStories,\n ];\n delete mergedResults.news;\n }\n\n results.slice(1).forEach((result) => {\n if (result.success && result.data !== undefined) {\n if (result.data.images !== undefined && result.data.images.length > 0) {\n mergedResults.images = [\n ...(mergedResults.images ?? []),\n ...result.data.images,\n ];\n }\n if (result.data.videos !== undefined && result.data.videos.length > 0) {\n mergedResults.videos = [\n ...(mergedResults.videos ?? []),\n ...result.data.videos,\n ];\n }\n if (result.data.news !== undefined && result.data.news.length > 0) {\n const newsAsTopStories = result.data.news.map((newsItem) => ({\n ...newsItem,\n link: newsItem.link ?? '',\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...newsAsTopStories,\n ];\n }\n }\n });\n\n return { success: true, data: mergedResults };\n}\n\nfunction createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n}: {\n safeSearch: t.SearchToolConfig['safeSearch'];\n searchAPI: ReturnType<typeof createSearchAPI>;\n sourceProcessor: ReturnType<typeof createSourceProcessor>;\n onGetHighlights: t.SearchToolConfig['onGetHighlights'];\n logger: t.Logger;\n}) {\n return async function ({\n query,\n date,\n country,\n proMode = true,\n maxSources = 5,\n onSearchResults,\n images = false,\n videos = false,\n news = false,\n }: {\n query: string;\n country?: string;\n date?: DATE_RANGE;\n proMode?: boolean;\n maxSources?: number;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n images?: boolean;\n videos?: boolean;\n news?: boolean;\n }): Promise<t.SearchResultData> {\n try {\n // Execute parallel searches and merge results\n const searchResult = await executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n });\n\n onSearchResults?.(searchResult);\n\n const processedSources = await sourceProcessor.processSources({\n query,\n news,\n result: searchResult,\n proMode,\n onGetHighlights,\n numElements: maxSources,\n });\n\n return expandHighlights(processedSources);\n } catch (error) {\n logger.error('Error in search:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n videos: [],\n news: [],\n relatedSearches: [],\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n}\n\nfunction createOnSearchResults({\n runnableConfig,\n onSearchResults,\n}: {\n runnableConfig: RunnableConfig;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}) {\n return function (results: t.SearchResult): void {\n if (!onSearchResults) {\n return;\n }\n onSearchResults(results, runnableConfig);\n };\n}\n\nfunction createTool({\n schema,\n search,\n onSearchResults: _onSearchResults,\n}: {\n schema: t.SearchToolSchema;\n search: ReturnType<typeof createSearchProcessor>;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}): DynamicStructuredTool<typeof schema> {\n return tool<typeof schema>(\n async (params, runnableConfig) => {\n const { query, date, country: _c, images, videos, news } = params;\n const country = typeof _c === 'string' && _c ? _c : undefined;\n const searchResult = await search({\n query,\n date,\n country,\n images,\n videos,\n news,\n onSearchResults: createOnSearchResults({\n runnableConfig,\n onSearchResults: _onSearchResults,\n }),\n });\n const turn = runnableConfig.toolCall?.turn ?? 0;\n const { output, references } = formatResultsForLLM(turn, searchResult);\n const data: t.SearchResultData = { turn, ...searchResult, references };\n return [output, { [Constants.WEB_SEARCH]: data }];\n },\n {\n name: Constants.WEB_SEARCH,\n description: `Real-time search. Results have required citation anchors.\n\nNote: Use ONCE per reply unless instructed otherwise.\n\nAnchors:\n- \\\\ue202turnXtypeY\n- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx\n\nSpecial Markers:\n- \\\\ue203...\\\\ue204 — highlight start/end of cited text (for Standalone or Group citations)\n- \\\\ue200...\\\\ue201 — group block (e.g. \\\\ue200\\\\ue202turn0search1\\\\ue202turn0news2\\\\ue201)\n\n**CITE EVERY NON-OBVIOUS FACT/QUOTE:**\nUse anchor marker(s) immediately after the statement:\n- Standalone: \"Pure functions produce same output. \\\\ue202turn0search0\"\n- Standalone (multiple): \"Today's News \\\\ue202turn0search0\\\\ue202turn0news0\"\n- Highlight: \"\\\\ue203Highlight text.\\\\ue204\\\\ue202turn0news1\"\n- Group: \"Sources. \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Group Highlight: \"\\\\ue203Highlight for group.\\\\ue204 \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Image: \"See photo \\\\ue202turn0image0.\"\n\n**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**\n`.trim(),\n schema: schema,\n responseFormat: Constants.CONTENT_AND_ARTIFACT,\n }\n );\n}\n\n/**\n * Creates a search tool with a schema that dynamically includes the country field\n * only when the searchProvider is 'serper'.\n *\n * @param config - The search tool configuration\n * @returns A DynamicStructuredTool with a schema that depends on the searchProvider\n */\nexport const createSearchTool = (\n config: t.SearchToolConfig = {}\n): DynamicStructuredTool<typeof toolSchema> => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n rerankerType = 'cohere',\n topResults = 5,\n strategies = ['no_extraction'],\n filterContent = true,\n safeSearch = 1,\n firecrawlApiKey,\n firecrawlApiUrl,\n firecrawlFormats = ['markdown', 'html'],\n scraperTimeout,\n jinaApiKey,\n cohereApiKey,\n onSearchResults: _onSearchResults,\n onGetHighlights,\n } = config;\n\n const logger = config.logger || createDefaultLogger();\n\n const schemaObject: {\n query: z.ZodString;\n date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;\n country?: z.ZodOptional<z.ZodString>;\n images: z.ZodOptional<z.ZodBoolean>;\n videos: z.ZodOptional<z.ZodBoolean>;\n news: z.ZodOptional<z.ZodBoolean>;\n } = {\n query: querySchema,\n date: dateSchema,\n images: imagesSchema,\n videos: videosSchema,\n news: newsSchema,\n };\n\n if (searchProvider === 'serper') {\n schemaObject.country = countrySchema;\n }\n\n const toolSchema = z.object(schemaObject);\n\n const searchAPI = createSearchAPI({\n searchProvider,\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n });\n\n const firecrawlScraper = createFirecrawlScraper({\n apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,\n apiUrl: firecrawlApiUrl,\n timeout: scraperTimeout,\n formats: firecrawlFormats,\n });\n\n const selectedReranker = createReranker({\n rerankerType,\n jinaApiKey,\n cohereApiKey,\n logger,\n });\n\n if (!selectedReranker) {\n logger.warn('No reranker selected. Using default ranking.');\n }\n\n const sourceProcessor = createSourceProcessor(\n {\n reranker: selectedReranker,\n topResults,\n strategies,\n filterContent,\n logger,\n },\n firecrawlScraper\n );\n\n const search = createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n });\n\n return createTool({\n search,\n schema: toolSchema,\n onSearchResults: _onSearchResults,\n });\n};\n"],"names":[],"mappings":";;;;;;;;;;;AAqBA;;AAEG;AACH,eAAe,uBAAuB,CAAC,EACrC,SAAS,EACT,KAAK,EACL,IAAI,EACJ,OAAO,EACP,UAAU,EACV,MAAM,EACN,MAAM,EACN,IAAI,EACJ,MAAM,GAWP,EAAA;;AAEC,IAAA,MAAM,WAAW,GAA8B;;QAE7C,SAAS,CAAC,UAAU,CAAC;YACnB,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;SACX,CAAC;KACH;IAED,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,IAAI,EAAE;QACR,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,MAAM;SACb;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;YAC3C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAuB,oBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACvF;SACF,CAAC,CACL;;;IAIH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;;AAG9C,IAAA,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAC;AAC7B,IAAA,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE;QACvB,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,eAAe,CAAC;;;IAItD,MAAM,aAAa,GAAG,EAAE,GAAG,UAAU,CAAC,IAAI,EAAE;;AAG5C,IAAA,IAAI,aAAa,CAAC,IAAI,KAAK,SAAS,IAAI,aAAa,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACrE,QAAA,MAAM,wBAAwB,GAAG,aAAa,CAAC;AAC5C,aAAA,MAAM,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,KAAK,SAAS,IAAI,QAAQ,CAAC,IAAI,KAAK,EAAE;AACxE,aAAA,GAAG,CAAC,CAAC,QAAQ,MAAM;AAClB,YAAA,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,EAAE;AAC3B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,MAAM,EAAE,QAAQ,CAAC,MAAM,IAAI,EAAE;AAC7B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,QAAQ,EAAE,QAAQ,CAAC,QAAQ,IAAI,EAAE;AACjC,YAAA,SAAS,EAAE,KAAK;AACjB,SAAA,CAAC,CAAC;QACL,aAAa,CAAC,UAAU,GAAG;AACzB,YAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,YAAA,GAAG,wBAAwB;SAC5B;QACD,OAAO,aAAa,CAAC,IAAI;;IAG3B,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,KAAI;QAClC,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE;AAC/C,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,gBAAA,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,MAAM;AAC3D,oBAAA,GAAG,QAAQ;AACX,oBAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AAC1B,iBAAA,CAAC,CAAC;gBACH,aAAa,CAAC,UAAU,GAAG;AACzB,oBAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,oBAAA,GAAG,gBAAgB;iBACpB;;;AAGP,KAAC,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE;AAC/C;AAEA,SAAS,qBAAqB,CAAC,EAC7B,SAAS,EACT,UAAU,EACV,eAAe,EACf,eAAe,EACf,MAAM,GAOP,EAAA;AACC,IAAA,OAAO,gBAAgB,EACrB,KAAK,EACL,IAAI,EACJ,OAAO,EACP,OAAO,GAAG,IAAI,EACd,UAAU,GAAG,CAAC,EACd,eAAe,EACf,MAAM,GAAG,KAAK,EACd,MAAM,GAAG,KAAK,EACd,IAAI,GAAG,KAAK,GAWb,EAAA;AACC,QAAA,IAAI;;AAEF,YAAA,MAAM,YAAY,GAAG,MAAM,uBAAuB,CAAC;gBACjD,SAAS;gBACT,KAAK;gBACL,IAAI;gBACJ,OAAO;gBACP,UAAU;gBACV,MAAM;gBACN,MAAM;gBACN,IAAI;gBACJ,MAAM;AACP,aAAA,CAAC;AAEF,YAAA,eAAe,GAAG,YAAY,CAAC;AAE/B,YAAA,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAAC,cAAc,CAAC;gBAC5D,KAAK;gBACL,IAAI;AACJ,gBAAA,MAAM,EAAE,YAAY;gBACpB,OAAO;gBACP,eAAe;AACf,gBAAA,WAAW,EAAE,UAAU;AACxB,aAAA,CAAC;AAEF,YAAA,OAAO,gBAAgB,CAAC,gBAAgB,CAAC;;QACzC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,CAAC,KAAK,CAAC,kBAAkB,EAAE,KAAK,CAAC;YACvC,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,IAAI,EAAE,EAAE;AACR,gBAAA,eAAe,EAAE,EAAE;AACnB,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;AACH;AAEA,SAAS,qBAAqB,CAAC,EAC7B,cAAc,EACd,eAAe,GAIhB,EAAA;AACC,IAAA,OAAO,UAAU,OAAuB,EAAA;QACtC,IAAI,CAAC,eAAe,EAAE;YACpB;;AAEF,QAAA,eAAe,CAAC,OAAO,EAAE,cAAc,CAAC;AAC1C,KAAC;AACH;AAEA,SAAS,UAAU,CAAC,EAClB,MAAM,EACN,MAAM,EACN,eAAe,EAAE,gBAAgB,GAKlC,EAAA;IACC,OAAO,IAAI,CACT,OAAO,MAAM,EAAE,cAAc,KAAI;AAC/B,QAAA,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM;AACjE,QAAA,MAAM,OAAO,GAAG,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,GAAG,EAAE,GAAG,SAAS;AAC7D,QAAA,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC;YAChC,KAAK;YACL,IAAI;YACJ,OAAO;YACP,MAAM;YACN,MAAM;YACN,IAAI;YACJ,eAAe,EAAE,qBAAqB,CAAC;gBACrC,cAAc;AACd,gBAAA,eAAe,EAAE,gBAAgB;aAClC,CAAC;AACH,SAAA,CAAC;QACF,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC;AAC/C,QAAA,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,mBAAmB,CAAC,IAAI,EAAE,YAAY,CAAC;QACtE,MAAM,IAAI,GAAuB,EAAE,IAAI,EAAE,GAAG,YAAY,EAAE,UAAU,EAAE;AACtE,QAAA,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,SAAS,CAAC,UAAU,GAAG,IAAI,EAAE,CAAC;AACnD,KAAC,EACD;QACE,IAAI,EAAE,SAAS,CAAC,UAAU;AAC1B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;;;;;;;AAsBlB,CAAA,CAAC,IAAI,EAAE;AACF,QAAA,MAAM,EAAE,MAAM;QACd,cAAc,EAAE,SAAS,CAAC,oBAAoB;AAC/C,KAAA,CACF;AACH;AAEA;;;;;;AAMG;MACU,gBAAgB,GAAG,CAC9B,MAA6B,GAAA,EAAE,KACa;IAC5C,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,EACb,YAAY,GAAG,QAAQ,EACvB,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,eAAe,CAAC,EAC9B,aAAa,GAAG,IAAI,EACpB,UAAU,GAAG,CAAC,EACd,eAAe,EACf,eAAe,EACf,gBAAgB,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,EACvC,cAAc,EACd,UAAU,EACV,YAAY,EACZ,eAAe,EAAE,gBAAgB,EACjC,eAAe,GAChB,GAAG,MAAM;IAEV,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,mBAAmB,EAAE;AAErD,IAAA,MAAM,YAAY,GAOd;AACF,QAAA,KAAK,EAAE,WAAW;AAClB,QAAA,IAAI,EAAE,UAAU;AAChB,QAAA,MAAM,EAAE,YAAY;AACpB,QAAA,MAAM,EAAE,YAAY;AACpB,QAAA,IAAI,EAAE,UAAU;KACjB;AAED,IAAA,IAAI,cAAc,KAAK,QAAQ,EAAE;AAC/B,QAAA,YAAY,CAAC,OAAO,GAAG,aAAa;;IAGtC,MAAM,UAAU,GAAG,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;IAEzC,MAAM,SAAS,GAAG,eAAe,CAAC;QAChC,cAAc;QACd,YAAY;QACZ,kBAAkB;QAClB,aAAa;AACd,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAG,sBAAsB,CAAC;AAC9C,QAAA,MAAM,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB;AACxD,QAAA,MAAM,EAAE,eAAe;AACvB,QAAA,OAAO,EAAE,cAAc;AACvB,QAAA,OAAO,EAAE,gBAAgB;AAC1B,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAG,cAAc,CAAC;QACtC,YAAY;QACZ,UAAU;QACV,YAAY;QACZ,MAAM;AACP,KAAA,CAAC;IAEF,IAAI,CAAC,gBAAgB,EAAE;AACrB,QAAA,MAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC;;IAG7D,MAAM,eAAe,GAAG,qBAAqB,CAC3C;AACE,QAAA,QAAQ,EAAE,gBAAgB;QAC1B,UAAU;QAGV,MAAM;KACP,EACD,gBAAgB,CACjB;IAED,MAAM,MAAM,GAAG,qBAAqB,CAAC;QACnC,SAAS;QACT,UAAU;QACV,eAAe;QACf,eAAe;QACf,MAAM;AACP,KAAA,CAAC;AAEF,IAAA,OAAO,UAAU,CAAC;QAChB,MAAM;AACN,QAAA,MAAM,EAAE,UAAU;AAClB,QAAA,eAAe,EAAE,gBAAgB;AAClC,KAAA,CAAC;AACJ;;;;"}
|
|
1
|
+
{"version":3,"file":"tool.mjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type { RunnableConfig } from '@langchain/core/runnables';\nimport type * as t from './types';\nimport {\n DATE_RANGE,\n querySchema,\n dateSchema,\n countrySchema,\n imagesSchema,\n videosSchema,\n newsSchema,\n} from './schema';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createDefaultLogger } from './utils';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\n/**\n * Executes parallel searches and merges the results\n */\nasync function executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n}: {\n searchAPI: ReturnType<typeof createSearchAPI>;\n query: string;\n date?: DATE_RANGE;\n country?: string;\n safeSearch: t.SearchToolConfig['safeSearch'];\n images: boolean;\n videos: boolean;\n news: boolean;\n logger: t.Logger;\n}): Promise<t.SearchResult> {\n // Prepare all search tasks to run in parallel\n const searchTasks: Promise<t.SearchResult>[] = [\n // Main search\n searchAPI.getSources({\n query,\n date,\n country,\n safeSearch,\n }),\n ];\n\n if (images) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'images',\n })\n .catch((error) => {\n logger.error('Error fetching images:', error);\n return {\n success: false,\n error: `Images search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (videos) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'videos',\n })\n .catch((error) => {\n logger.error('Error fetching videos:', error);\n return {\n success: false,\n error: `Videos search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n if (news) {\n searchTasks.push(\n searchAPI\n .getSources({\n query,\n date,\n country,\n safeSearch,\n type: 'news',\n })\n .catch((error) => {\n logger.error('Error fetching news:', error);\n return {\n success: false,\n error: `News search failed: ${error instanceof Error ? error.message : String(error)}`,\n };\n })\n );\n }\n\n // Run all searches in parallel\n const results = await Promise.all(searchTasks);\n\n // Get the main search result (first result)\n const mainResult = results[0];\n if (!mainResult.success) {\n throw new Error(mainResult.error ?? 'Search failed');\n }\n\n // Merge additional results with the main results\n const mergedResults = { ...mainResult.data };\n\n // Convert existing news to topStories if present\n if (mergedResults.news !== undefined && mergedResults.news.length > 0) {\n const existingNewsAsTopStories = mergedResults.news\n .filter((newsItem) => newsItem.link !== undefined && newsItem.link !== '')\n .map((newsItem) => ({\n title: newsItem.title ?? '',\n link: newsItem.link ?? '',\n source: newsItem.source ?? '',\n date: newsItem.date ?? '',\n imageUrl: newsItem.imageUrl ?? '',\n processed: false,\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...existingNewsAsTopStories,\n ];\n delete mergedResults.news;\n }\n\n results.slice(1).forEach((result) => {\n if (result.success && result.data !== undefined) {\n if (result.data.images !== undefined && result.data.images.length > 0) {\n mergedResults.images = [\n ...(mergedResults.images ?? []),\n ...result.data.images,\n ];\n }\n if (result.data.videos !== undefined && result.data.videos.length > 0) {\n mergedResults.videos = [\n ...(mergedResults.videos ?? []),\n ...result.data.videos,\n ];\n }\n if (result.data.news !== undefined && result.data.news.length > 0) {\n const newsAsTopStories = result.data.news.map((newsItem) => ({\n ...newsItem,\n link: newsItem.link ?? '',\n }));\n mergedResults.topStories = [\n ...(mergedResults.topStories ?? []),\n ...newsAsTopStories,\n ];\n }\n }\n });\n\n return { success: true, data: mergedResults };\n}\n\nfunction createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n}: {\n safeSearch: t.SearchToolConfig['safeSearch'];\n searchAPI: ReturnType<typeof createSearchAPI>;\n sourceProcessor: ReturnType<typeof createSourceProcessor>;\n onGetHighlights: t.SearchToolConfig['onGetHighlights'];\n logger: t.Logger;\n}) {\n return async function ({\n query,\n date,\n country,\n proMode = true,\n maxSources = 5,\n onSearchResults,\n images = false,\n videos = false,\n news = false,\n }: {\n query: string;\n country?: string;\n date?: DATE_RANGE;\n proMode?: boolean;\n maxSources?: number;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n images?: boolean;\n videos?: boolean;\n news?: boolean;\n }): Promise<t.SearchResultData> {\n try {\n // Execute parallel searches and merge results\n const searchResult = await executeParallelSearches({\n searchAPI,\n query,\n date,\n country,\n safeSearch,\n images,\n videos,\n news,\n logger,\n });\n\n onSearchResults?.(searchResult);\n\n const processedSources = await sourceProcessor.processSources({\n query,\n news,\n result: searchResult,\n proMode,\n onGetHighlights,\n numElements: maxSources,\n });\n\n return expandHighlights(processedSources);\n } catch (error) {\n logger.error('Error in search:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n videos: [],\n news: [],\n relatedSearches: [],\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n}\n\nfunction createOnSearchResults({\n runnableConfig,\n onSearchResults,\n}: {\n runnableConfig: RunnableConfig;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}) {\n return function (results: t.SearchResult): void {\n if (!onSearchResults) {\n return;\n }\n onSearchResults(results, runnableConfig);\n };\n}\n\nfunction createTool({\n schema,\n search,\n onSearchResults: _onSearchResults,\n}: {\n schema: t.SearchToolSchema;\n search: ReturnType<typeof createSearchProcessor>;\n onSearchResults: t.SearchToolConfig['onSearchResults'];\n}): DynamicStructuredTool<typeof schema> {\n return tool<typeof schema>(\n async (params, runnableConfig) => {\n const { query, date, country: _c, images, videos, news } = params;\n const country = typeof _c === 'string' && _c ? _c : undefined;\n const searchResult = await search({\n query,\n date,\n country,\n images,\n videos,\n news,\n onSearchResults: createOnSearchResults({\n runnableConfig,\n onSearchResults: _onSearchResults,\n }),\n });\n const turn = runnableConfig.toolCall?.turn ?? 0;\n const { output, references } = formatResultsForLLM(turn, searchResult);\n const data: t.SearchResultData = { turn, ...searchResult, references };\n return [output, { [Constants.WEB_SEARCH]: data }];\n },\n {\n name: Constants.WEB_SEARCH,\n description: `Real-time search. Results have required citation anchors.\n\nNote: Use ONCE per reply unless instructed otherwise.\n\nAnchors:\n- \\\\ue202turnXtypeY\n- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx\n\nSpecial Markers:\n- \\\\ue203...\\\\ue204 — highlight start/end of cited text (for Standalone or Group citations)\n- \\\\ue200...\\\\ue201 — group block (e.g. \\\\ue200\\\\ue202turn0search1\\\\ue202turn0news2\\\\ue201)\n\n**CITE EVERY NON-OBVIOUS FACT/QUOTE:**\nUse anchor marker(s) immediately after the statement:\n- Standalone: \"Pure functions produce same output. \\\\ue202turn0search0\"\n- Standalone (multiple): \"Today's News \\\\ue202turn0search0\\\\ue202turn0news0\"\n- Highlight: \"\\\\ue203Highlight text.\\\\ue204\\\\ue202turn0news1\"\n- Group: \"Sources. \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Group Highlight: \"\\\\ue203Highlight for group.\\\\ue204 \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Image: \"See photo \\\\ue202turn0image0.\"\n\n**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**\n`.trim(),\n schema: schema,\n responseFormat: Constants.CONTENT_AND_ARTIFACT,\n }\n );\n}\n\n/**\n * Creates a search tool with a schema that dynamically includes the country field\n * only when the searchProvider is 'serper'.\n *\n * @param config - The search tool configuration\n * @returns A DynamicStructuredTool with a schema that depends on the searchProvider\n */\nexport const createSearchTool = (\n config: t.SearchToolConfig = {}\n): DynamicStructuredTool<typeof toolSchema> => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n rerankerType = 'cohere',\n topResults = 5,\n strategies = ['no_extraction'],\n filterContent = true,\n safeSearch = 1,\n firecrawlApiKey,\n firecrawlApiUrl,\n firecrawlOptions,\n scraperTimeout,\n jinaApiKey,\n cohereApiKey,\n onSearchResults: _onSearchResults,\n onGetHighlights,\n } = config;\n\n const logger = config.logger || createDefaultLogger();\n\n const schemaObject: {\n query: z.ZodString;\n date: z.ZodOptional<z.ZodNativeEnum<typeof DATE_RANGE>>;\n country?: z.ZodOptional<z.ZodString>;\n images: z.ZodOptional<z.ZodBoolean>;\n videos: z.ZodOptional<z.ZodBoolean>;\n news: z.ZodOptional<z.ZodBoolean>;\n } = {\n query: querySchema,\n date: dateSchema,\n images: imagesSchema,\n videos: videosSchema,\n news: newsSchema,\n };\n\n if (searchProvider === 'serper') {\n schemaObject.country = countrySchema;\n }\n\n const toolSchema = z.object(schemaObject);\n\n const searchAPI = createSearchAPI({\n searchProvider,\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n });\n\n const firecrawlScraper = createFirecrawlScraper({\n ...firecrawlOptions,\n apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,\n apiUrl: firecrawlApiUrl,\n timeout: scraperTimeout ?? firecrawlOptions?.timeout,\n formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],\n });\n\n const selectedReranker = createReranker({\n rerankerType,\n jinaApiKey,\n cohereApiKey,\n logger,\n });\n\n if (!selectedReranker) {\n logger.warn('No reranker selected. Using default ranking.');\n }\n\n const sourceProcessor = createSourceProcessor(\n {\n reranker: selectedReranker,\n topResults,\n strategies,\n filterContent,\n logger,\n },\n firecrawlScraper\n );\n\n const search = createSearchProcessor({\n searchAPI,\n safeSearch,\n sourceProcessor,\n onGetHighlights,\n logger,\n });\n\n return createTool({\n search,\n schema: toolSchema,\n onSearchResults: _onSearchResults,\n });\n};\n"],"names":[],"mappings":";;;;;;;;;;;AAqBA;;AAEG;AACH,eAAe,uBAAuB,CAAC,EACrC,SAAS,EACT,KAAK,EACL,IAAI,EACJ,OAAO,EACP,UAAU,EACV,MAAM,EACN,MAAM,EACN,IAAI,EACJ,MAAM,GAWP,EAAA;;AAEC,IAAA,MAAM,WAAW,GAA8B;;QAE7C,SAAS,CAAC,UAAU,CAAC;YACnB,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;SACX,CAAC;KACH;IAED,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,MAAM,EAAE;QACV,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,QAAQ;SACf;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC;YAC7C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAyB,sBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACzF;SACF,CAAC,CACL;;IAEH,IAAI,IAAI,EAAE;QACR,WAAW,CAAC,IAAI,CACd;AACG,aAAA,UAAU,CAAC;YACV,KAAK;YACL,IAAI;YACJ,OAAO;YACP,UAAU;AACV,YAAA,IAAI,EAAE,MAAM;SACb;AACA,aAAA,KAAK,CAAC,CAAC,KAAK,KAAI;AACf,YAAA,MAAM,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;YAC3C,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;AACd,gBAAA,KAAK,EAAE,CAAuB,oBAAA,EAAA,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAE,CAAA;aACvF;SACF,CAAC,CACL;;;IAIH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;;AAG9C,IAAA,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAC;AAC7B,IAAA,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE;QACvB,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,eAAe,CAAC;;;IAItD,MAAM,aAAa,GAAG,EAAE,GAAG,UAAU,CAAC,IAAI,EAAE;;AAG5C,IAAA,IAAI,aAAa,CAAC,IAAI,KAAK,SAAS,IAAI,aAAa,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACrE,QAAA,MAAM,wBAAwB,GAAG,aAAa,CAAC;AAC5C,aAAA,MAAM,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,IAAI,KAAK,SAAS,IAAI,QAAQ,CAAC,IAAI,KAAK,EAAE;AACxE,aAAA,GAAG,CAAC,CAAC,QAAQ,MAAM;AAClB,YAAA,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,EAAE;AAC3B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,MAAM,EAAE,QAAQ,CAAC,MAAM,IAAI,EAAE;AAC7B,YAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AACzB,YAAA,QAAQ,EAAE,QAAQ,CAAC,QAAQ,IAAI,EAAE;AACjC,YAAA,SAAS,EAAE,KAAK;AACjB,SAAA,CAAC,CAAC;QACL,aAAa,CAAC,UAAU,GAAG;AACzB,YAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,YAAA,GAAG,wBAAwB;SAC5B;QACD,OAAO,aAAa,CAAC,IAAI;;IAG3B,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,KAAI;QAClC,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE;AAC/C,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACrE,aAAa,CAAC,MAAM,GAAG;AACrB,oBAAA,IAAI,aAAa,CAAC,MAAM,IAAI,EAAE,CAAC;AAC/B,oBAAA,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM;iBACtB;;AAEH,YAAA,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,gBAAA,MAAM,gBAAgB,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,MAAM;AAC3D,oBAAA,GAAG,QAAQ;AACX,oBAAA,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,EAAE;AAC1B,iBAAA,CAAC,CAAC;gBACH,aAAa,CAAC,UAAU,GAAG;AACzB,oBAAA,IAAI,aAAa,CAAC,UAAU,IAAI,EAAE,CAAC;AACnC,oBAAA,GAAG,gBAAgB;iBACpB;;;AAGP,KAAC,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE;AAC/C;AAEA,SAAS,qBAAqB,CAAC,EAC7B,SAAS,EACT,UAAU,EACV,eAAe,EACf,eAAe,EACf,MAAM,GAOP,EAAA;AACC,IAAA,OAAO,gBAAgB,EACrB,KAAK,EACL,IAAI,EACJ,OAAO,EACP,OAAO,GAAG,IAAI,EACd,UAAU,GAAG,CAAC,EACd,eAAe,EACf,MAAM,GAAG,KAAK,EACd,MAAM,GAAG,KAAK,EACd,IAAI,GAAG,KAAK,GAWb,EAAA;AACC,QAAA,IAAI;;AAEF,YAAA,MAAM,YAAY,GAAG,MAAM,uBAAuB,CAAC;gBACjD,SAAS;gBACT,KAAK;gBACL,IAAI;gBACJ,OAAO;gBACP,UAAU;gBACV,MAAM;gBACN,MAAM;gBACN,IAAI;gBACJ,MAAM;AACP,aAAA,CAAC;AAEF,YAAA,eAAe,GAAG,YAAY,CAAC;AAE/B,YAAA,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAAC,cAAc,CAAC;gBAC5D,KAAK;gBACL,IAAI;AACJ,gBAAA,MAAM,EAAE,YAAY;gBACpB,OAAO;gBACP,eAAe;AACf,gBAAA,WAAW,EAAE,UAAU;AACxB,aAAA,CAAC;AAEF,YAAA,OAAO,gBAAgB,CAAC,gBAAgB,CAAC;;QACzC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,CAAC,KAAK,CAAC,kBAAkB,EAAE,KAAK,CAAC;YACvC,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,IAAI,EAAE,EAAE;AACR,gBAAA,eAAe,EAAE,EAAE;AACnB,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;AACH;AAEA,SAAS,qBAAqB,CAAC,EAC7B,cAAc,EACd,eAAe,GAIhB,EAAA;AACC,IAAA,OAAO,UAAU,OAAuB,EAAA;QACtC,IAAI,CAAC,eAAe,EAAE;YACpB;;AAEF,QAAA,eAAe,CAAC,OAAO,EAAE,cAAc,CAAC;AAC1C,KAAC;AACH;AAEA,SAAS,UAAU,CAAC,EAClB,MAAM,EACN,MAAM,EACN,eAAe,EAAE,gBAAgB,GAKlC,EAAA;IACC,OAAO,IAAI,CACT,OAAO,MAAM,EAAE,cAAc,KAAI;AAC/B,QAAA,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM;AACjE,QAAA,MAAM,OAAO,GAAG,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,GAAG,EAAE,GAAG,SAAS;AAC7D,QAAA,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC;YAChC,KAAK;YACL,IAAI;YACJ,OAAO;YACP,MAAM;YACN,MAAM;YACN,IAAI;YACJ,eAAe,EAAE,qBAAqB,CAAC;gBACrC,cAAc;AACd,gBAAA,eAAe,EAAE,gBAAgB;aAClC,CAAC;AACH,SAAA,CAAC;QACF,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC;AAC/C,QAAA,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,mBAAmB,CAAC,IAAI,EAAE,YAAY,CAAC;QACtE,MAAM,IAAI,GAAuB,EAAE,IAAI,EAAE,GAAG,YAAY,EAAE,UAAU,EAAE;AACtE,QAAA,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,SAAS,CAAC,UAAU,GAAG,IAAI,EAAE,CAAC;AACnD,KAAC,EACD;QACE,IAAI,EAAE,SAAS,CAAC,UAAU;AAC1B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;;;;;;;AAsBlB,CAAA,CAAC,IAAI,EAAE;AACF,QAAA,MAAM,EAAE,MAAM;QACd,cAAc,EAAE,SAAS,CAAC,oBAAoB;AAC/C,KAAA,CACF;AACH;AAEA;;;;;;AAMG;MACU,gBAAgB,GAAG,CAC9B,MAA6B,GAAA,EAAE,KACa;IAC5C,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,EACb,YAAY,GAAG,QAAQ,EACvB,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,eAAe,CAAC,EAC9B,aAAa,GAAG,IAAI,EACpB,UAAU,GAAG,CAAC,EACd,eAAe,EACf,eAAe,EACf,gBAAgB,EAChB,cAAc,EACd,UAAU,EACV,YAAY,EACZ,eAAe,EAAE,gBAAgB,EACjC,eAAe,GAChB,GAAG,MAAM;IAEV,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,mBAAmB,EAAE;AAErD,IAAA,MAAM,YAAY,GAOd;AACF,QAAA,KAAK,EAAE,WAAW;AAClB,QAAA,IAAI,EAAE,UAAU;AAChB,QAAA,MAAM,EAAE,YAAY;AACpB,QAAA,MAAM,EAAE,YAAY;AACpB,QAAA,IAAI,EAAE,UAAU;KACjB;AAED,IAAA,IAAI,cAAc,KAAK,QAAQ,EAAE;AAC/B,QAAA,YAAY,CAAC,OAAO,GAAG,aAAa;;IAGtC,MAAM,UAAU,GAAG,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;IAEzC,MAAM,SAAS,GAAG,eAAe,CAAC;QAChC,cAAc;QACd,YAAY;QACZ,kBAAkB;QAClB,aAAa;AACd,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAG,sBAAsB,CAAC;AAC9C,QAAA,GAAG,gBAAgB;AACnB,QAAA,MAAM,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB;AACxD,QAAA,MAAM,EAAE,eAAe;AACvB,QAAA,OAAO,EAAE,cAAc,IAAI,gBAAgB,EAAE,OAAO;QACpD,OAAO,EAAE,gBAAgB,EAAE,OAAO,IAAI,CAAC,UAAU,EAAE,SAAS,CAAC;AAC9D,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAG,cAAc,CAAC;QACtC,YAAY;QACZ,UAAU;QACV,YAAY;QACZ,MAAM;AACP,KAAA,CAAC;IAEF,IAAI,CAAC,gBAAgB,EAAE;AACrB,QAAA,MAAM,CAAC,IAAI,CAAC,8CAA8C,CAAC;;IAG7D,MAAM,eAAe,GAAG,qBAAqB,CAC3C;AACE,QAAA,QAAQ,EAAE,gBAAgB;QAC1B,UAAU;QAGV,MAAM;KACP,EACD,gBAAgB,CACjB;IAED,MAAM,MAAM,GAAG,qBAAqB,CAAC;QACnC,SAAS;QACT,UAAU;QACV,eAAe;QACf,eAAe;QACf,MAAM;AACP,KAAA,CAAC;AAEF,IAAA,OAAO,UAAU,CAAC;QAChB,MAAM;AACN,QAAA,MAAM,EAAE,UAAU;AAClB,QAAA,eAAe,EAAE,gBAAgB;AAClC,KAAA,CAAC;AACJ;;;;"}
|
|
@@ -9,6 +9,21 @@ export declare class FirecrawlScraper {
|
|
|
9
9
|
private defaultFormats;
|
|
10
10
|
private timeout;
|
|
11
11
|
private logger;
|
|
12
|
+
private includeTags?;
|
|
13
|
+
private excludeTags?;
|
|
14
|
+
private waitFor?;
|
|
15
|
+
private maxAge?;
|
|
16
|
+
private mobile?;
|
|
17
|
+
private skipTlsVerification?;
|
|
18
|
+
private blockAds?;
|
|
19
|
+
private removeBase64Images?;
|
|
20
|
+
private parsePDF?;
|
|
21
|
+
private storeInCache?;
|
|
22
|
+
private zeroDataRetention?;
|
|
23
|
+
private headers?;
|
|
24
|
+
private location?;
|
|
25
|
+
private onlyMainContent?;
|
|
26
|
+
private changeTrackingOptions?;
|
|
12
27
|
constructor(config?: t.FirecrawlScraperConfig);
|
|
13
28
|
/**
|
|
14
29
|
* Scrape a single URL
|
|
@@ -85,7 +85,7 @@ export interface ProcessSourcesConfig {
|
|
|
85
85
|
export interface FirecrawlConfig {
|
|
86
86
|
firecrawlApiKey?: string;
|
|
87
87
|
firecrawlApiUrl?: string;
|
|
88
|
-
|
|
88
|
+
firecrawlOptions?: FirecrawlScraperConfig;
|
|
89
89
|
}
|
|
90
90
|
export interface ScraperContentResult {
|
|
91
91
|
content: string;
|
|
@@ -147,14 +147,7 @@ export type UsedReferences = {
|
|
|
147
147
|
reference: MediaReference;
|
|
148
148
|
}[];
|
|
149
149
|
/** Firecrawl */
|
|
150
|
-
export
|
|
151
|
-
formats?: string[];
|
|
152
|
-
includeTags?: string[];
|
|
153
|
-
excludeTags?: string[];
|
|
154
|
-
headers?: Record<string, string>;
|
|
155
|
-
waitFor?: number;
|
|
156
|
-
timeout?: number;
|
|
157
|
-
}
|
|
150
|
+
export type FirecrawlScrapeOptions = Omit<FirecrawlScraperConfig, 'apiKey' | 'apiUrl' | 'logger'>;
|
|
158
151
|
export interface ScrapeMetadata {
|
|
159
152
|
sourceURL?: string;
|
|
160
153
|
url?: string;
|
|
@@ -226,6 +219,24 @@ export interface FirecrawlScraperConfig {
|
|
|
226
219
|
formats?: string[];
|
|
227
220
|
timeout?: number;
|
|
228
221
|
logger?: Logger;
|
|
222
|
+
includeTags?: string[];
|
|
223
|
+
excludeTags?: string[];
|
|
224
|
+
waitFor?: number;
|
|
225
|
+
maxAge?: number;
|
|
226
|
+
mobile?: boolean;
|
|
227
|
+
skipTlsVerification?: boolean;
|
|
228
|
+
blockAds?: boolean;
|
|
229
|
+
removeBase64Images?: boolean;
|
|
230
|
+
parsePDF?: boolean;
|
|
231
|
+
storeInCache?: boolean;
|
|
232
|
+
zeroDataRetention?: boolean;
|
|
233
|
+
headers?: Record<string, string>;
|
|
234
|
+
location?: {
|
|
235
|
+
country?: string;
|
|
236
|
+
languages?: string[];
|
|
237
|
+
};
|
|
238
|
+
onlyMainContent?: boolean;
|
|
239
|
+
changeTrackingOptions?: object;
|
|
229
240
|
}
|
|
230
241
|
export type GetSourcesParams = {
|
|
231
242
|
query: string;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@librechat/agents",
|
|
3
|
-
"version": "2.4.
|
|
3
|
+
"version": "2.4.63",
|
|
4
4
|
"main": "./dist/cjs/main.cjs",
|
|
5
5
|
"module": "./dist/esm/main.mjs",
|
|
6
6
|
"types": "./dist/types/index.d.ts",
|
|
@@ -77,7 +77,7 @@
|
|
|
77
77
|
},
|
|
78
78
|
"dependencies": {
|
|
79
79
|
"@langchain/anthropic": "^0.3.24",
|
|
80
|
-
"@langchain/aws": "^0.1.
|
|
80
|
+
"@langchain/aws": "^0.1.12",
|
|
81
81
|
"@langchain/community": "^0.3.47",
|
|
82
82
|
"@langchain/core": "^0.3.62",
|
|
83
83
|
"@langchain/deepseek": "^0.0.2",
|
|
@@ -112,7 +112,7 @@
|
|
|
112
112
|
"@types/yargs-parser": "^21.0.3",
|
|
113
113
|
"@typescript-eslint/eslint-plugin": "^8.24.0",
|
|
114
114
|
"@typescript-eslint/parser": "^8.24.0",
|
|
115
|
-
"eslint": "^9.
|
|
115
|
+
"eslint": "^9.31.0",
|
|
116
116
|
"eslint-import-resolver-typescript": "^3.7.0",
|
|
117
117
|
"eslint-plugin-import": "^2.31.0",
|
|
118
118
|
"husky": "^9.1.7",
|
|
@@ -13,6 +13,21 @@ export class FirecrawlScraper {
|
|
|
13
13
|
private defaultFormats: string[];
|
|
14
14
|
private timeout: number;
|
|
15
15
|
private logger: t.Logger;
|
|
16
|
+
private includeTags?: string[];
|
|
17
|
+
private excludeTags?: string[];
|
|
18
|
+
private waitFor?: number;
|
|
19
|
+
private maxAge?: number;
|
|
20
|
+
private mobile?: boolean;
|
|
21
|
+
private skipTlsVerification?: boolean;
|
|
22
|
+
private blockAds?: boolean;
|
|
23
|
+
private removeBase64Images?: boolean;
|
|
24
|
+
private parsePDF?: boolean;
|
|
25
|
+
private storeInCache?: boolean;
|
|
26
|
+
private zeroDataRetention?: boolean;
|
|
27
|
+
private headers?: Record<string, string>;
|
|
28
|
+
private location?: { country?: string; languages?: string[] };
|
|
29
|
+
private onlyMainContent?: boolean;
|
|
30
|
+
private changeTrackingOptions?: object;
|
|
16
31
|
|
|
17
32
|
constructor(config: t.FirecrawlScraperConfig = {}) {
|
|
18
33
|
this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
|
|
@@ -23,11 +38,27 @@ export class FirecrawlScraper {
|
|
|
23
38
|
'https://api.firecrawl.dev';
|
|
24
39
|
this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;
|
|
25
40
|
|
|
26
|
-
this.defaultFormats = config.formats ?? ['markdown', '
|
|
41
|
+
this.defaultFormats = config.formats ?? ['markdown', 'rawHtml'];
|
|
27
42
|
this.timeout = config.timeout ?? 7500;
|
|
28
43
|
|
|
29
44
|
this.logger = config.logger || createDefaultLogger();
|
|
30
45
|
|
|
46
|
+
this.includeTags = config.includeTags;
|
|
47
|
+
this.excludeTags = config.excludeTags;
|
|
48
|
+
this.waitFor = config.waitFor;
|
|
49
|
+
this.maxAge = config.maxAge;
|
|
50
|
+
this.mobile = config.mobile;
|
|
51
|
+
this.skipTlsVerification = config.skipTlsVerification;
|
|
52
|
+
this.blockAds = config.blockAds;
|
|
53
|
+
this.removeBase64Images = config.removeBase64Images;
|
|
54
|
+
this.parsePDF = config.parsePDF;
|
|
55
|
+
this.storeInCache = config.storeInCache;
|
|
56
|
+
this.zeroDataRetention = config.zeroDataRetention;
|
|
57
|
+
this.headers = config.headers;
|
|
58
|
+
this.location = config.location;
|
|
59
|
+
this.onlyMainContent = config.onlyMainContent;
|
|
60
|
+
this.changeTrackingOptions = config.changeTrackingOptions;
|
|
61
|
+
|
|
31
62
|
if (!this.apiKey) {
|
|
32
63
|
this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
|
|
33
64
|
}
|
|
@@ -58,25 +89,36 @@ export class FirecrawlScraper {
|
|
|
58
89
|
}
|
|
59
90
|
|
|
60
91
|
try {
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
92
|
+
const payload = omitUndefined({
|
|
93
|
+
url,
|
|
94
|
+
formats: options.formats ?? this.defaultFormats,
|
|
95
|
+
includeTags: options.includeTags ?? this.includeTags,
|
|
96
|
+
excludeTags: options.excludeTags ?? this.excludeTags,
|
|
97
|
+
headers: options.headers ?? this.headers,
|
|
98
|
+
waitFor: options.waitFor ?? this.waitFor,
|
|
99
|
+
timeout: options.timeout ?? this.timeout,
|
|
100
|
+
onlyMainContent: options.onlyMainContent ?? this.onlyMainContent,
|
|
101
|
+
maxAge: options.maxAge ?? this.maxAge,
|
|
102
|
+
mobile: options.mobile ?? this.mobile,
|
|
103
|
+
skipTlsVerification:
|
|
104
|
+
options.skipTlsVerification ?? this.skipTlsVerification,
|
|
105
|
+
parsePDF: options.parsePDF ?? this.parsePDF,
|
|
106
|
+
location: options.location ?? this.location,
|
|
107
|
+
removeBase64Images:
|
|
108
|
+
options.removeBase64Images ?? this.removeBase64Images,
|
|
109
|
+
blockAds: options.blockAds ?? this.blockAds,
|
|
110
|
+
storeInCache: options.storeInCache ?? this.storeInCache,
|
|
111
|
+
zeroDataRetention: options.zeroDataRetention ?? this.zeroDataRetention,
|
|
112
|
+
changeTrackingOptions:
|
|
113
|
+
options.changeTrackingOptions ?? this.changeTrackingOptions,
|
|
114
|
+
});
|
|
115
|
+
const response = await axios.post(this.apiUrl, payload, {
|
|
116
|
+
headers: {
|
|
117
|
+
'Content-Type': 'application/json',
|
|
118
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
71
119
|
},
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
'Content-Type': 'application/json',
|
|
75
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
76
|
-
},
|
|
77
|
-
timeout: this.timeout,
|
|
78
|
-
}
|
|
79
|
-
);
|
|
120
|
+
timeout: this.timeout,
|
|
121
|
+
});
|
|
80
122
|
|
|
81
123
|
return [url, response.data];
|
|
82
124
|
} catch (error) {
|
|
@@ -156,3 +198,10 @@ export const createFirecrawlScraper = (
|
|
|
156
198
|
): FirecrawlScraper => {
|
|
157
199
|
return new FirecrawlScraper(config);
|
|
158
200
|
};
|
|
201
|
+
|
|
202
|
+
// Helper function to clean up payload for firecrawl
|
|
203
|
+
function omitUndefined<T extends object>(obj: T): Partial<T> {
|
|
204
|
+
return Object.fromEntries(
|
|
205
|
+
Object.entries(obj).filter(([, v]) => v !== undefined)
|
|
206
|
+
) as Partial<T>;
|
|
207
|
+
}
|
package/src/tools/search/tool.ts
CHANGED
|
@@ -346,7 +346,7 @@ export const createSearchTool = (
|
|
|
346
346
|
safeSearch = 1,
|
|
347
347
|
firecrawlApiKey,
|
|
348
348
|
firecrawlApiUrl,
|
|
349
|
-
|
|
349
|
+
firecrawlOptions,
|
|
350
350
|
scraperTimeout,
|
|
351
351
|
jinaApiKey,
|
|
352
352
|
cohereApiKey,
|
|
@@ -385,10 +385,11 @@ export const createSearchTool = (
|
|
|
385
385
|
});
|
|
386
386
|
|
|
387
387
|
const firecrawlScraper = createFirecrawlScraper({
|
|
388
|
+
...firecrawlOptions,
|
|
388
389
|
apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,
|
|
389
390
|
apiUrl: firecrawlApiUrl,
|
|
390
|
-
timeout: scraperTimeout,
|
|
391
|
-
formats:
|
|
391
|
+
timeout: scraperTimeout ?? firecrawlOptions?.timeout,
|
|
392
|
+
formats: firecrawlOptions?.formats ?? ['markdown', 'rawHtml'],
|
|
392
393
|
});
|
|
393
394
|
|
|
394
395
|
const selectedReranker = createReranker({
|
|
@@ -94,7 +94,7 @@ export interface ProcessSourcesConfig {
|
|
|
94
94
|
export interface FirecrawlConfig {
|
|
95
95
|
firecrawlApiKey?: string;
|
|
96
96
|
firecrawlApiUrl?: string;
|
|
97
|
-
|
|
97
|
+
firecrawlOptions?: FirecrawlScraperConfig;
|
|
98
98
|
}
|
|
99
99
|
|
|
100
100
|
export interface ScraperContentResult {
|
|
@@ -170,15 +170,10 @@ export type UsedReferences = {
|
|
|
170
170
|
}[];
|
|
171
171
|
|
|
172
172
|
/** Firecrawl */
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
excludeTags?: string[];
|
|
178
|
-
headers?: Record<string, string>;
|
|
179
|
-
waitFor?: number;
|
|
180
|
-
timeout?: number;
|
|
181
|
-
}
|
|
173
|
+
export type FirecrawlScrapeOptions = Omit<
|
|
174
|
+
FirecrawlScraperConfig,
|
|
175
|
+
'apiKey' | 'apiUrl' | 'logger'
|
|
176
|
+
>;
|
|
182
177
|
|
|
183
178
|
export interface ScrapeMetadata {
|
|
184
179
|
// Core source information
|
|
@@ -261,6 +256,21 @@ export interface FirecrawlScraperConfig {
|
|
|
261
256
|
formats?: string[];
|
|
262
257
|
timeout?: number;
|
|
263
258
|
logger?: Logger;
|
|
259
|
+
includeTags?: string[];
|
|
260
|
+
excludeTags?: string[];
|
|
261
|
+
waitFor?: number;
|
|
262
|
+
maxAge?: number;
|
|
263
|
+
mobile?: boolean;
|
|
264
|
+
skipTlsVerification?: boolean;
|
|
265
|
+
blockAds?: boolean;
|
|
266
|
+
removeBase64Images?: boolean;
|
|
267
|
+
parsePDF?: boolean;
|
|
268
|
+
storeInCache?: boolean;
|
|
269
|
+
zeroDataRetention?: boolean;
|
|
270
|
+
headers?: Record<string, string>;
|
|
271
|
+
location?: { country?: string; languages?: string[] };
|
|
272
|
+
onlyMainContent?: boolean;
|
|
273
|
+
changeTrackingOptions?: object;
|
|
264
274
|
}
|
|
265
275
|
|
|
266
276
|
export type GetSourcesParams = {
|