@librechat/agents 2.4.31 → 2.4.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/events.cjs +3 -3
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +2 -1
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +5 -2
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/ids.cjs +23 -0
- package/dist/cjs/messages/ids.cjs.map +1 -0
- package/dist/cjs/splitStream.cjs +2 -1
- package/dist/cjs/splitStream.cjs.map +1 -1
- package/dist/cjs/stream.cjs +87 -154
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +14 -3
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +144 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -0
- package/dist/cjs/tools/search/content.cjs +140 -0
- package/dist/cjs/tools/search/content.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +23 -41
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +161 -74
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs +64 -12
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs +43 -36
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +70 -0
- package/dist/cjs/tools/search/schema.cjs.map +1 -0
- package/dist/cjs/tools/search/search.cjs +150 -69
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +247 -58
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +66 -0
- package/dist/cjs/tools/search/utils.cjs.map +1 -0
- package/dist/esm/events.mjs +1 -1
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +2 -1
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/ids.mjs +21 -0
- package/dist/esm/messages/ids.mjs.map +1 -0
- package/dist/esm/splitStream.mjs +2 -1
- package/dist/esm/splitStream.mjs.map +1 -1
- package/dist/esm/stream.mjs +87 -152
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +14 -3
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +141 -0
- package/dist/esm/tools/handlers.mjs.map +1 -0
- package/dist/esm/tools/search/content.mjs +119 -0
- package/dist/esm/tools/search/content.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +24 -41
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +161 -74
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs +64 -12
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs +43 -36
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +61 -0
- package/dist/esm/tools/search/schema.mjs.map +1 -0
- package/dist/esm/tools/search/search.mjs +150 -69
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +246 -57
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +61 -0
- package/dist/esm/tools/search/utils.mjs.map +1 -0
- package/dist/types/graphs/Graph.d.ts +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/messages/ids.d.ts +3 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/stream.d.ts +0 -8
- package/dist/types/tools/ToolNode.d.ts +6 -0
- package/dist/types/tools/example.d.ts +23 -3
- package/dist/types/tools/handlers.d.ts +8 -0
- package/dist/types/tools/search/content.d.ts +4 -0
- package/dist/types/tools/search/firecrawl.d.ts +7 -86
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/highlights.d.ts +1 -1
- package/dist/types/tools/search/rerankers.d.ts +8 -4
- package/dist/types/tools/search/schema.d.ts +16 -0
- package/dist/types/tools/search/search.d.ts +2 -2
- package/dist/types/tools/search/test.d.ts +1 -0
- package/dist/types/tools/search/tool.d.ts +25 -4
- package/dist/types/tools/search/types.d.ts +443 -53
- package/dist/types/tools/search/utils.d.ts +10 -0
- package/package.json +9 -7
- package/src/events.ts +49 -15
- package/src/graphs/Graph.ts +6 -2
- package/src/index.ts +1 -0
- package/src/messages/ids.ts +26 -0
- package/src/messages/index.ts +1 -0
- package/src/scripts/search.ts +8 -3
- package/src/splitStream.test.ts +132 -71
- package/src/splitStream.ts +2 -1
- package/src/stream.ts +94 -183
- package/src/tools/ToolNode.ts +37 -14
- package/src/tools/handlers.ts +167 -0
- package/src/tools/search/content.test.ts +173 -0
- package/src/tools/search/content.ts +147 -0
- package/src/tools/search/firecrawl.ts +36 -148
- package/src/tools/search/format.ts +205 -74
- package/src/tools/search/highlights.ts +99 -16
- package/src/tools/search/output.md +2775 -0
- package/src/tools/search/rerankers.ts +57 -36
- package/src/tools/search/schema.ts +63 -0
- package/src/tools/search/search.ts +230 -117
- package/src/tools/search/test.html +884 -0
- package/src/tools/search/test.md +643 -0
- package/src/tools/search/test.ts +159 -0
- package/src/tools/search/tool.ts +363 -87
- package/src/tools/search/types.ts +503 -61
- package/src/tools/search/utils.ts +79 -0
- package/src/utils/llmConfig.ts +1 -1
|
@@ -1,35 +1,7 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
|
+
import { processContent } from './content.mjs';
|
|
3
|
+
import { createDefaultLogger } from './utils.mjs';
|
|
2
4
|
|
|
3
|
-
/* eslint-disable no-console */
|
|
4
|
-
const getDomainName = (link, metadata) => {
|
|
5
|
-
try {
|
|
6
|
-
const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');
|
|
7
|
-
const domain = new URL(url).hostname.replace(/^www\./, '');
|
|
8
|
-
if (domain) {
|
|
9
|
-
return domain;
|
|
10
|
-
}
|
|
11
|
-
}
|
|
12
|
-
catch (e) {
|
|
13
|
-
// URL parsing failed
|
|
14
|
-
console.error('Error parsing URL:', e);
|
|
15
|
-
}
|
|
16
|
-
return;
|
|
17
|
-
};
|
|
18
|
-
function getAttribution(link, metadata) {
|
|
19
|
-
if (!metadata)
|
|
20
|
-
return getDomainName(link, metadata);
|
|
21
|
-
const possibleAttributions = [
|
|
22
|
-
metadata.ogSiteName,
|
|
23
|
-
metadata['og:site_name'],
|
|
24
|
-
metadata.title?.split('|').pop()?.trim(),
|
|
25
|
-
metadata['twitter:site']?.replace(/^@/, ''),
|
|
26
|
-
];
|
|
27
|
-
const attribution = possibleAttributions.find((attr) => attr != null && typeof attr === 'string' && attr.trim() !== '');
|
|
28
|
-
if (attribution != null) {
|
|
29
|
-
return attribution;
|
|
30
|
-
}
|
|
31
|
-
return getDomainName(link, metadata);
|
|
32
|
-
}
|
|
33
5
|
/**
|
|
34
6
|
* Firecrawl scraper implementation
|
|
35
7
|
* Uses the Firecrawl API to scrape web pages
|
|
@@ -39,6 +11,7 @@ class FirecrawlScraper {
|
|
|
39
11
|
apiUrl;
|
|
40
12
|
defaultFormats;
|
|
41
13
|
timeout;
|
|
14
|
+
logger;
|
|
42
15
|
constructor(config = {}) {
|
|
43
16
|
this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
|
|
44
17
|
const baseUrl = config.apiUrl ??
|
|
@@ -46,11 +19,12 @@ class FirecrawlScraper {
|
|
|
46
19
|
'https://api.firecrawl.dev';
|
|
47
20
|
this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;
|
|
48
21
|
this.defaultFormats = config.formats ?? ['markdown', 'html'];
|
|
49
|
-
this.timeout = config.timeout ??
|
|
22
|
+
this.timeout = config.timeout ?? 15000;
|
|
23
|
+
this.logger = config.logger || createDefaultLogger();
|
|
50
24
|
if (!this.apiKey) {
|
|
51
|
-
|
|
25
|
+
this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
|
|
52
26
|
}
|
|
53
|
-
|
|
27
|
+
this.logger.debug(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);
|
|
54
28
|
}
|
|
55
29
|
/**
|
|
56
30
|
* Scrape a single URL
|
|
@@ -104,21 +78,30 @@ class FirecrawlScraper {
|
|
|
104
78
|
*/
|
|
105
79
|
extractContent(response) {
|
|
106
80
|
if (!response.success || !response.data) {
|
|
107
|
-
return '';
|
|
81
|
+
return ['', undefined];
|
|
82
|
+
}
|
|
83
|
+
if (response.data.markdown != null && response.data.html != null) {
|
|
84
|
+
try {
|
|
85
|
+
const { markdown, ...rest } = processContent(response.data.html, response.data.markdown);
|
|
86
|
+
return [markdown, rest];
|
|
87
|
+
}
|
|
88
|
+
catch (error) {
|
|
89
|
+
this.logger.error('Error processing content:', error);
|
|
90
|
+
return [response.data.markdown, undefined];
|
|
91
|
+
}
|
|
108
92
|
}
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
return response.data.markdown;
|
|
93
|
+
else if (response.data.markdown != null) {
|
|
94
|
+
return [response.data.markdown, undefined];
|
|
112
95
|
}
|
|
113
96
|
// Fall back to HTML content
|
|
114
97
|
if (response.data.html != null) {
|
|
115
|
-
return response.data.html;
|
|
98
|
+
return [response.data.html, undefined];
|
|
116
99
|
}
|
|
117
100
|
// Fall back to raw HTML content
|
|
118
101
|
if (response.data.rawHtml != null) {
|
|
119
|
-
return response.data.rawHtml;
|
|
102
|
+
return [response.data.rawHtml, undefined];
|
|
120
103
|
}
|
|
121
|
-
return '';
|
|
104
|
+
return ['', undefined];
|
|
122
105
|
}
|
|
123
106
|
/**
|
|
124
107
|
* Extract metadata from scrape response
|
|
@@ -141,5 +124,5 @@ const createFirecrawlScraper = (config = {}) => {
|
|
|
141
124
|
return new FirecrawlScraper(config);
|
|
142
125
|
};
|
|
143
126
|
|
|
144
|
-
export { FirecrawlScraper, createFirecrawlScraper
|
|
127
|
+
export { FirecrawlScraper, createFirecrawlScraper };
|
|
145
128
|
//# sourceMappingURL=firecrawl.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"firecrawl.mjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\n\nexport interface FirecrawlScrapeOptions {\n formats?: string[];\n includeTags?: string[];\n excludeTags?: string[];\n headers?: Record<string, string>;\n waitFor?: number;\n timeout?: number;\n}\n\ninterface ScrapeMetadata {\n // Core source information\n sourceURL?: string;\n url?: string;\n scrapeId?: string;\n statusCode?: number;\n // Basic metadata\n title?: string;\n description?: string;\n language?: string;\n favicon?: string;\n viewport?: string;\n robots?: string;\n 'theme-color'?: string;\n // Open Graph metadata\n 'og:url'?: string;\n 'og:title'?: string;\n 'og:description'?: string;\n 'og:type'?: string;\n 'og:image'?: string;\n 'og:image:width'?: string;\n 'og:image:height'?: string;\n 'og:site_name'?: string;\n ogUrl?: string;\n ogTitle?: string;\n ogDescription?: string;\n ogImage?: string;\n ogSiteName?: string;\n // Article metadata\n 'article:author'?: string;\n 'article:published_time'?: string;\n 'article:modified_time'?: string;\n 'article:section'?: string;\n 'article:tag'?: string;\n 'article:publisher'?: string;\n publishedTime?: string;\n modifiedTime?: string;\n // Twitter metadata\n 'twitter:site'?: string;\n 'twitter:creator'?: string;\n 'twitter:card'?: string;\n 'twitter:image'?: string;\n 'twitter:dnt'?: string;\n 'twitter:app:name:iphone'?: string;\n 'twitter:app:id:iphone'?: string;\n 'twitter:app:url:iphone'?: string;\n 'twitter:app:name:ipad'?: string;\n 'twitter:app:id:ipad'?: string;\n 'twitter:app:url:ipad'?: string;\n 'twitter:app:name:googleplay'?: string;\n 'twitter:app:id:googleplay'?: string;\n 'twitter:app:url:googleplay'?: string;\n // Facebook metadata\n 'fb:app_id'?: string;\n // App links\n 'al:ios:url'?: string;\n 'al:ios:app_name'?: string;\n 'al:ios:app_store_id'?: string;\n // Allow for additional properties that might be present\n [key: string]: string | number | boolean | null | undefined;\n}\n\nexport interface FirecrawlScrapeResponse {\n success: boolean;\n data?: {\n markdown?: string;\n html?: string;\n rawHtml?: string;\n screenshot?: string;\n links?: string[];\n metadata?: ScrapeMetadata;\n };\n error?: string;\n}\n\nexport interface FirecrawlScraperConfig {\n apiKey?: string;\n apiUrl?: string;\n formats?: string[];\n timeout?: number;\n}\nconst getDomainName = (\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined => {\n try {\n const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');\n const domain = new URL(url).hostname.replace(/^www\\./, '');\n if (domain) {\n return domain;\n }\n } catch (e) {\n // URL parsing failed\n console.error('Error parsing URL:', e);\n }\n\n return;\n};\n\nexport function getAttribution(\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined {\n if (!metadata) return getDomainName(link, metadata);\n\n const possibleAttributions = [\n metadata.ogSiteName,\n metadata['og:site_name'],\n metadata.title?.split('|').pop()?.trim(),\n metadata['twitter:site']?.replace(/^@/, ''),\n ];\n\n const attribution = possibleAttributions.find(\n (attr) => attr != null && typeof attr === 'string' && attr.trim() !== ''\n );\n if (attribution != null) {\n return attribution;\n }\n\n return getDomainName(link, metadata);\n}\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n\n constructor(config: FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 30000;\n\n if (!this.apiKey) {\n console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: FirecrawlScrapeOptions = {}\n ): Promise<[string, FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(response: FirecrawlScrapeResponse): string {\n if (!response.success || !response.data) {\n return '';\n }\n\n // Prefer markdown content if available\n if (response.data.markdown != null) {\n return response.data.markdown;\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return response.data.html;\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return response.data.rawHtml;\n }\n\n return '';\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: FirecrawlScrapeResponse): ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":[],"mappings":";;AAAA;AA6FA,MAAM,aAAa,GAAG,CACpB,IAAY,EACZ,QAAyB,KACH;AACtB,IAAA,IAAI;AACF,QAAA,MAAM,GAAG,GAAG,QAAQ,EAAE,SAAS,IAAI,QAAQ,EAAE,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC;AAChE,QAAA,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1D,IAAI,MAAM,EAAE;AACV,YAAA,OAAO,MAAM;;;IAEf,OAAO,CAAC,EAAE;;AAEV,QAAA,OAAO,CAAC,KAAK,CAAC,oBAAoB,EAAE,CAAC,CAAC;;IAGxC;AACF,CAAC;AAEe,SAAA,cAAc,CAC5B,IAAY,EACZ,QAAyB,EAAA;AAEzB,IAAA,IAAI,CAAC,QAAQ;AAAE,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AAEnD,IAAA,MAAM,oBAAoB,GAAG;AAC3B,QAAA,QAAQ,CAAC,UAAU;QACnB,QAAQ,CAAC,cAAc,CAAC;AACxB,QAAA,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE;QACxC,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;KAC5C;IAED,MAAM,WAAW,GAAG,oBAAoB,CAAC,IAAI,CAC3C,CAAC,IAAI,KAAK,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CACzE;AACD,IAAA,IAAI,WAAW,IAAI,IAAI,EAAE;AACvB,QAAA,OAAO,WAAW;;AAGpB,IAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AACtC;AAEA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AAEf,IAAA,WAAA,CAAY,SAAiC,EAAE,EAAA;AAC7C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAGvE,OAAO,CAAC,GAAG,CAAC,CAAA,4CAAA,EAA+C,IAAI,CAAC,MAAM,CAAE,CAAA,CAAC;;AAG3E;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAkC,EAAE,EAAA;AAEpC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CAAC,QAAiC,EAAA;QAC9C,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,EAAE;;;QAIX,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;AAClC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;;QAI/B,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAC9B,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI;;;QAI3B,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;AACjC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO;;AAG9B,QAAA,OAAO,EAAE;;AAGX;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAiC,EAAA;AAC/C,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAiC,GAAA,EAAE,KACf;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;"}
|
|
1
|
+
{"version":3,"file":"firecrawl.mjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["import axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n private logger: t.Logger;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 15000;\n\n this.logger = config.logger || createDefaultLogger();\n\n if (!this.apiKey) {\n this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n this.logger.debug(\n `Firecrawl scraper initialized with API URL: ${this.apiUrl}`\n );\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n this.logger.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":[],"mappings":";;;;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AACP,IAAA,MAAM;AAEd,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;QAEtC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,mBAAmB,EAAE;AAEpD,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAG3E,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAA+C,4CAAA,EAAA,IAAI,CAAC,MAAM,CAAE,CAAA,CAC7D;;AAGH;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAG,cAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;gBACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACrD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;"}
|
|
@@ -1,113 +1,200 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import { getDomainName, fileExtRegex } from './utils.mjs';
|
|
2
|
+
|
|
3
|
+
function addHighlightSection() {
|
|
4
|
+
return ['\n## Highlights', ''];
|
|
5
|
+
}
|
|
6
|
+
// Helper function to format a source (organic or top story)
|
|
7
|
+
function formatSource(source, index, turn, sourceType, references) {
|
|
8
|
+
/** Array of all lines to include in the output */
|
|
9
|
+
const outputLines = [];
|
|
10
|
+
// Add the title
|
|
11
|
+
outputLines.push(`# ${sourceType.charAt(0).toUpperCase() + sourceType.slice(1)} ${index}: ${source.title != null && source.title ? `"${source.title}"` : '(no title)'}`);
|
|
12
|
+
outputLines.push(`\nAnchor: \\ue202turn${turn}${sourceType}${index}`);
|
|
13
|
+
outputLines.push(`URL: ${source.link}`);
|
|
14
|
+
// Add optional fields
|
|
15
|
+
if ('snippet' in source && source.snippet != null) {
|
|
16
|
+
outputLines.push(`Summary: ${source.snippet}`);
|
|
17
|
+
}
|
|
18
|
+
if (source.date != null) {
|
|
19
|
+
outputLines.push(`Date: ${source.date}`);
|
|
20
|
+
}
|
|
21
|
+
if (source.attribution != null) {
|
|
22
|
+
outputLines.push(`Source: ${source.attribution}`);
|
|
23
|
+
}
|
|
24
|
+
// Add highlight section or empty line
|
|
25
|
+
if ((source.highlights?.length ?? 0) > 0) {
|
|
26
|
+
outputLines.push(...addHighlightSection());
|
|
27
|
+
}
|
|
28
|
+
else {
|
|
29
|
+
outputLines.push('');
|
|
30
|
+
}
|
|
31
|
+
// Process highlights if they exist
|
|
32
|
+
(source.highlights ?? [])
|
|
33
|
+
.filter((h) => h.text.trim().length > 0)
|
|
34
|
+
.forEach((h, hIndex) => {
|
|
35
|
+
outputLines.push(`### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`);
|
|
36
|
+
outputLines.push('');
|
|
37
|
+
outputLines.push('```text');
|
|
38
|
+
outputLines.push(h.text.trim());
|
|
39
|
+
outputLines.push('```');
|
|
40
|
+
outputLines.push('');
|
|
41
|
+
if (h.references != null && h.references.length) {
|
|
42
|
+
let hasHeader = false;
|
|
43
|
+
const refLines = [];
|
|
44
|
+
for (let j = 0; j < h.references.length; j++) {
|
|
45
|
+
const ref = h.references[j];
|
|
46
|
+
if (ref.reference.originalUrl.includes('mailto:')) {
|
|
47
|
+
continue;
|
|
48
|
+
}
|
|
49
|
+
references.push({
|
|
50
|
+
type: ref.type,
|
|
51
|
+
link: ref.reference.originalUrl,
|
|
52
|
+
attribution: getDomainName(ref.reference.originalUrl),
|
|
53
|
+
title: (((ref.reference.title ?? '') || ref.reference.text) ??
|
|
54
|
+
'').split('\n')[0],
|
|
55
|
+
});
|
|
56
|
+
if (ref.type !== 'link') {
|
|
57
|
+
continue;
|
|
58
|
+
}
|
|
59
|
+
if (fileExtRegex.test(ref.reference.originalUrl)) {
|
|
60
|
+
continue;
|
|
61
|
+
}
|
|
62
|
+
if (!hasHeader) {
|
|
63
|
+
refLines.push('Core References:');
|
|
64
|
+
hasHeader = true;
|
|
65
|
+
}
|
|
66
|
+
refLines.push(`- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`);
|
|
67
|
+
refLines.push(`\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`);
|
|
68
|
+
}
|
|
69
|
+
if (hasHeader) {
|
|
70
|
+
outputLines.push(...refLines);
|
|
71
|
+
outputLines.push('');
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
if (hIndex < (source.highlights?.length ?? 0) - 1) {
|
|
75
|
+
outputLines.push('---');
|
|
76
|
+
outputLines.push('');
|
|
77
|
+
}
|
|
78
|
+
});
|
|
79
|
+
outputLines.push('');
|
|
80
|
+
return outputLines.join('\n');
|
|
81
|
+
}
|
|
82
|
+
function formatResultsForLLM(turn, results) {
|
|
83
|
+
/** Array to collect all output lines */
|
|
84
|
+
const outputLines = [];
|
|
3
85
|
const addSection = (title) => {
|
|
4
|
-
|
|
86
|
+
outputLines.push('');
|
|
87
|
+
outputLines.push(`=== ${title} ===`);
|
|
88
|
+
outputLines.push('');
|
|
5
89
|
};
|
|
90
|
+
const references = [];
|
|
6
91
|
// Organic (web) results
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
.join('\n');
|
|
27
|
-
});
|
|
92
|
+
if (results.organic?.length != null && results.organic.length > 0) {
|
|
93
|
+
addSection(`Web Results, Turn ${turn}`);
|
|
94
|
+
for (let i = 0; i < results.organic.length; i++) {
|
|
95
|
+
const r = results.organic[i];
|
|
96
|
+
outputLines.push(formatSource(r, i, turn, 'search', references));
|
|
97
|
+
delete results.organic[i].highlights;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
// Top stories (news)
|
|
101
|
+
const topStories = results.topStories ?? [];
|
|
102
|
+
if (topStories.length) {
|
|
103
|
+
addSection('News Results');
|
|
104
|
+
for (let i = 0; i < topStories.length; i++) {
|
|
105
|
+
const r = topStories[i];
|
|
106
|
+
outputLines.push(formatSource(r, i, turn, 'news', references));
|
|
107
|
+
if (results.topStories?.[i]?.highlights) {
|
|
108
|
+
delete results.topStories[i].highlights;
|
|
109
|
+
}
|
|
110
|
+
}
|
|
28
111
|
}
|
|
29
|
-
// Ignoring these sections for now
|
|
30
|
-
// // Top stories (news)
|
|
31
|
-
// const topStores = results.topStories ?? [];
|
|
32
|
-
// if (topStores.length) {
|
|
33
|
-
// addSection('News Results');
|
|
34
|
-
// topStores.forEach((r, i) => {
|
|
35
|
-
// output += [
|
|
36
|
-
// `Anchor: \ue202turn0news${i + 1}`,
|
|
37
|
-
// `Title: ${r.title ?? '(no title)'}`,
|
|
38
|
-
// `URL: ${r.link}`,
|
|
39
|
-
// r.snippet != null ? `Snippet: ${r.snippet}` : '',
|
|
40
|
-
// r.date != null ? `Date: ${r.date}` : '',
|
|
41
|
-
// r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
42
|
-
// ''
|
|
43
|
-
// ].filter(Boolean).join('\n');
|
|
44
|
-
// });
|
|
45
|
-
// }
|
|
46
112
|
// // Images
|
|
47
113
|
// const images = results.images ?? [];
|
|
48
114
|
// if (images.length) {
|
|
49
115
|
// addSection('Image Results');
|
|
50
|
-
// images.
|
|
51
|
-
//
|
|
52
|
-
//
|
|
53
|
-
//
|
|
54
|
-
//
|
|
55
|
-
//
|
|
56
|
-
//
|
|
57
|
-
// });
|
|
116
|
+
// const imageLines = images.map((img, i) => [
|
|
117
|
+
// `Anchor: \ue202turn0image${i}`,
|
|
118
|
+
// `Title: ${img.title ?? '(no title)'}`,
|
|
119
|
+
// `Image URL: ${img.imageUrl}`,
|
|
120
|
+
// ''
|
|
121
|
+
// ].join('\n'));
|
|
122
|
+
// outputLines.push(imageLines.join('\n'));
|
|
58
123
|
// }
|
|
59
124
|
// Knowledge Graph
|
|
60
125
|
if (results.knowledgeGraph != null) {
|
|
61
126
|
addSection('Knowledge Graph');
|
|
62
|
-
|
|
63
|
-
|
|
127
|
+
const kgLines = [
|
|
128
|
+
`**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
|
|
129
|
+
results.knowledgeGraph.type != null
|
|
130
|
+
? `**Type:** ${results.knowledgeGraph.type}`
|
|
131
|
+
: '',
|
|
64
132
|
results.knowledgeGraph.description != null
|
|
65
|
-
?
|
|
133
|
+
? `**Description:** ${results.knowledgeGraph.description}`
|
|
66
134
|
: '',
|
|
67
|
-
results.knowledgeGraph.
|
|
68
|
-
?
|
|
135
|
+
results.knowledgeGraph.descriptionSource != null
|
|
136
|
+
? `**Description Source:** ${results.knowledgeGraph.descriptionSource}`
|
|
137
|
+
: '',
|
|
138
|
+
results.knowledgeGraph.descriptionLink != null
|
|
139
|
+
? `**Description Link:** ${results.knowledgeGraph.descriptionLink}`
|
|
69
140
|
: '',
|
|
70
141
|
results.knowledgeGraph.imageUrl != null
|
|
71
|
-
?
|
|
142
|
+
? `**Image URL:** ${results.knowledgeGraph.imageUrl}`
|
|
143
|
+
: '',
|
|
144
|
+
results.knowledgeGraph.website != null
|
|
145
|
+
? `**Website:** ${results.knowledgeGraph.website}`
|
|
72
146
|
: '',
|
|
73
147
|
results.knowledgeGraph.attributes != null
|
|
74
|
-
?
|
|
148
|
+
? `**Attributes:**\n\`\`\`json\n${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}\n\`\`\``
|
|
75
149
|
: '',
|
|
76
150
|
'',
|
|
77
|
-
]
|
|
78
|
-
|
|
79
|
-
.join('\n');
|
|
151
|
+
].filter(Boolean);
|
|
152
|
+
outputLines.push(kgLines.join('\n\n'));
|
|
80
153
|
}
|
|
81
154
|
// Answer Box
|
|
82
155
|
if (results.answerBox != null) {
|
|
83
156
|
addSection('Answer Box');
|
|
84
|
-
|
|
157
|
+
const abLines = [
|
|
85
158
|
results.answerBox.title != null
|
|
86
|
-
?
|
|
87
|
-
: '',
|
|
88
|
-
results.answerBox.answer != null
|
|
89
|
-
? `Answer: ${results.answerBox.answer}`
|
|
159
|
+
? `**Title:** ${results.answerBox.title}`
|
|
90
160
|
: '',
|
|
91
161
|
results.answerBox.snippet != null
|
|
92
|
-
?
|
|
162
|
+
? `**Snippet:** ${results.answerBox.snippet}`
|
|
163
|
+
: '',
|
|
164
|
+
results.answerBox.snippetHighlighted != null
|
|
165
|
+
? `**Snippet Highlighted:** ${results.answerBox.snippetHighlighted
|
|
166
|
+
.map((s) => `\`${s}\``)
|
|
167
|
+
.join(' ')}`
|
|
168
|
+
: '',
|
|
169
|
+
results.answerBox.link != null
|
|
170
|
+
? `**Link:** ${results.answerBox.link}`
|
|
93
171
|
: '',
|
|
94
|
-
results.answerBox.date != null ? `Date: ${results.answerBox.date}` : '',
|
|
95
172
|
'',
|
|
96
|
-
]
|
|
97
|
-
|
|
98
|
-
.join('\n');
|
|
173
|
+
].filter(Boolean);
|
|
174
|
+
outputLines.push(abLines.join('\n\n'));
|
|
99
175
|
}
|
|
100
176
|
// People also ask
|
|
101
177
|
const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
|
|
102
178
|
if (peopleAlsoAsk.length) {
|
|
103
179
|
addSection('People Also Ask');
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
180
|
+
const paaLines = [];
|
|
181
|
+
peopleAlsoAsk.forEach((p, i) => {
|
|
182
|
+
const questionLines = [
|
|
183
|
+
`### Question ${i + 1}:`,
|
|
184
|
+
`"${p.question}"`,
|
|
185
|
+
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,
|
|
186
|
+
`${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
|
|
187
|
+
`${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
|
|
188
|
+
'',
|
|
189
|
+
].filter(Boolean);
|
|
190
|
+
paaLines.push(questionLines.join('\n\n'));
|
|
108
191
|
});
|
|
192
|
+
outputLines.push(paaLines.join(''));
|
|
109
193
|
}
|
|
110
|
-
return
|
|
194
|
+
return {
|
|
195
|
+
output: outputLines.join('\n').trim(),
|
|
196
|
+
references,
|
|
197
|
+
};
|
|
111
198
|
}
|
|
112
199
|
|
|
113
200
|
export { formatResultsForLLM };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"format.mjs","sources":["../../../../src/tools/search/format.ts"],"sourcesContent":["import type * as t from './types';\n\nexport function formatResultsForLLM(results: t.SearchResultData): string {\n let output = '';\n\n const addSection = (title: string): void => {\n output += `\\n=== ${title} ===\\n`;\n };\n\n // Organic (web) results\n const organic = results.organic ?? [];\n if (organic.length) {\n addSection('Web Results');\n organic.forEach((r, i) => {\n output += [\n `Source ${i + 1}: ${r.title ?? '(no title)'}`,\n `Citation Anchor: \\\\ue202turn0search${i + 1}`,\n `URL: ${r.link}`,\n r.snippet != null ? `Summary: ${r.snippet}` : '',\n r.date != null ? `Date: ${r.date}` : '',\n r.attribution != null ? `Source: ${r.attribution}` : '',\n '',\n '--- Content Highlights ---',\n ...(r.highlights ?? [])\n .filter((h) => h.text.trim().length > 0)\n .map((h) => `[Relevance: ${h.score.toFixed(2)}]\\n${h.text.trim()}`),\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n });\n }\n\n // Ignoring these sections for now\n // // Top stories (news)\n // const topStores = results.topStories ?? [];\n // if (topStores.length) {\n // addSection('News Results');\n // topStores.forEach((r, i) => {\n // output += [\n // `Anchor: \\ue202turn0news${i + 1}`,\n // `Title: ${r.title ?? '(no title)'}`,\n // `URL: ${r.link}`,\n // r.snippet != null ? `Snippet: ${r.snippet}` : '',\n // r.date != null ? `Date: ${r.date}` : '',\n // r.attribution != null ? `Source: ${r.attribution}` : '',\n // ''\n // ].filter(Boolean).join('\\n');\n // });\n // }\n\n // // Images\n // const images = results.images ?? [];\n // if (images.length) {\n // addSection('Image Results');\n // images.forEach((img, i) => {\n // output += [\n // `Anchor: \\ue202turn0image${i + 1}`,\n // `Title: ${img.title ?? '(no title)'}`,\n // `Image URL: ${img.imageUrl}`,\n // ''\n // ].join('\\n');\n // });\n // }\n\n // Knowledge Graph\n if (results.knowledgeGraph != null) {\n addSection('Knowledge Graph');\n output += [\n `Title: ${results.knowledgeGraph.title ?? '(no title)'}`,\n results.knowledgeGraph.description != null\n ? `Description: ${results.knowledgeGraph.description}`\n : '',\n results.knowledgeGraph.type != null\n ? `Type: ${results.knowledgeGraph.type}`\n : '',\n results.knowledgeGraph.imageUrl != null\n ? `Image URL: ${results.knowledgeGraph.imageUrl}`\n : '',\n results.knowledgeGraph.attributes != null\n ? `Attributes: ${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}`\n : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n }\n\n // Answer Box\n if (results.answerBox != null) {\n addSection('Answer Box');\n output += [\n results.answerBox.title != null\n ? `Title: ${results.answerBox.title}`\n : '',\n results.answerBox.answer != null\n ? `Answer: ${results.answerBox.answer}`\n : '',\n results.answerBox.snippet != null\n ? `Snippet: ${results.answerBox.snippet}`\n : '',\n results.answerBox.date != null ? `Date: ${results.answerBox.date}` : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n }\n\n // People also ask\n const peopleAlsoAsk = results.peopleAlsoAsk ?? [];\n if (peopleAlsoAsk.length) {\n addSection('People Also Ask');\n peopleAlsoAsk.forEach((p, _i) => {\n output += [`Q: ${p.question}`, `A: ${p.answer}`, '']\n .filter(Boolean)\n .join('\\n');\n });\n }\n\n return output.trim();\n}\n"],"names":[],"mappings":"AAEM,SAAU,mBAAmB,CAAC,OAA2B,EAAA;IAC7D,IAAI,MAAM,GAAG,EAAE;AAEf,IAAA,MAAM,UAAU,GAAG,CAAC,KAAa,KAAU;AACzC,QAAA,MAAM,IAAI,CAAA,MAAA,EAAS,KAAK,CAAA,MAAA,CAAQ;AAClC,KAAC;;AAGD,IAAA,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE;AACrC,IAAA,IAAI,OAAO,CAAC,MAAM,EAAE;QAClB,UAAU,CAAC,aAAa,CAAC;QACzB,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,KAAI;AACvB,YAAA,MAAM,IAAI;gBACR,CAAU,OAAA,EAAA,CAAC,GAAG,CAAC,CAAA,EAAA,EAAK,CAAC,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;gBAC7C,CAAsC,mCAAA,EAAA,CAAC,GAAG,CAAC,CAAE,CAAA;gBAC7C,CAAQ,KAAA,EAAA,CAAC,CAAC,IAAI,CAAE,CAAA;AAChB,gBAAA,CAAC,CAAC,OAAO,IAAI,IAAI,GAAG,CAAY,SAAA,EAAA,CAAC,CAAC,OAAO,CAAA,CAAE,GAAG,EAAE;AAChD,gBAAA,CAAC,CAAC,IAAI,IAAI,IAAI,GAAG,CAAS,MAAA,EAAA,CAAC,CAAC,IAAI,CAAA,CAAE,GAAG,EAAE;AACvC,gBAAA,CAAC,CAAC,WAAW,IAAI,IAAI,GAAG,CAAW,QAAA,EAAA,CAAC,CAAC,WAAW,CAAA,CAAE,GAAG,EAAE;gBACvD,EAAE;gBACF,4BAA4B;AAC5B,gBAAA,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE;AACnB,qBAAA,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;qBACtC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA,YAAA,EAAe,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA,GAAA,EAAM,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA,CAAE,CAAC;gBACrE,EAAE;AACH;iBACE,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,IAAI,CAAC;AACf,SAAC,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoCJ,IAAA,IAAI,OAAO,CAAC,cAAc,IAAI,IAAI,EAAE;QAClC,UAAU,CAAC,iBAAiB,CAAC;AAC7B,QAAA,MAAM,IAAI;AACR,YAAA,CAAA,OAAA,EAAU,OAAO,CAAC,cAAc,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;AACxD,YAAA,OAAO,CAAC,cAAc,CAAC,WAAW,IAAI;AACpC,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,cAAc,CAAC,WAAW,CAAE;AACtD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI;AAC7B,kBAAE,CAAS,MAAA,EAAA,OAAO,CAAC,cAAc,CAAC,IAAI,CAAE;AACxC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,IAAI;AACjC,kBAAE,CAAc,WAAA,EAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAE;AACjD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,UAAU,IAAI;AACnC,kBAAE,CAAe,YAAA,EAAA,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,cAAc,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAE;AAC7E,kBAAE,EAAE;YACN,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,IAAI,CAAC;;;AAIf,IAAA,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,EAAE;QAC7B,UAAU,CAAC,YAAY,CAAC;AACxB,QAAA,MAAM,IAAI;AACR,YAAA,OAAO,CAAC,SAAS,CAAC,KAAK,IAAI;AACzB,kBAAE,CAAU,OAAA,EAAA,OAAO,CAAC,SAAS,CAAC,KAAK,CAAE;AACrC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,MAAM,IAAI;AAC1B,kBAAE,CAAW,QAAA,EAAA,OAAO,CAAC,SAAS,CAAC,MAAM,CAAE;AACvC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,OAAO,IAAI;AAC3B,kBAAE,CAAY,SAAA,EAAA,OAAO,CAAC,SAAS,CAAC,OAAO,CAAE;AACzC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,IAAI,IAAI,IAAI,GAAG,SAAS,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,GAAG,EAAE;YACvE,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,IAAI,CAAC;;;AAIf,IAAA,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,EAAE;AACjD,IAAA,IAAI,aAAa,CAAC,MAAM,EAAE;QACxB,UAAU,CAAC,iBAAiB,CAAC;QAC7B,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;AAC9B,YAAA,MAAM,IAAI,CAAC,CAAM,GAAA,EAAA,CAAC,CAAC,QAAQ,CAAA,CAAE,EAAE,CAAA,GAAA,EAAM,CAAC,CAAC,MAAM,CAAE,CAAA,EAAE,EAAE;iBAChD,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,IAAI,CAAC;AACf,SAAC,CAAC;;AAGJ,IAAA,OAAO,MAAM,CAAC,IAAI,EAAE;AACtB;;;;"}
|
|
1
|
+
{"version":3,"file":"format.mjs","sources":["../../../../src/tools/search/format.ts"],"sourcesContent":["import type * as t from './types';\nimport { getDomainName, fileExtRegex } from './utils';\n\nfunction addHighlightSection(): string[] {\n return ['\\n## Highlights', ''];\n}\n\n// Helper function to format a source (organic or top story)\nfunction formatSource(\n source: t.ValidSource,\n index: number,\n turn: number,\n sourceType: 'search' | 'news',\n references: t.ResultReference[]\n): string {\n /** Array of all lines to include in the output */\n const outputLines: string[] = [];\n\n // Add the title\n outputLines.push(\n `# ${sourceType.charAt(0).toUpperCase() + sourceType.slice(1)} ${index}: ${source.title != null && source.title ? `\"${source.title}\"` : '(no title)'}`\n );\n outputLines.push(`\\nAnchor: \\\\ue202turn${turn}${sourceType}${index}`);\n outputLines.push(`URL: ${source.link}`);\n\n // Add optional fields\n if ('snippet' in source && source.snippet != null) {\n outputLines.push(`Summary: ${source.snippet}`);\n }\n\n if (source.date != null) {\n outputLines.push(`Date: ${source.date}`);\n }\n\n if (source.attribution != null) {\n outputLines.push(`Source: ${source.attribution}`);\n }\n\n // Add highlight section or empty line\n if ((source.highlights?.length ?? 0) > 0) {\n outputLines.push(...addHighlightSection());\n } else {\n outputLines.push('');\n }\n\n // Process highlights if they exist\n (source.highlights ?? [])\n .filter((h) => h.text.trim().length > 0)\n .forEach((h, hIndex) => {\n outputLines.push(\n `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`\n );\n outputLines.push('');\n outputLines.push('```text');\n outputLines.push(h.text.trim());\n outputLines.push('```');\n outputLines.push('');\n\n if (h.references != null && h.references.length) {\n let hasHeader = false;\n const refLines: string[] = [];\n\n for (let j = 0; j < h.references.length; j++) {\n const ref = h.references[j];\n if (ref.reference.originalUrl.includes('mailto:')) {\n continue;\n }\n references.push({\n type: ref.type,\n link: ref.reference.originalUrl,\n attribution: getDomainName(ref.reference.originalUrl),\n title: (\n ((ref.reference.title ?? '') || ref.reference.text) ??\n ''\n ).split('\\n')[0],\n });\n\n if (ref.type !== 'link') {\n continue;\n }\n\n if (fileExtRegex.test(ref.reference.originalUrl)) {\n continue;\n }\n\n if (!hasHeader) {\n refLines.push('Core References:');\n hasHeader = true;\n }\n\n refLines.push(\n `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`\n );\n refLines.push(\n `\\t- Anchor: \\\\ue202turn${turn}ref${references.length - 1}`\n );\n }\n\n if (hasHeader) {\n outputLines.push(...refLines);\n outputLines.push('');\n }\n }\n\n if (hIndex < (source.highlights?.length ?? 0) - 1) {\n outputLines.push('---');\n outputLines.push('');\n }\n });\n\n outputLines.push('');\n return outputLines.join('\\n');\n}\n\nexport function formatResultsForLLM(\n turn: number,\n results: t.SearchResultData\n): { output: string; references: t.ResultReference[] } {\n /** Array to collect all output lines */\n const outputLines: string[] = [];\n\n const addSection = (title: string): void => {\n outputLines.push('');\n outputLines.push(`=== ${title} ===`);\n outputLines.push('');\n };\n\n const references: t.ResultReference[] = [];\n\n // Organic (web) results\n if (results.organic?.length != null && results.organic.length > 0) {\n addSection(`Web Results, Turn ${turn}`);\n for (let i = 0; i < results.organic.length; i++) {\n const r = results.organic[i];\n outputLines.push(formatSource(r, i, turn, 'search', references));\n delete results.organic[i].highlights;\n }\n }\n\n // Top stories (news)\n const topStories = results.topStories ?? [];\n if (topStories.length) {\n addSection('News Results');\n for (let i = 0; i < topStories.length; i++) {\n const r = topStories[i];\n outputLines.push(formatSource(r, i, turn, 'news', references));\n if (results.topStories?.[i]?.highlights) {\n delete results.topStories[i].highlights;\n }\n }\n }\n\n // // Images\n // const images = results.images ?? [];\n // if (images.length) {\n // addSection('Image Results');\n // const imageLines = images.map((img, i) => [\n // `Anchor: \\ue202turn0image${i}`,\n // `Title: ${img.title ?? '(no title)'}`,\n // `Image URL: ${img.imageUrl}`,\n // ''\n // ].join('\\n'));\n // outputLines.push(imageLines.join('\\n'));\n // }\n\n // Knowledge Graph\n if (results.knowledgeGraph != null) {\n addSection('Knowledge Graph');\n const kgLines = [\n `**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,\n results.knowledgeGraph.type != null\n ? `**Type:** ${results.knowledgeGraph.type}`\n : '',\n results.knowledgeGraph.description != null\n ? `**Description:** ${results.knowledgeGraph.description}`\n : '',\n results.knowledgeGraph.descriptionSource != null\n ? `**Description Source:** ${results.knowledgeGraph.descriptionSource}`\n : '',\n results.knowledgeGraph.descriptionLink != null\n ? `**Description Link:** ${results.knowledgeGraph.descriptionLink}`\n : '',\n results.knowledgeGraph.imageUrl != null\n ? `**Image URL:** ${results.knowledgeGraph.imageUrl}`\n : '',\n results.knowledgeGraph.website != null\n ? `**Website:** ${results.knowledgeGraph.website}`\n : '',\n results.knowledgeGraph.attributes != null\n ? `**Attributes:**\\n\\`\\`\\`json\\n${JSON.stringify(\n results.knowledgeGraph.attributes,\n null,\n 2\n )}\\n\\`\\`\\``\n : '',\n '',\n ].filter(Boolean);\n\n outputLines.push(kgLines.join('\\n\\n'));\n }\n\n // Answer Box\n if (results.answerBox != null) {\n addSection('Answer Box');\n const abLines = [\n results.answerBox.title != null\n ? `**Title:** ${results.answerBox.title}`\n : '',\n results.answerBox.snippet != null\n ? `**Snippet:** ${results.answerBox.snippet}`\n : '',\n results.answerBox.snippetHighlighted != null\n ? `**Snippet Highlighted:** ${results.answerBox.snippetHighlighted\n .map((s) => `\\`${s}\\``)\n .join(' ')}`\n : '',\n results.answerBox.link != null\n ? `**Link:** ${results.answerBox.link}`\n : '',\n '',\n ].filter(Boolean);\n\n outputLines.push(abLines.join('\\n\\n'));\n }\n\n // People also ask\n const peopleAlsoAsk = results.peopleAlsoAsk ?? [];\n if (peopleAlsoAsk.length) {\n addSection('People Also Ask');\n\n const paaLines: string[] = [];\n peopleAlsoAsk.forEach((p, i) => {\n const questionLines = [\n `### Question ${i + 1}:`,\n `\"${p.question}\"`,\n `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,\n `${p.title != null && p.title ? `Title: ${p.title}` : ''}`,\n `${p.link != null && p.link ? `Link: ${p.link}` : ''}`,\n '',\n ].filter(Boolean);\n\n paaLines.push(questionLines.join('\\n\\n'));\n });\n\n outputLines.push(paaLines.join(''));\n }\n\n return {\n output: outputLines.join('\\n').trim(),\n references,\n };\n}\n"],"names":[],"mappings":";;AAGA,SAAS,mBAAmB,GAAA;AAC1B,IAAA,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC;AAChC;AAEA;AACA,SAAS,YAAY,CACnB,MAAqB,EACrB,KAAa,EACb,IAAY,EACZ,UAA6B,EAC7B,UAA+B,EAAA;;IAG/B,MAAM,WAAW,GAAa,EAAE;;IAGhC,WAAW,CAAC,IAAI,CACd,CAAA,EAAA,EAAK,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA,CAAA,EAAI,KAAK,CAAA,EAAA,EAAK,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC,KAAK,CAAG,CAAA,CAAA,GAAG,YAAY,CAAE,CAAA,CACvJ;IACD,WAAW,CAAC,IAAI,CAAC,CAAwB,qBAAA,EAAA,IAAI,CAAG,EAAA,UAAU,CAAG,EAAA,KAAK,CAAE,CAAA,CAAC;IACrE,WAAW,CAAC,IAAI,CAAC,CAAA,KAAA,EAAQ,MAAM,CAAC,IAAI,CAAE,CAAA,CAAC;;IAGvC,IAAI,SAAS,IAAI,MAAM,IAAI,MAAM,CAAC,OAAO,IAAI,IAAI,EAAE;QACjD,WAAW,CAAC,IAAI,CAAC,CAAA,SAAA,EAAY,MAAM,CAAC,OAAO,CAAE,CAAA,CAAC;;AAGhD,IAAA,IAAI,MAAM,CAAC,IAAI,IAAI,IAAI,EAAE;QACvB,WAAW,CAAC,IAAI,CAAC,CAAA,MAAA,EAAS,MAAM,CAAC,IAAI,CAAE,CAAA,CAAC;;AAG1C,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,EAAE;QAC9B,WAAW,CAAC,IAAI,CAAC,CAAA,QAAA,EAAW,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;;AAInD,IAAA,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;AACxC,QAAA,WAAW,CAAC,IAAI,CAAC,GAAG,mBAAmB,EAAE,CAAC;;SACrC;AACL,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;;;AAItB,IAAA,CAAC,MAAM,CAAC,UAAU,IAAI,EAAE;AACrB,SAAA,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;AACtC,SAAA,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,KAAI;AACrB,QAAA,WAAW,CAAC,IAAI,CACd,iBAAiB,MAAM,GAAG,CAAC,CAAgB,aAAA,EAAA,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA,CAAA,CAAG,CACjE;AACD,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACpB,QAAA,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC;QAC3B,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;AAC/B,QAAA,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;AACvB,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AAEpB,QAAA,IAAI,CAAC,CAAC,UAAU,IAAI,IAAI,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE;YAC/C,IAAI,SAAS,GAAG,KAAK;YACrB,MAAM,QAAQ,GAAa,EAAE;AAE7B,YAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5C,MAAM,GAAG,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC3B,IAAI,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE;oBACjD;;gBAEF,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,GAAG,CAAC,IAAI;AACd,oBAAA,IAAI,EAAE,GAAG,CAAC,SAAS,CAAC,WAAW;oBAC/B,WAAW,EAAE,aAAa,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC;AACrD,oBAAA,KAAK,EAAE,CACL,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,IAAI,EAAE,KAAK,GAAG,CAAC,SAAS,CAAC,IAAI;wBAClD,EAAE,EACF,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,iBAAA,CAAC;AAEF,gBAAA,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE;oBACvB;;gBAGF,IAAI,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE;oBAChD;;gBAGF,IAAI,CAAC,SAAS,EAAE;AACd,oBAAA,QAAQ,CAAC,IAAI,CAAC,kBAAkB,CAAC;oBACjC,SAAS,GAAG,IAAI;;gBAGlB,QAAQ,CAAC,IAAI,CACX,CAAA,EAAA,EAAK,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,aAAa,GAAG,CAAC,KAAK,GAAG,CAAC,SAAS,CAAC,WAAW,CAAE,CAAA,CACvE;AACD,gBAAA,QAAQ,CAAC,IAAI,CACX,CAAA,uBAAA,EAA0B,IAAI,CAAA,GAAA,EAAM,UAAU,CAAC,MAAM,GAAG,CAAC,CAAA,CAAE,CAC5D;;YAGH,IAAI,SAAS,EAAE;AACb,gBAAA,WAAW,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC;AAC7B,gBAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;;;AAIxB,QAAA,IAAI,MAAM,GAAG,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;AACjD,YAAA,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;AACvB,YAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;;AAExB,KAAC,CAAC;AAEJ,IAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACpB,IAAA,OAAO,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;AAC/B;AAEgB,SAAA,mBAAmB,CACjC,IAAY,EACZ,OAA2B,EAAA;;IAG3B,MAAM,WAAW,GAAa,EAAE;AAEhC,IAAA,MAAM,UAAU,GAAG,CAAC,KAAa,KAAU;AACzC,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACpB,QAAA,WAAW,CAAC,IAAI,CAAC,OAAO,KAAK,CAAA,IAAA,CAAM,CAAC;AACpC,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACtB,KAAC;IAED,MAAM,UAAU,GAAwB,EAAE;;AAG1C,IAAA,IAAI,OAAO,CAAC,OAAO,EAAE,MAAM,IAAI,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,QAAA,UAAU,CAAC,CAAA,kBAAA,EAAqB,IAAI,CAAA,CAAE,CAAC;AACvC,QAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC/C,MAAM,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;AAC5B,YAAA,WAAW,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;YAChE,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU;;;;AAKxC,IAAA,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,EAAE;AAC3C,IAAA,IAAI,UAAU,CAAC,MAAM,EAAE;QACrB,UAAU,CAAC,cAAc,CAAC;AAC1B,QAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;AAC1C,YAAA,MAAM,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC;AACvB,YAAA,WAAW,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;YAC9D,IAAI,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,EAAE,UAAU,EAAE;gBACvC,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU;;;;;;;;;;;;;;;;;AAmB7C,IAAA,IAAI,OAAO,CAAC,cAAc,IAAI,IAAI,EAAE;QAClC,UAAU,CAAC,iBAAiB,CAAC;AAC7B,QAAA,MAAM,OAAO,GAAG;AACd,YAAA,CAAA,WAAA,EAAc,OAAO,CAAC,cAAc,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;AAC5D,YAAA,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI;AAC7B,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,cAAc,CAAC,IAAI,CAAE;AAC5C,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,WAAW,IAAI;AACpC,kBAAE,CAAoB,iBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,WAAW,CAAE;AAC1D,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,IAAI;AAC1C,kBAAE,CAA2B,wBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,CAAE;AACvE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,eAAe,IAAI;AACxC,kBAAE,CAAyB,sBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,eAAe,CAAE;AACnE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,IAAI;AACjC,kBAAE,CAAkB,eAAA,EAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAE;AACrD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,OAAO,IAAI;AAChC,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,cAAc,CAAC,OAAO,CAAE;AAClD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,UAAU,IAAI;AACnC,kBAAE,CAAgC,6BAAA,EAAA,IAAI,CAAC,SAAS,CAC9C,OAAO,CAAC,cAAc,CAAC,UAAU,EACjC,IAAI,EACJ,CAAC,CACF,CAAU,QAAA;AACX,kBAAE,EAAE;YACN,EAAE;AACH,SAAA,CAAC,MAAM,CAAC,OAAO,CAAC;QAEjB,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;;;AAIxC,IAAA,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,EAAE;QAC7B,UAAU,CAAC,YAAY,CAAC;AACxB,QAAA,MAAM,OAAO,GAAG;AACd,YAAA,OAAO,CAAC,SAAS,CAAC,KAAK,IAAI;AACzB,kBAAE,CAAc,WAAA,EAAA,OAAO,CAAC,SAAS,CAAC,KAAK,CAAE;AACzC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,OAAO,IAAI;AAC3B,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,SAAS,CAAC,OAAO,CAAE;AAC7C,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,kBAAkB,IAAI;AACtC,kBAAE,CAA4B,yBAAA,EAAA,OAAO,CAAC,SAAS,CAAC;qBAC7C,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA,EAAA,EAAK,CAAC,CAAA,EAAA,CAAI;qBACrB,IAAI,CAAC,GAAG,CAAC,CAAE;AACd,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,IAAI,IAAI;AACxB,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,SAAS,CAAC,IAAI,CAAE;AACvC,kBAAE,EAAE;YACN,EAAE;AACH,SAAA,CAAC,MAAM,CAAC,OAAO,CAAC;QAEjB,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;;;AAIxC,IAAA,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,EAAE;AACjD,IAAA,IAAI,aAAa,CAAC,MAAM,EAAE;QACxB,UAAU,CAAC,iBAAiB,CAAC;QAE7B,MAAM,QAAQ,GAAa,EAAE;QAC7B,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,KAAI;AAC7B,YAAA,MAAM,aAAa,GAAG;gBACpB,CAAgB,aAAA,EAAA,CAAC,GAAG,CAAC,CAAG,CAAA,CAAA;gBACxB,CAAI,CAAA,EAAA,CAAC,CAAC,QAAQ,CAAG,CAAA,CAAA;gBACjB,CAAG,EAAA,CAAC,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,OAAO,GAAG,CAAA,SAAA,EAAY,CAAC,CAAC,OAAO,EAAE,GAAG,EAAE,CAAE,CAAA;gBAClE,CAAG,EAAA,CAAC,CAAC,KAAK,IAAI,IAAI,IAAI,CAAC,CAAC,KAAK,GAAG,CAAA,OAAA,EAAU,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAE,CAAA;gBAC1D,CAAG,EAAA,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,CAAC,IAAI,GAAG,CAAA,MAAA,EAAS,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,CAAE,CAAA;gBACtD,EAAE;AACH,aAAA,CAAC,MAAM,CAAC,OAAO,CAAC;YAEjB,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC3C,SAAC,CAAC;QAEF,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;;IAGrC,OAAO;QACL,MAAM,EAAE,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;QACrC,UAAU;KACX;AACH;;;;"}
|
|
@@ -101,6 +101,57 @@ function findBestBoundary(text, direction = 'backward') {
|
|
|
101
101
|
// No match found, use character boundary
|
|
102
102
|
return direction === 'backward' ? text.length : 0;
|
|
103
103
|
}
|
|
104
|
+
/**
|
|
105
|
+
* Tracks references used in a highlight without changing their numbers
|
|
106
|
+
*/
|
|
107
|
+
function trackReferencesInHighlight(text, sourceResult // Source containing the original references
|
|
108
|
+
) {
|
|
109
|
+
// Track used references
|
|
110
|
+
const references = [];
|
|
111
|
+
if (!text || text.length === 0 || !text.includes('#')) {
|
|
112
|
+
return { references }; // Early return
|
|
113
|
+
}
|
|
114
|
+
// Quick check for reference markers
|
|
115
|
+
if (!text.includes('link#') &&
|
|
116
|
+
!text.includes('image#') &&
|
|
117
|
+
!text.includes('video#')) {
|
|
118
|
+
return { references };
|
|
119
|
+
}
|
|
120
|
+
// Get references from the source if available
|
|
121
|
+
const sourceRefs = sourceResult.references || {
|
|
122
|
+
links: [],
|
|
123
|
+
images: [],
|
|
124
|
+
videos: [],
|
|
125
|
+
};
|
|
126
|
+
// Find references but don't modify text
|
|
127
|
+
const refRegex = /\((link|image|video)#(\d+)(?:\s+"([^"]*)")?\)/g;
|
|
128
|
+
let match;
|
|
129
|
+
while ((match = refRegex.exec(text)) !== null) {
|
|
130
|
+
const [, type, indexStr] = match;
|
|
131
|
+
const originalIndex = parseInt(indexStr, 10) - 1; // Convert to 0-based
|
|
132
|
+
// Get the source array for this type
|
|
133
|
+
const refType = type;
|
|
134
|
+
const sourceArray = sourceRefs[`${refType}s`];
|
|
135
|
+
// Skip if invalid reference
|
|
136
|
+
if (!sourceArray ||
|
|
137
|
+
originalIndex < 0 ||
|
|
138
|
+
originalIndex >= sourceArray.length) {
|
|
139
|
+
continue; // Skip invalid references
|
|
140
|
+
}
|
|
141
|
+
// Get original reference
|
|
142
|
+
const reference = sourceArray[originalIndex];
|
|
143
|
+
// Track if not already tracked
|
|
144
|
+
const alreadyTracked = references.some((ref) => ref.type === refType && ref.originalIndex === originalIndex);
|
|
145
|
+
if (!alreadyTracked) {
|
|
146
|
+
references.push({
|
|
147
|
+
type: refType,
|
|
148
|
+
originalIndex,
|
|
149
|
+
reference,
|
|
150
|
+
});
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
return { references };
|
|
154
|
+
}
|
|
104
155
|
/**
|
|
105
156
|
* Expand highlights in search results using smart boundary detection.
|
|
106
157
|
*
|
|
@@ -110,19 +161,16 @@ function findBestBoundary(text, direction = 'backward') {
|
|
|
110
161
|
* @param searchResults - Search results object
|
|
111
162
|
* @param mainExpandBy - Primary expansion size on each side (default: 300)
|
|
112
163
|
* @param separatorExpandBy - Additional range to look for separators (default: 150)
|
|
113
|
-
* @returns Copy of search results with expanded highlights
|
|
164
|
+
* @returns Copy of search results with expanded highlights and tracked references
|
|
114
165
|
*/
|
|
115
166
|
function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy = 150) {
|
|
116
|
-
//
|
|
167
|
+
// Avoid deep copy - only copy what we modify
|
|
117
168
|
const resultCopy = { ...searchResults };
|
|
118
|
-
|
|
119
|
-
if (resultCopy.organic) {
|
|
169
|
+
if (resultCopy.organic)
|
|
120
170
|
resultCopy.organic = [...resultCopy.organic];
|
|
121
|
-
|
|
122
|
-
if (resultCopy.topStories) {
|
|
171
|
+
if (resultCopy.topStories)
|
|
123
172
|
resultCopy.topStories = [...resultCopy.topStories];
|
|
124
|
-
|
|
125
|
-
// 5. Process the results efficiently
|
|
173
|
+
// Process the results efficiently
|
|
126
174
|
const processResultTypes = ['organic', 'topStories'];
|
|
127
175
|
for (const resultType of processResultTypes) {
|
|
128
176
|
if (!resultCopy[resultType])
|
|
@@ -141,17 +189,18 @@ function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy =
|
|
|
141
189
|
const highlights = [];
|
|
142
190
|
// Process each highlight
|
|
143
191
|
for (const highlight of result.highlights) {
|
|
144
|
-
const
|
|
145
|
-
let startPos = content.indexOf(
|
|
146
|
-
let highlightLen =
|
|
192
|
+
const { references } = trackReferencesInHighlight(highlight.text, result);
|
|
193
|
+
let startPos = content.indexOf(highlight.text);
|
|
194
|
+
let highlightLen = highlight.text.length;
|
|
147
195
|
if (startPos === -1) {
|
|
148
196
|
// Try with stripped whitespace
|
|
149
|
-
const strippedHighlight =
|
|
197
|
+
const strippedHighlight = highlight.text.trim();
|
|
150
198
|
startPos = content.indexOf(strippedHighlight);
|
|
151
199
|
if (startPos === -1) {
|
|
152
200
|
highlights.push({
|
|
153
201
|
text: highlight.text,
|
|
154
202
|
score: highlight.score,
|
|
203
|
+
references,
|
|
155
204
|
});
|
|
156
205
|
continue;
|
|
157
206
|
}
|
|
@@ -178,9 +227,12 @@ function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy =
|
|
|
178
227
|
highlights.push({
|
|
179
228
|
text: expandedHighlightText,
|
|
180
229
|
score: highlight.score,
|
|
230
|
+
references,
|
|
181
231
|
});
|
|
182
232
|
}
|
|
183
233
|
resultCopy.highlights = highlights;
|
|
234
|
+
delete resultCopy.content;
|
|
235
|
+
delete resultCopy.references;
|
|
184
236
|
return resultCopy;
|
|
185
237
|
});
|
|
186
238
|
}
|