@librechat/agents 2.4.31 → 2.4.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/dist/cjs/events.cjs +3 -3
  2. package/dist/cjs/events.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +2 -1
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/main.cjs +5 -2
  6. package/dist/cjs/main.cjs.map +1 -1
  7. package/dist/cjs/messages/ids.cjs +23 -0
  8. package/dist/cjs/messages/ids.cjs.map +1 -0
  9. package/dist/cjs/splitStream.cjs +2 -1
  10. package/dist/cjs/splitStream.cjs.map +1 -1
  11. package/dist/cjs/stream.cjs +87 -154
  12. package/dist/cjs/stream.cjs.map +1 -1
  13. package/dist/cjs/tools/ToolNode.cjs +14 -3
  14. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  15. package/dist/cjs/tools/handlers.cjs +144 -0
  16. package/dist/cjs/tools/handlers.cjs.map +1 -0
  17. package/dist/cjs/tools/search/content.cjs +140 -0
  18. package/dist/cjs/tools/search/content.cjs.map +1 -0
  19. package/dist/cjs/tools/search/firecrawl.cjs +23 -41
  20. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  21. package/dist/cjs/tools/search/format.cjs +161 -74
  22. package/dist/cjs/tools/search/format.cjs.map +1 -1
  23. package/dist/cjs/tools/search/highlights.cjs +64 -12
  24. package/dist/cjs/tools/search/highlights.cjs.map +1 -1
  25. package/dist/cjs/tools/search/rerankers.cjs +35 -50
  26. package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
  27. package/dist/cjs/tools/search/schema.cjs +70 -0
  28. package/dist/cjs/tools/search/schema.cjs.map +1 -0
  29. package/dist/cjs/tools/search/search.cjs +153 -69
  30. package/dist/cjs/tools/search/search.cjs.map +1 -1
  31. package/dist/cjs/tools/search/tool.cjs +247 -58
  32. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  33. package/dist/cjs/tools/search/utils.cjs +66 -0
  34. package/dist/cjs/tools/search/utils.cjs.map +1 -0
  35. package/dist/esm/events.mjs +1 -1
  36. package/dist/esm/events.mjs.map +1 -1
  37. package/dist/esm/graphs/Graph.mjs +2 -1
  38. package/dist/esm/graphs/Graph.mjs.map +1 -1
  39. package/dist/esm/main.mjs +3 -1
  40. package/dist/esm/main.mjs.map +1 -1
  41. package/dist/esm/messages/ids.mjs +21 -0
  42. package/dist/esm/messages/ids.mjs.map +1 -0
  43. package/dist/esm/splitStream.mjs +2 -1
  44. package/dist/esm/splitStream.mjs.map +1 -1
  45. package/dist/esm/stream.mjs +87 -152
  46. package/dist/esm/stream.mjs.map +1 -1
  47. package/dist/esm/tools/ToolNode.mjs +14 -3
  48. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  49. package/dist/esm/tools/handlers.mjs +141 -0
  50. package/dist/esm/tools/handlers.mjs.map +1 -0
  51. package/dist/esm/tools/search/content.mjs +119 -0
  52. package/dist/esm/tools/search/content.mjs.map +1 -0
  53. package/dist/esm/tools/search/firecrawl.mjs +24 -41
  54. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  55. package/dist/esm/tools/search/format.mjs +161 -74
  56. package/dist/esm/tools/search/format.mjs.map +1 -1
  57. package/dist/esm/tools/search/highlights.mjs +64 -12
  58. package/dist/esm/tools/search/highlights.mjs.map +1 -1
  59. package/dist/esm/tools/search/rerankers.mjs +35 -50
  60. package/dist/esm/tools/search/rerankers.mjs.map +1 -1
  61. package/dist/esm/tools/search/schema.mjs +61 -0
  62. package/dist/esm/tools/search/schema.mjs.map +1 -0
  63. package/dist/esm/tools/search/search.mjs +153 -69
  64. package/dist/esm/tools/search/search.mjs.map +1 -1
  65. package/dist/esm/tools/search/tool.mjs +246 -57
  66. package/dist/esm/tools/search/tool.mjs.map +1 -1
  67. package/dist/esm/tools/search/utils.mjs +61 -0
  68. package/dist/esm/tools/search/utils.mjs.map +1 -0
  69. package/dist/types/graphs/Graph.d.ts +1 -1
  70. package/dist/types/index.d.ts +1 -0
  71. package/dist/types/messages/ids.d.ts +3 -0
  72. package/dist/types/messages/index.d.ts +1 -0
  73. package/dist/types/stream.d.ts +0 -8
  74. package/dist/types/tools/ToolNode.d.ts +6 -0
  75. package/dist/types/tools/example.d.ts +23 -3
  76. package/dist/types/tools/handlers.d.ts +8 -0
  77. package/dist/types/tools/search/content.d.ts +4 -0
  78. package/dist/types/tools/search/firecrawl.d.ts +7 -86
  79. package/dist/types/tools/search/format.d.ts +4 -1
  80. package/dist/types/tools/search/highlights.d.ts +1 -1
  81. package/dist/types/tools/search/rerankers.d.ts +8 -5
  82. package/dist/types/tools/search/schema.d.ts +16 -0
  83. package/dist/types/tools/search/search.d.ts +2 -2
  84. package/dist/types/tools/search/test.d.ts +1 -0
  85. package/dist/types/tools/search/tool.d.ts +25 -4
  86. package/dist/types/tools/search/types.d.ts +443 -53
  87. package/dist/types/tools/search/utils.d.ts +10 -0
  88. package/package.json +9 -7
  89. package/src/events.ts +49 -15
  90. package/src/graphs/Graph.ts +6 -2
  91. package/src/index.ts +1 -0
  92. package/src/messages/ids.ts +26 -0
  93. package/src/messages/index.ts +1 -0
  94. package/src/scripts/search.ts +8 -3
  95. package/src/splitStream.test.ts +132 -71
  96. package/src/splitStream.ts +2 -1
  97. package/src/stream.ts +94 -183
  98. package/src/tools/ToolNode.ts +37 -14
  99. package/src/tools/handlers.ts +167 -0
  100. package/src/tools/search/content.test.ts +173 -0
  101. package/src/tools/search/content.ts +147 -0
  102. package/src/tools/search/firecrawl.ts +36 -148
  103. package/src/tools/search/format.ts +205 -74
  104. package/src/tools/search/highlights.ts +99 -16
  105. package/src/tools/search/output.md +2775 -0
  106. package/src/tools/search/rerankers.ts +50 -62
  107. package/src/tools/search/schema.ts +63 -0
  108. package/src/tools/search/search.ts +232 -116
  109. package/src/tools/search/test.html +884 -0
  110. package/src/tools/search/test.md +643 -0
  111. package/src/tools/search/test.ts +159 -0
  112. package/src/tools/search/tool.ts +363 -87
  113. package/src/tools/search/types.ts +503 -61
  114. package/src/tools/search/utils.ts +79 -0
  115. package/src/utils/llmConfig.ts +1 -1
@@ -1,35 +1,7 @@
1
1
  import axios from 'axios';
2
+ import { processContent } from './content.mjs';
3
+ import { createDefaultLogger } from './utils.mjs';
2
4
 
3
- /* eslint-disable no-console */
4
- const getDomainName = (link, metadata) => {
5
- try {
6
- const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');
7
- const domain = new URL(url).hostname.replace(/^www\./, '');
8
- if (domain) {
9
- return domain;
10
- }
11
- }
12
- catch (e) {
13
- // URL parsing failed
14
- console.error('Error parsing URL:', e);
15
- }
16
- return;
17
- };
18
- function getAttribution(link, metadata) {
19
- if (!metadata)
20
- return getDomainName(link, metadata);
21
- const possibleAttributions = [
22
- metadata.ogSiteName,
23
- metadata['og:site_name'],
24
- metadata.title?.split('|').pop()?.trim(),
25
- metadata['twitter:site']?.replace(/^@/, ''),
26
- ];
27
- const attribution = possibleAttributions.find((attr) => attr != null && typeof attr === 'string' && attr.trim() !== '');
28
- if (attribution != null) {
29
- return attribution;
30
- }
31
- return getDomainName(link, metadata);
32
- }
33
5
  /**
34
6
  * Firecrawl scraper implementation
35
7
  * Uses the Firecrawl API to scrape web pages
@@ -39,6 +11,7 @@ class FirecrawlScraper {
39
11
  apiUrl;
40
12
  defaultFormats;
41
13
  timeout;
14
+ logger;
42
15
  constructor(config = {}) {
43
16
  this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
44
17
  const baseUrl = config.apiUrl ??
@@ -46,11 +19,12 @@ class FirecrawlScraper {
46
19
  'https://api.firecrawl.dev';
47
20
  this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;
48
21
  this.defaultFormats = config.formats ?? ['markdown', 'html'];
49
- this.timeout = config.timeout ?? 30000;
22
+ this.timeout = config.timeout ?? 15000;
23
+ this.logger = config.logger || createDefaultLogger();
50
24
  if (!this.apiKey) {
51
- console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
25
+ this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
52
26
  }
53
- console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);
27
+ this.logger.debug(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);
54
28
  }
55
29
  /**
56
30
  * Scrape a single URL
@@ -104,21 +78,30 @@ class FirecrawlScraper {
104
78
  */
105
79
  extractContent(response) {
106
80
  if (!response.success || !response.data) {
107
- return '';
81
+ return ['', undefined];
82
+ }
83
+ if (response.data.markdown != null && response.data.html != null) {
84
+ try {
85
+ const { markdown, ...rest } = processContent(response.data.html, response.data.markdown);
86
+ return [markdown, rest];
87
+ }
88
+ catch (error) {
89
+ this.logger.error('Error processing content:', error);
90
+ return [response.data.markdown, undefined];
91
+ }
108
92
  }
109
- // Prefer markdown content if available
110
- if (response.data.markdown != null) {
111
- return response.data.markdown;
93
+ else if (response.data.markdown != null) {
94
+ return [response.data.markdown, undefined];
112
95
  }
113
96
  // Fall back to HTML content
114
97
  if (response.data.html != null) {
115
- return response.data.html;
98
+ return [response.data.html, undefined];
116
99
  }
117
100
  // Fall back to raw HTML content
118
101
  if (response.data.rawHtml != null) {
119
- return response.data.rawHtml;
102
+ return [response.data.rawHtml, undefined];
120
103
  }
121
- return '';
104
+ return ['', undefined];
122
105
  }
123
106
  /**
124
107
  * Extract metadata from scrape response
@@ -141,5 +124,5 @@ const createFirecrawlScraper = (config = {}) => {
141
124
  return new FirecrawlScraper(config);
142
125
  };
143
126
 
144
- export { FirecrawlScraper, createFirecrawlScraper, getAttribution };
127
+ export { FirecrawlScraper, createFirecrawlScraper };
145
128
  //# sourceMappingURL=firecrawl.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"firecrawl.mjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\n\nexport interface FirecrawlScrapeOptions {\n formats?: string[];\n includeTags?: string[];\n excludeTags?: string[];\n headers?: Record<string, string>;\n waitFor?: number;\n timeout?: number;\n}\n\ninterface ScrapeMetadata {\n // Core source information\n sourceURL?: string;\n url?: string;\n scrapeId?: string;\n statusCode?: number;\n // Basic metadata\n title?: string;\n description?: string;\n language?: string;\n favicon?: string;\n viewport?: string;\n robots?: string;\n 'theme-color'?: string;\n // Open Graph metadata\n 'og:url'?: string;\n 'og:title'?: string;\n 'og:description'?: string;\n 'og:type'?: string;\n 'og:image'?: string;\n 'og:image:width'?: string;\n 'og:image:height'?: string;\n 'og:site_name'?: string;\n ogUrl?: string;\n ogTitle?: string;\n ogDescription?: string;\n ogImage?: string;\n ogSiteName?: string;\n // Article metadata\n 'article:author'?: string;\n 'article:published_time'?: string;\n 'article:modified_time'?: string;\n 'article:section'?: string;\n 'article:tag'?: string;\n 'article:publisher'?: string;\n publishedTime?: string;\n modifiedTime?: string;\n // Twitter metadata\n 'twitter:site'?: string;\n 'twitter:creator'?: string;\n 'twitter:card'?: string;\n 'twitter:image'?: string;\n 'twitter:dnt'?: string;\n 'twitter:app:name:iphone'?: string;\n 'twitter:app:id:iphone'?: string;\n 'twitter:app:url:iphone'?: string;\n 'twitter:app:name:ipad'?: string;\n 'twitter:app:id:ipad'?: string;\n 'twitter:app:url:ipad'?: string;\n 'twitter:app:name:googleplay'?: string;\n 'twitter:app:id:googleplay'?: string;\n 'twitter:app:url:googleplay'?: string;\n // Facebook metadata\n 'fb:app_id'?: string;\n // App links\n 'al:ios:url'?: string;\n 'al:ios:app_name'?: string;\n 'al:ios:app_store_id'?: string;\n // Allow for additional properties that might be present\n [key: string]: string | number | boolean | null | undefined;\n}\n\nexport interface FirecrawlScrapeResponse {\n success: boolean;\n data?: {\n markdown?: string;\n html?: string;\n rawHtml?: string;\n screenshot?: string;\n links?: string[];\n metadata?: ScrapeMetadata;\n };\n error?: string;\n}\n\nexport interface FirecrawlScraperConfig {\n apiKey?: string;\n apiUrl?: string;\n formats?: string[];\n timeout?: number;\n}\nconst getDomainName = (\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined => {\n try {\n const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');\n const domain = new URL(url).hostname.replace(/^www\\./, '');\n if (domain) {\n return domain;\n }\n } catch (e) {\n // URL parsing failed\n console.error('Error parsing URL:', e);\n }\n\n return;\n};\n\nexport function getAttribution(\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined {\n if (!metadata) return getDomainName(link, metadata);\n\n const possibleAttributions = [\n metadata.ogSiteName,\n metadata['og:site_name'],\n metadata.title?.split('|').pop()?.trim(),\n metadata['twitter:site']?.replace(/^@/, ''),\n ];\n\n const attribution = possibleAttributions.find(\n (attr) => attr != null && typeof attr === 'string' && attr.trim() !== ''\n );\n if (attribution != null) {\n return attribution;\n }\n\n return getDomainName(link, metadata);\n}\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n\n constructor(config: FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 30000;\n\n if (!this.apiKey) {\n console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: FirecrawlScrapeOptions = {}\n ): Promise<[string, FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(response: FirecrawlScrapeResponse): string {\n if (!response.success || !response.data) {\n return '';\n }\n\n // Prefer markdown content if available\n if (response.data.markdown != null) {\n return response.data.markdown;\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return response.data.html;\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return response.data.rawHtml;\n }\n\n return '';\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: FirecrawlScrapeResponse): ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":[],"mappings":";;AAAA;AA6FA,MAAM,aAAa,GAAG,CACpB,IAAY,EACZ,QAAyB,KACH;AACtB,IAAA,IAAI;AACF,QAAA,MAAM,GAAG,GAAG,QAAQ,EAAE,SAAS,IAAI,QAAQ,EAAE,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC;AAChE,QAAA,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1D,IAAI,MAAM,EAAE;AACV,YAAA,OAAO,MAAM;;;IAEf,OAAO,CAAC,EAAE;;AAEV,QAAA,OAAO,CAAC,KAAK,CAAC,oBAAoB,EAAE,CAAC,CAAC;;IAGxC;AACF,CAAC;AAEe,SAAA,cAAc,CAC5B,IAAY,EACZ,QAAyB,EAAA;AAEzB,IAAA,IAAI,CAAC,QAAQ;AAAE,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AAEnD,IAAA,MAAM,oBAAoB,GAAG;AAC3B,QAAA,QAAQ,CAAC,UAAU;QACnB,QAAQ,CAAC,cAAc,CAAC;AACxB,QAAA,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE;QACxC,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;KAC5C;IAED,MAAM,WAAW,GAAG,oBAAoB,CAAC,IAAI,CAC3C,CAAC,IAAI,KAAK,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CACzE;AACD,IAAA,IAAI,WAAW,IAAI,IAAI,EAAE;AACvB,QAAA,OAAO,WAAW;;AAGpB,IAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AACtC;AAEA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AAEf,IAAA,WAAA,CAAY,SAAiC,EAAE,EAAA;AAC7C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAGvE,OAAO,CAAC,GAAG,CAAC,CAAA,4CAAA,EAA+C,IAAI,CAAC,MAAM,CAAE,CAAA,CAAC;;AAG3E;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAkC,EAAE,EAAA;AAEpC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CAAC,QAAiC,EAAA;QAC9C,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,EAAE;;;QAIX,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;AAClC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;;QAI/B,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAC9B,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI;;;QAI3B,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;AACjC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO;;AAG9B,QAAA,OAAO,EAAE;;AAGX;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAiC,EAAA;AAC/C,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAiC,GAAA,EAAE,KACf;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;"}
1
+ {"version":3,"file":"firecrawl.mjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["import axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n private logger: t.Logger;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 15000;\n\n this.logger = config.logger || createDefaultLogger();\n\n if (!this.apiKey) {\n this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n this.logger.debug(\n `Firecrawl scraper initialized with API URL: ${this.apiUrl}`\n );\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n this.logger.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":[],"mappings":";;;;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AACP,IAAA,MAAM;AAEd,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;QAEtC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,mBAAmB,EAAE;AAEpD,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAG3E,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAA+C,4CAAA,EAAA,IAAI,CAAC,MAAM,CAAE,CAAA,CAC7D;;AAGH;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAG,cAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;gBACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACrD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;"}
@@ -1,113 +1,200 @@
1
- function formatResultsForLLM(results) {
2
- let output = '';
1
+ import { getDomainName, fileExtRegex } from './utils.mjs';
2
+
3
+ function addHighlightSection() {
4
+ return ['\n## Highlights', ''];
5
+ }
6
+ // Helper function to format a source (organic or top story)
7
+ function formatSource(source, index, turn, sourceType, references) {
8
+ /** Array of all lines to include in the output */
9
+ const outputLines = [];
10
+ // Add the title
11
+ outputLines.push(`# ${sourceType.charAt(0).toUpperCase() + sourceType.slice(1)} ${index}: ${source.title != null && source.title ? `"${source.title}"` : '(no title)'}`);
12
+ outputLines.push(`\nAnchor: \\ue202turn${turn}${sourceType}${index}`);
13
+ outputLines.push(`URL: ${source.link}`);
14
+ // Add optional fields
15
+ if ('snippet' in source && source.snippet != null) {
16
+ outputLines.push(`Summary: ${source.snippet}`);
17
+ }
18
+ if (source.date != null) {
19
+ outputLines.push(`Date: ${source.date}`);
20
+ }
21
+ if (source.attribution != null) {
22
+ outputLines.push(`Source: ${source.attribution}`);
23
+ }
24
+ // Add highlight section or empty line
25
+ if ((source.highlights?.length ?? 0) > 0) {
26
+ outputLines.push(...addHighlightSection());
27
+ }
28
+ else {
29
+ outputLines.push('');
30
+ }
31
+ // Process highlights if they exist
32
+ (source.highlights ?? [])
33
+ .filter((h) => h.text.trim().length > 0)
34
+ .forEach((h, hIndex) => {
35
+ outputLines.push(`### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`);
36
+ outputLines.push('');
37
+ outputLines.push('```text');
38
+ outputLines.push(h.text.trim());
39
+ outputLines.push('```');
40
+ outputLines.push('');
41
+ if (h.references != null && h.references.length) {
42
+ let hasHeader = false;
43
+ const refLines = [];
44
+ for (let j = 0; j < h.references.length; j++) {
45
+ const ref = h.references[j];
46
+ if (ref.reference.originalUrl.includes('mailto:')) {
47
+ continue;
48
+ }
49
+ references.push({
50
+ type: ref.type,
51
+ link: ref.reference.originalUrl,
52
+ attribution: getDomainName(ref.reference.originalUrl),
53
+ title: (((ref.reference.title ?? '') || ref.reference.text) ??
54
+ '').split('\n')[0],
55
+ });
56
+ if (ref.type !== 'link') {
57
+ continue;
58
+ }
59
+ if (fileExtRegex.test(ref.reference.originalUrl)) {
60
+ continue;
61
+ }
62
+ if (!hasHeader) {
63
+ refLines.push('Core References:');
64
+ hasHeader = true;
65
+ }
66
+ refLines.push(`- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`);
67
+ refLines.push(`\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`);
68
+ }
69
+ if (hasHeader) {
70
+ outputLines.push(...refLines);
71
+ outputLines.push('');
72
+ }
73
+ }
74
+ if (hIndex < (source.highlights?.length ?? 0) - 1) {
75
+ outputLines.push('---');
76
+ outputLines.push('');
77
+ }
78
+ });
79
+ outputLines.push('');
80
+ return outputLines.join('\n');
81
+ }
82
+ function formatResultsForLLM(turn, results) {
83
+ /** Array to collect all output lines */
84
+ const outputLines = [];
3
85
  const addSection = (title) => {
4
- output += `\n=== ${title} ===\n`;
86
+ outputLines.push('');
87
+ outputLines.push(`=== ${title} ===`);
88
+ outputLines.push('');
5
89
  };
90
+ const references = [];
6
91
  // Organic (web) results
7
- const organic = results.organic ?? [];
8
- if (organic.length) {
9
- addSection('Web Results');
10
- organic.forEach((r, i) => {
11
- output += [
12
- `Source ${i + 1}: ${r.title ?? '(no title)'}`,
13
- `Citation Anchor: \\ue202turn0search${i + 1}`,
14
- `URL: ${r.link}`,
15
- r.snippet != null ? `Summary: ${r.snippet}` : '',
16
- r.date != null ? `Date: ${r.date}` : '',
17
- r.attribution != null ? `Source: ${r.attribution}` : '',
18
- '',
19
- '--- Content Highlights ---',
20
- ...(r.highlights ?? [])
21
- .filter((h) => h.text.trim().length > 0)
22
- .map((h) => `[Relevance: ${h.score.toFixed(2)}]\n${h.text.trim()}`),
23
- '',
24
- ]
25
- .filter(Boolean)
26
- .join('\n');
27
- });
92
+ if (results.organic?.length != null && results.organic.length > 0) {
93
+ addSection(`Web Results, Turn ${turn}`);
94
+ for (let i = 0; i < results.organic.length; i++) {
95
+ const r = results.organic[i];
96
+ outputLines.push(formatSource(r, i, turn, 'search', references));
97
+ delete results.organic[i].highlights;
98
+ }
99
+ }
100
+ // Top stories (news)
101
+ const topStories = results.topStories ?? [];
102
+ if (topStories.length) {
103
+ addSection('News Results');
104
+ for (let i = 0; i < topStories.length; i++) {
105
+ const r = topStories[i];
106
+ outputLines.push(formatSource(r, i, turn, 'news', references));
107
+ if (results.topStories?.[i]?.highlights) {
108
+ delete results.topStories[i].highlights;
109
+ }
110
+ }
28
111
  }
29
- // Ignoring these sections for now
30
- // // Top stories (news)
31
- // const topStores = results.topStories ?? [];
32
- // if (topStores.length) {
33
- // addSection('News Results');
34
- // topStores.forEach((r, i) => {
35
- // output += [
36
- // `Anchor: \ue202turn0news${i + 1}`,
37
- // `Title: ${r.title ?? '(no title)'}`,
38
- // `URL: ${r.link}`,
39
- // r.snippet != null ? `Snippet: ${r.snippet}` : '',
40
- // r.date != null ? `Date: ${r.date}` : '',
41
- // r.attribution != null ? `Source: ${r.attribution}` : '',
42
- // ''
43
- // ].filter(Boolean).join('\n');
44
- // });
45
- // }
46
112
  // // Images
47
113
  // const images = results.images ?? [];
48
114
  // if (images.length) {
49
115
  // addSection('Image Results');
50
- // images.forEach((img, i) => {
51
- // output += [
52
- // `Anchor: \ue202turn0image${i + 1}`,
53
- // `Title: ${img.title ?? '(no title)'}`,
54
- // `Image URL: ${img.imageUrl}`,
55
- // ''
56
- // ].join('\n');
57
- // });
116
+ // const imageLines = images.map((img, i) => [
117
+ // `Anchor: \ue202turn0image${i}`,
118
+ // `Title: ${img.title ?? '(no title)'}`,
119
+ // `Image URL: ${img.imageUrl}`,
120
+ // ''
121
+ // ].join('\n'));
122
+ // outputLines.push(imageLines.join('\n'));
58
123
  // }
59
124
  // Knowledge Graph
60
125
  if (results.knowledgeGraph != null) {
61
126
  addSection('Knowledge Graph');
62
- output += [
63
- `Title: ${results.knowledgeGraph.title ?? '(no title)'}`,
127
+ const kgLines = [
128
+ `**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
129
+ results.knowledgeGraph.type != null
130
+ ? `**Type:** ${results.knowledgeGraph.type}`
131
+ : '',
64
132
  results.knowledgeGraph.description != null
65
- ? `Description: ${results.knowledgeGraph.description}`
133
+ ? `**Description:** ${results.knowledgeGraph.description}`
66
134
  : '',
67
- results.knowledgeGraph.type != null
68
- ? `Type: ${results.knowledgeGraph.type}`
135
+ results.knowledgeGraph.descriptionSource != null
136
+ ? `**Description Source:** ${results.knowledgeGraph.descriptionSource}`
137
+ : '',
138
+ results.knowledgeGraph.descriptionLink != null
139
+ ? `**Description Link:** ${results.knowledgeGraph.descriptionLink}`
69
140
  : '',
70
141
  results.knowledgeGraph.imageUrl != null
71
- ? `Image URL: ${results.knowledgeGraph.imageUrl}`
142
+ ? `**Image URL:** ${results.knowledgeGraph.imageUrl}`
143
+ : '',
144
+ results.knowledgeGraph.website != null
145
+ ? `**Website:** ${results.knowledgeGraph.website}`
72
146
  : '',
73
147
  results.knowledgeGraph.attributes != null
74
- ? `Attributes: ${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}`
148
+ ? `**Attributes:**\n\`\`\`json\n${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}\n\`\`\``
75
149
  : '',
76
150
  '',
77
- ]
78
- .filter(Boolean)
79
- .join('\n');
151
+ ].filter(Boolean);
152
+ outputLines.push(kgLines.join('\n\n'));
80
153
  }
81
154
  // Answer Box
82
155
  if (results.answerBox != null) {
83
156
  addSection('Answer Box');
84
- output += [
157
+ const abLines = [
85
158
  results.answerBox.title != null
86
- ? `Title: ${results.answerBox.title}`
87
- : '',
88
- results.answerBox.answer != null
89
- ? `Answer: ${results.answerBox.answer}`
159
+ ? `**Title:** ${results.answerBox.title}`
90
160
  : '',
91
161
  results.answerBox.snippet != null
92
- ? `Snippet: ${results.answerBox.snippet}`
162
+ ? `**Snippet:** ${results.answerBox.snippet}`
163
+ : '',
164
+ results.answerBox.snippetHighlighted != null
165
+ ? `**Snippet Highlighted:** ${results.answerBox.snippetHighlighted
166
+ .map((s) => `\`${s}\``)
167
+ .join(' ')}`
168
+ : '',
169
+ results.answerBox.link != null
170
+ ? `**Link:** ${results.answerBox.link}`
93
171
  : '',
94
- results.answerBox.date != null ? `Date: ${results.answerBox.date}` : '',
95
172
  '',
96
- ]
97
- .filter(Boolean)
98
- .join('\n');
173
+ ].filter(Boolean);
174
+ outputLines.push(abLines.join('\n\n'));
99
175
  }
100
176
  // People also ask
101
177
  const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
102
178
  if (peopleAlsoAsk.length) {
103
179
  addSection('People Also Ask');
104
- peopleAlsoAsk.forEach((p, _i) => {
105
- output += [`Q: ${p.question}`, `A: ${p.answer}`, '']
106
- .filter(Boolean)
107
- .join('\n');
180
+ const paaLines = [];
181
+ peopleAlsoAsk.forEach((p, i) => {
182
+ const questionLines = [
183
+ `### Question ${i + 1}:`,
184
+ `"${p.question}"`,
185
+ `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,
186
+ `${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
187
+ `${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
188
+ '',
189
+ ].filter(Boolean);
190
+ paaLines.push(questionLines.join('\n\n'));
108
191
  });
192
+ outputLines.push(paaLines.join(''));
109
193
  }
110
- return output.trim();
194
+ return {
195
+ output: outputLines.join('\n').trim(),
196
+ references,
197
+ };
111
198
  }
112
199
 
113
200
  export { formatResultsForLLM };
@@ -1 +1 @@
1
- {"version":3,"file":"format.mjs","sources":["../../../../src/tools/search/format.ts"],"sourcesContent":["import type * as t from './types';\n\nexport function formatResultsForLLM(results: t.SearchResultData): string {\n let output = '';\n\n const addSection = (title: string): void => {\n output += `\\n=== ${title} ===\\n`;\n };\n\n // Organic (web) results\n const organic = results.organic ?? [];\n if (organic.length) {\n addSection('Web Results');\n organic.forEach((r, i) => {\n output += [\n `Source ${i + 1}: ${r.title ?? '(no title)'}`,\n `Citation Anchor: \\\\ue202turn0search${i + 1}`,\n `URL: ${r.link}`,\n r.snippet != null ? `Summary: ${r.snippet}` : '',\n r.date != null ? `Date: ${r.date}` : '',\n r.attribution != null ? `Source: ${r.attribution}` : '',\n '',\n '--- Content Highlights ---',\n ...(r.highlights ?? [])\n .filter((h) => h.text.trim().length > 0)\n .map((h) => `[Relevance: ${h.score.toFixed(2)}]\\n${h.text.trim()}`),\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n });\n }\n\n // Ignoring these sections for now\n // // Top stories (news)\n // const topStores = results.topStories ?? [];\n // if (topStores.length) {\n // addSection('News Results');\n // topStores.forEach((r, i) => {\n // output += [\n // `Anchor: \\ue202turn0news${i + 1}`,\n // `Title: ${r.title ?? '(no title)'}`,\n // `URL: ${r.link}`,\n // r.snippet != null ? `Snippet: ${r.snippet}` : '',\n // r.date != null ? `Date: ${r.date}` : '',\n // r.attribution != null ? `Source: ${r.attribution}` : '',\n // ''\n // ].filter(Boolean).join('\\n');\n // });\n // }\n\n // // Images\n // const images = results.images ?? [];\n // if (images.length) {\n // addSection('Image Results');\n // images.forEach((img, i) => {\n // output += [\n // `Anchor: \\ue202turn0image${i + 1}`,\n // `Title: ${img.title ?? '(no title)'}`,\n // `Image URL: ${img.imageUrl}`,\n // ''\n // ].join('\\n');\n // });\n // }\n\n // Knowledge Graph\n if (results.knowledgeGraph != null) {\n addSection('Knowledge Graph');\n output += [\n `Title: ${results.knowledgeGraph.title ?? '(no title)'}`,\n results.knowledgeGraph.description != null\n ? `Description: ${results.knowledgeGraph.description}`\n : '',\n results.knowledgeGraph.type != null\n ? `Type: ${results.knowledgeGraph.type}`\n : '',\n results.knowledgeGraph.imageUrl != null\n ? `Image URL: ${results.knowledgeGraph.imageUrl}`\n : '',\n results.knowledgeGraph.attributes != null\n ? `Attributes: ${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}`\n : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n }\n\n // Answer Box\n if (results.answerBox != null) {\n addSection('Answer Box');\n output += [\n results.answerBox.title != null\n ? `Title: ${results.answerBox.title}`\n : '',\n results.answerBox.answer != null\n ? `Answer: ${results.answerBox.answer}`\n : '',\n results.answerBox.snippet != null\n ? `Snippet: ${results.answerBox.snippet}`\n : '',\n results.answerBox.date != null ? `Date: ${results.answerBox.date}` : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n }\n\n // People also ask\n const peopleAlsoAsk = results.peopleAlsoAsk ?? [];\n if (peopleAlsoAsk.length) {\n addSection('People Also Ask');\n peopleAlsoAsk.forEach((p, _i) => {\n output += [`Q: ${p.question}`, `A: ${p.answer}`, '']\n .filter(Boolean)\n .join('\\n');\n });\n }\n\n return output.trim();\n}\n"],"names":[],"mappings":"AAEM,SAAU,mBAAmB,CAAC,OAA2B,EAAA;IAC7D,IAAI,MAAM,GAAG,EAAE;AAEf,IAAA,MAAM,UAAU,GAAG,CAAC,KAAa,KAAU;AACzC,QAAA,MAAM,IAAI,CAAA,MAAA,EAAS,KAAK,CAAA,MAAA,CAAQ;AAClC,KAAC;;AAGD,IAAA,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE;AACrC,IAAA,IAAI,OAAO,CAAC,MAAM,EAAE;QAClB,UAAU,CAAC,aAAa,CAAC;QACzB,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,KAAI;AACvB,YAAA,MAAM,IAAI;gBACR,CAAU,OAAA,EAAA,CAAC,GAAG,CAAC,CAAA,EAAA,EAAK,CAAC,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;gBAC7C,CAAsC,mCAAA,EAAA,CAAC,GAAG,CAAC,CAAE,CAAA;gBAC7C,CAAQ,KAAA,EAAA,CAAC,CAAC,IAAI,CAAE,CAAA;AAChB,gBAAA,CAAC,CAAC,OAAO,IAAI,IAAI,GAAG,CAAY,SAAA,EAAA,CAAC,CAAC,OAAO,CAAA,CAAE,GAAG,EAAE;AAChD,gBAAA,CAAC,CAAC,IAAI,IAAI,IAAI,GAAG,CAAS,MAAA,EAAA,CAAC,CAAC,IAAI,CAAA,CAAE,GAAG,EAAE;AACvC,gBAAA,CAAC,CAAC,WAAW,IAAI,IAAI,GAAG,CAAW,QAAA,EAAA,CAAC,CAAC,WAAW,CAAA,CAAE,GAAG,EAAE;gBACvD,EAAE;gBACF,4BAA4B;AAC5B,gBAAA,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE;AACnB,qBAAA,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;qBACtC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA,YAAA,EAAe,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA,GAAA,EAAM,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA,CAAE,CAAC;gBACrE,EAAE;AACH;iBACE,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,IAAI,CAAC;AACf,SAAC,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoCJ,IAAA,IAAI,OAAO,CAAC,cAAc,IAAI,IAAI,EAAE;QAClC,UAAU,CAAC,iBAAiB,CAAC;AAC7B,QAAA,MAAM,IAAI;AACR,YAAA,CAAA,OAAA,EAAU,OAAO,CAAC,cAAc,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;AACxD,YAAA,OAAO,CAAC,cAAc,CAAC,WAAW,IAAI;AACpC,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,cAAc,CAAC,WAAW,CAAE;AACtD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI;AAC7B,kBAAE,CAAS,MAAA,EAAA,OAAO,CAAC,cAAc,CAAC,IAAI,CAAE;AACxC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,IAAI;AACjC,kBAAE,CAAc,WAAA,EAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAE;AACjD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,UAAU,IAAI;AACnC,kBAAE,CAAe,YAAA,EAAA,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,cAAc,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAE;AAC7E,kBAAE,EAAE;YACN,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,IAAI,CAAC;;;AAIf,IAAA,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,EAAE;QAC7B,UAAU,CAAC,YAAY,CAAC;AACxB,QAAA,MAAM,IAAI;AACR,YAAA,OAAO,CAAC,SAAS,CAAC,KAAK,IAAI;AACzB,kBAAE,CAAU,OAAA,EAAA,OAAO,CAAC,SAAS,CAAC,KAAK,CAAE;AACrC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,MAAM,IAAI;AAC1B,kBAAE,CAAW,QAAA,EAAA,OAAO,CAAC,SAAS,CAAC,MAAM,CAAE;AACvC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,OAAO,IAAI;AAC3B,kBAAE,CAAY,SAAA,EAAA,OAAO,CAAC,SAAS,CAAC,OAAO,CAAE;AACzC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,IAAI,IAAI,IAAI,GAAG,SAAS,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,GAAG,EAAE;YACvE,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,IAAI,CAAC;;;AAIf,IAAA,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,EAAE;AACjD,IAAA,IAAI,aAAa,CAAC,MAAM,EAAE;QACxB,UAAU,CAAC,iBAAiB,CAAC;QAC7B,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;AAC9B,YAAA,MAAM,IAAI,CAAC,CAAM,GAAA,EAAA,CAAC,CAAC,QAAQ,CAAA,CAAE,EAAE,CAAA,GAAA,EAAM,CAAC,CAAC,MAAM,CAAE,CAAA,EAAE,EAAE;iBAChD,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,IAAI,CAAC;AACf,SAAC,CAAC;;AAGJ,IAAA,OAAO,MAAM,CAAC,IAAI,EAAE;AACtB;;;;"}
1
+ {"version":3,"file":"format.mjs","sources":["../../../../src/tools/search/format.ts"],"sourcesContent":["import type * as t from './types';\nimport { getDomainName, fileExtRegex } from './utils';\n\nfunction addHighlightSection(): string[] {\n return ['\\n## Highlights', ''];\n}\n\n// Helper function to format a source (organic or top story)\nfunction formatSource(\n source: t.ValidSource,\n index: number,\n turn: number,\n sourceType: 'search' | 'news',\n references: t.ResultReference[]\n): string {\n /** Array of all lines to include in the output */\n const outputLines: string[] = [];\n\n // Add the title\n outputLines.push(\n `# ${sourceType.charAt(0).toUpperCase() + sourceType.slice(1)} ${index}: ${source.title != null && source.title ? `\"${source.title}\"` : '(no title)'}`\n );\n outputLines.push(`\\nAnchor: \\\\ue202turn${turn}${sourceType}${index}`);\n outputLines.push(`URL: ${source.link}`);\n\n // Add optional fields\n if ('snippet' in source && source.snippet != null) {\n outputLines.push(`Summary: ${source.snippet}`);\n }\n\n if (source.date != null) {\n outputLines.push(`Date: ${source.date}`);\n }\n\n if (source.attribution != null) {\n outputLines.push(`Source: ${source.attribution}`);\n }\n\n // Add highlight section or empty line\n if ((source.highlights?.length ?? 0) > 0) {\n outputLines.push(...addHighlightSection());\n } else {\n outputLines.push('');\n }\n\n // Process highlights if they exist\n (source.highlights ?? [])\n .filter((h) => h.text.trim().length > 0)\n .forEach((h, hIndex) => {\n outputLines.push(\n `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`\n );\n outputLines.push('');\n outputLines.push('```text');\n outputLines.push(h.text.trim());\n outputLines.push('```');\n outputLines.push('');\n\n if (h.references != null && h.references.length) {\n let hasHeader = false;\n const refLines: string[] = [];\n\n for (let j = 0; j < h.references.length; j++) {\n const ref = h.references[j];\n if (ref.reference.originalUrl.includes('mailto:')) {\n continue;\n }\n references.push({\n type: ref.type,\n link: ref.reference.originalUrl,\n attribution: getDomainName(ref.reference.originalUrl),\n title: (\n ((ref.reference.title ?? '') || ref.reference.text) ??\n ''\n ).split('\\n')[0],\n });\n\n if (ref.type !== 'link') {\n continue;\n }\n\n if (fileExtRegex.test(ref.reference.originalUrl)) {\n continue;\n }\n\n if (!hasHeader) {\n refLines.push('Core References:');\n hasHeader = true;\n }\n\n refLines.push(\n `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`\n );\n refLines.push(\n `\\t- Anchor: \\\\ue202turn${turn}ref${references.length - 1}`\n );\n }\n\n if (hasHeader) {\n outputLines.push(...refLines);\n outputLines.push('');\n }\n }\n\n if (hIndex < (source.highlights?.length ?? 0) - 1) {\n outputLines.push('---');\n outputLines.push('');\n }\n });\n\n outputLines.push('');\n return outputLines.join('\\n');\n}\n\nexport function formatResultsForLLM(\n turn: number,\n results: t.SearchResultData\n): { output: string; references: t.ResultReference[] } {\n /** Array to collect all output lines */\n const outputLines: string[] = [];\n\n const addSection = (title: string): void => {\n outputLines.push('');\n outputLines.push(`=== ${title} ===`);\n outputLines.push('');\n };\n\n const references: t.ResultReference[] = [];\n\n // Organic (web) results\n if (results.organic?.length != null && results.organic.length > 0) {\n addSection(`Web Results, Turn ${turn}`);\n for (let i = 0; i < results.organic.length; i++) {\n const r = results.organic[i];\n outputLines.push(formatSource(r, i, turn, 'search', references));\n delete results.organic[i].highlights;\n }\n }\n\n // Top stories (news)\n const topStories = results.topStories ?? [];\n if (topStories.length) {\n addSection('News Results');\n for (let i = 0; i < topStories.length; i++) {\n const r = topStories[i];\n outputLines.push(formatSource(r, i, turn, 'news', references));\n if (results.topStories?.[i]?.highlights) {\n delete results.topStories[i].highlights;\n }\n }\n }\n\n // // Images\n // const images = results.images ?? [];\n // if (images.length) {\n // addSection('Image Results');\n // const imageLines = images.map((img, i) => [\n // `Anchor: \\ue202turn0image${i}`,\n // `Title: ${img.title ?? '(no title)'}`,\n // `Image URL: ${img.imageUrl}`,\n // ''\n // ].join('\\n'));\n // outputLines.push(imageLines.join('\\n'));\n // }\n\n // Knowledge Graph\n if (results.knowledgeGraph != null) {\n addSection('Knowledge Graph');\n const kgLines = [\n `**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,\n results.knowledgeGraph.type != null\n ? `**Type:** ${results.knowledgeGraph.type}`\n : '',\n results.knowledgeGraph.description != null\n ? `**Description:** ${results.knowledgeGraph.description}`\n : '',\n results.knowledgeGraph.descriptionSource != null\n ? `**Description Source:** ${results.knowledgeGraph.descriptionSource}`\n : '',\n results.knowledgeGraph.descriptionLink != null\n ? `**Description Link:** ${results.knowledgeGraph.descriptionLink}`\n : '',\n results.knowledgeGraph.imageUrl != null\n ? `**Image URL:** ${results.knowledgeGraph.imageUrl}`\n : '',\n results.knowledgeGraph.website != null\n ? `**Website:** ${results.knowledgeGraph.website}`\n : '',\n results.knowledgeGraph.attributes != null\n ? `**Attributes:**\\n\\`\\`\\`json\\n${JSON.stringify(\n results.knowledgeGraph.attributes,\n null,\n 2\n )}\\n\\`\\`\\``\n : '',\n '',\n ].filter(Boolean);\n\n outputLines.push(kgLines.join('\\n\\n'));\n }\n\n // Answer Box\n if (results.answerBox != null) {\n addSection('Answer Box');\n const abLines = [\n results.answerBox.title != null\n ? `**Title:** ${results.answerBox.title}`\n : '',\n results.answerBox.snippet != null\n ? `**Snippet:** ${results.answerBox.snippet}`\n : '',\n results.answerBox.snippetHighlighted != null\n ? `**Snippet Highlighted:** ${results.answerBox.snippetHighlighted\n .map((s) => `\\`${s}\\``)\n .join(' ')}`\n : '',\n results.answerBox.link != null\n ? `**Link:** ${results.answerBox.link}`\n : '',\n '',\n ].filter(Boolean);\n\n outputLines.push(abLines.join('\\n\\n'));\n }\n\n // People also ask\n const peopleAlsoAsk = results.peopleAlsoAsk ?? [];\n if (peopleAlsoAsk.length) {\n addSection('People Also Ask');\n\n const paaLines: string[] = [];\n peopleAlsoAsk.forEach((p, i) => {\n const questionLines = [\n `### Question ${i + 1}:`,\n `\"${p.question}\"`,\n `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,\n `${p.title != null && p.title ? `Title: ${p.title}` : ''}`,\n `${p.link != null && p.link ? `Link: ${p.link}` : ''}`,\n '',\n ].filter(Boolean);\n\n paaLines.push(questionLines.join('\\n\\n'));\n });\n\n outputLines.push(paaLines.join(''));\n }\n\n return {\n output: outputLines.join('\\n').trim(),\n references,\n };\n}\n"],"names":[],"mappings":";;AAGA,SAAS,mBAAmB,GAAA;AAC1B,IAAA,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC;AAChC;AAEA;AACA,SAAS,YAAY,CACnB,MAAqB,EACrB,KAAa,EACb,IAAY,EACZ,UAA6B,EAC7B,UAA+B,EAAA;;IAG/B,MAAM,WAAW,GAAa,EAAE;;IAGhC,WAAW,CAAC,IAAI,CACd,CAAA,EAAA,EAAK,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA,CAAA,EAAI,KAAK,CAAA,EAAA,EAAK,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC,KAAK,CAAG,CAAA,CAAA,GAAG,YAAY,CAAE,CAAA,CACvJ;IACD,WAAW,CAAC,IAAI,CAAC,CAAwB,qBAAA,EAAA,IAAI,CAAG,EAAA,UAAU,CAAG,EAAA,KAAK,CAAE,CAAA,CAAC;IACrE,WAAW,CAAC,IAAI,CAAC,CAAA,KAAA,EAAQ,MAAM,CAAC,IAAI,CAAE,CAAA,CAAC;;IAGvC,IAAI,SAAS,IAAI,MAAM,IAAI,MAAM,CAAC,OAAO,IAAI,IAAI,EAAE;QACjD,WAAW,CAAC,IAAI,CAAC,CAAA,SAAA,EAAY,MAAM,CAAC,OAAO,CAAE,CAAA,CAAC;;AAGhD,IAAA,IAAI,MAAM,CAAC,IAAI,IAAI,IAAI,EAAE;QACvB,WAAW,CAAC,IAAI,CAAC,CAAA,MAAA,EAAS,MAAM,CAAC,IAAI,CAAE,CAAA,CAAC;;AAG1C,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,EAAE;QAC9B,WAAW,CAAC,IAAI,CAAC,CAAA,QAAA,EAAW,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;;AAInD,IAAA,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;AACxC,QAAA,WAAW,CAAC,IAAI,CAAC,GAAG,mBAAmB,EAAE,CAAC;;SACrC;AACL,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;;;AAItB,IAAA,CAAC,MAAM,CAAC,UAAU,IAAI,EAAE;AACrB,SAAA,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;AACtC,SAAA,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,KAAI;AACrB,QAAA,WAAW,CAAC,IAAI,CACd,iBAAiB,MAAM,GAAG,CAAC,CAAgB,aAAA,EAAA,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA,CAAA,CAAG,CACjE;AACD,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACpB,QAAA,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC;QAC3B,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;AAC/B,QAAA,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;AACvB,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AAEpB,QAAA,IAAI,CAAC,CAAC,UAAU,IAAI,IAAI,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE;YAC/C,IAAI,SAAS,GAAG,KAAK;YACrB,MAAM,QAAQ,GAAa,EAAE;AAE7B,YAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5C,MAAM,GAAG,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC3B,IAAI,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE;oBACjD;;gBAEF,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,GAAG,CAAC,IAAI;AACd,oBAAA,IAAI,EAAE,GAAG,CAAC,SAAS,CAAC,WAAW;oBAC/B,WAAW,EAAE,aAAa,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC;AACrD,oBAAA,KAAK,EAAE,CACL,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,IAAI,EAAE,KAAK,GAAG,CAAC,SAAS,CAAC,IAAI;wBAClD,EAAE,EACF,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,iBAAA,CAAC;AAEF,gBAAA,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE;oBACvB;;gBAGF,IAAI,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE;oBAChD;;gBAGF,IAAI,CAAC,SAAS,EAAE;AACd,oBAAA,QAAQ,CAAC,IAAI,CAAC,kBAAkB,CAAC;oBACjC,SAAS,GAAG,IAAI;;gBAGlB,QAAQ,CAAC,IAAI,CACX,CAAA,EAAA,EAAK,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,aAAa,GAAG,CAAC,KAAK,GAAG,CAAC,SAAS,CAAC,WAAW,CAAE,CAAA,CACvE;AACD,gBAAA,QAAQ,CAAC,IAAI,CACX,CAAA,uBAAA,EAA0B,IAAI,CAAA,GAAA,EAAM,UAAU,CAAC,MAAM,GAAG,CAAC,CAAA,CAAE,CAC5D;;YAGH,IAAI,SAAS,EAAE;AACb,gBAAA,WAAW,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC;AAC7B,gBAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;;;AAIxB,QAAA,IAAI,MAAM,GAAG,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;AACjD,YAAA,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;AACvB,YAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;;AAExB,KAAC,CAAC;AAEJ,IAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACpB,IAAA,OAAO,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;AAC/B;AAEgB,SAAA,mBAAmB,CACjC,IAAY,EACZ,OAA2B,EAAA;;IAG3B,MAAM,WAAW,GAAa,EAAE;AAEhC,IAAA,MAAM,UAAU,GAAG,CAAC,KAAa,KAAU;AACzC,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACpB,QAAA,WAAW,CAAC,IAAI,CAAC,OAAO,KAAK,CAAA,IAAA,CAAM,CAAC;AACpC,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACtB,KAAC;IAED,MAAM,UAAU,GAAwB,EAAE;;AAG1C,IAAA,IAAI,OAAO,CAAC,OAAO,EAAE,MAAM,IAAI,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,QAAA,UAAU,CAAC,CAAA,kBAAA,EAAqB,IAAI,CAAA,CAAE,CAAC;AACvC,QAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC/C,MAAM,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;AAC5B,YAAA,WAAW,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;YAChE,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU;;;;AAKxC,IAAA,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,EAAE;AAC3C,IAAA,IAAI,UAAU,CAAC,MAAM,EAAE;QACrB,UAAU,CAAC,cAAc,CAAC;AAC1B,QAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;AAC1C,YAAA,MAAM,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC;AACvB,YAAA,WAAW,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;YAC9D,IAAI,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,EAAE,UAAU,EAAE;gBACvC,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU;;;;;;;;;;;;;;;;;AAmB7C,IAAA,IAAI,OAAO,CAAC,cAAc,IAAI,IAAI,EAAE;QAClC,UAAU,CAAC,iBAAiB,CAAC;AAC7B,QAAA,MAAM,OAAO,GAAG;AACd,YAAA,CAAA,WAAA,EAAc,OAAO,CAAC,cAAc,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;AAC5D,YAAA,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI;AAC7B,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,cAAc,CAAC,IAAI,CAAE;AAC5C,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,WAAW,IAAI;AACpC,kBAAE,CAAoB,iBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,WAAW,CAAE;AAC1D,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,IAAI;AAC1C,kBAAE,CAA2B,wBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,CAAE;AACvE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,eAAe,IAAI;AACxC,kBAAE,CAAyB,sBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,eAAe,CAAE;AACnE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,IAAI;AACjC,kBAAE,CAAkB,eAAA,EAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAE;AACrD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,OAAO,IAAI;AAChC,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,cAAc,CAAC,OAAO,CAAE;AAClD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,UAAU,IAAI;AACnC,kBAAE,CAAgC,6BAAA,EAAA,IAAI,CAAC,SAAS,CAC9C,OAAO,CAAC,cAAc,CAAC,UAAU,EACjC,IAAI,EACJ,CAAC,CACF,CAAU,QAAA;AACX,kBAAE,EAAE;YACN,EAAE;AACH,SAAA,CAAC,MAAM,CAAC,OAAO,CAAC;QAEjB,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;;;AAIxC,IAAA,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,EAAE;QAC7B,UAAU,CAAC,YAAY,CAAC;AACxB,QAAA,MAAM,OAAO,GAAG;AACd,YAAA,OAAO,CAAC,SAAS,CAAC,KAAK,IAAI;AACzB,kBAAE,CAAc,WAAA,EAAA,OAAO,CAAC,SAAS,CAAC,KAAK,CAAE;AACzC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,OAAO,IAAI;AAC3B,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,SAAS,CAAC,OAAO,CAAE;AAC7C,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,kBAAkB,IAAI;AACtC,kBAAE,CAA4B,yBAAA,EAAA,OAAO,CAAC,SAAS,CAAC;qBAC7C,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA,EAAA,EAAK,CAAC,CAAA,EAAA,CAAI;qBACrB,IAAI,CAAC,GAAG,CAAC,CAAE;AACd,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,IAAI,IAAI;AACxB,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,SAAS,CAAC,IAAI,CAAE;AACvC,kBAAE,EAAE;YACN,EAAE;AACH,SAAA,CAAC,MAAM,CAAC,OAAO,CAAC;QAEjB,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;;;AAIxC,IAAA,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,EAAE;AACjD,IAAA,IAAI,aAAa,CAAC,MAAM,EAAE;QACxB,UAAU,CAAC,iBAAiB,CAAC;QAE7B,MAAM,QAAQ,GAAa,EAAE;QAC7B,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,KAAI;AAC7B,YAAA,MAAM,aAAa,GAAG;gBACpB,CAAgB,aAAA,EAAA,CAAC,GAAG,CAAC,CAAG,CAAA,CAAA;gBACxB,CAAI,CAAA,EAAA,CAAC,CAAC,QAAQ,CAAG,CAAA,CAAA;gBACjB,CAAG,EAAA,CAAC,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,OAAO,GAAG,CAAA,SAAA,EAAY,CAAC,CAAC,OAAO,EAAE,GAAG,EAAE,CAAE,CAAA;gBAClE,CAAG,EAAA,CAAC,CAAC,KAAK,IAAI,IAAI,IAAI,CAAC,CAAC,KAAK,GAAG,CAAA,OAAA,EAAU,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAE,CAAA;gBAC1D,CAAG,EAAA,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,CAAC,IAAI,GAAG,CAAA,MAAA,EAAS,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,CAAE,CAAA;gBACtD,EAAE;AACH,aAAA,CAAC,MAAM,CAAC,OAAO,CAAC;YAEjB,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC3C,SAAC,CAAC;QAEF,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;;IAGrC,OAAO;QACL,MAAM,EAAE,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;QACrC,UAAU;KACX;AACH;;;;"}
@@ -101,6 +101,57 @@ function findBestBoundary(text, direction = 'backward') {
101
101
  // No match found, use character boundary
102
102
  return direction === 'backward' ? text.length : 0;
103
103
  }
104
+ /**
105
+ * Tracks references used in a highlight without changing their numbers
106
+ */
107
+ function trackReferencesInHighlight(text, sourceResult // Source containing the original references
108
+ ) {
109
+ // Track used references
110
+ const references = [];
111
+ if (!text || text.length === 0 || !text.includes('#')) {
112
+ return { references }; // Early return
113
+ }
114
+ // Quick check for reference markers
115
+ if (!text.includes('link#') &&
116
+ !text.includes('image#') &&
117
+ !text.includes('video#')) {
118
+ return { references };
119
+ }
120
+ // Get references from the source if available
121
+ const sourceRefs = sourceResult.references || {
122
+ links: [],
123
+ images: [],
124
+ videos: [],
125
+ };
126
+ // Find references but don't modify text
127
+ const refRegex = /\((link|image|video)#(\d+)(?:\s+"([^"]*)")?\)/g;
128
+ let match;
129
+ while ((match = refRegex.exec(text)) !== null) {
130
+ const [, type, indexStr] = match;
131
+ const originalIndex = parseInt(indexStr, 10) - 1; // Convert to 0-based
132
+ // Get the source array for this type
133
+ const refType = type;
134
+ const sourceArray = sourceRefs[`${refType}s`];
135
+ // Skip if invalid reference
136
+ if (!sourceArray ||
137
+ originalIndex < 0 ||
138
+ originalIndex >= sourceArray.length) {
139
+ continue; // Skip invalid references
140
+ }
141
+ // Get original reference
142
+ const reference = sourceArray[originalIndex];
143
+ // Track if not already tracked
144
+ const alreadyTracked = references.some((ref) => ref.type === refType && ref.originalIndex === originalIndex);
145
+ if (!alreadyTracked) {
146
+ references.push({
147
+ type: refType,
148
+ originalIndex,
149
+ reference,
150
+ });
151
+ }
152
+ }
153
+ return { references };
154
+ }
104
155
  /**
105
156
  * Expand highlights in search results using smart boundary detection.
106
157
  *
@@ -110,19 +161,16 @@ function findBestBoundary(text, direction = 'backward') {
110
161
  * @param searchResults - Search results object
111
162
  * @param mainExpandBy - Primary expansion size on each side (default: 300)
112
163
  * @param separatorExpandBy - Additional range to look for separators (default: 150)
113
- * @returns Copy of search results with expanded highlights
164
+ * @returns Copy of search results with expanded highlights and tracked references
114
165
  */
115
166
  function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy = 150) {
116
- // 1. Avoid full deep copy - only copy what we modify
167
+ // Avoid deep copy - only copy what we modify
117
168
  const resultCopy = { ...searchResults };
118
- // Only deep copy the relevant arrays
119
- if (resultCopy.organic) {
169
+ if (resultCopy.organic)
120
170
  resultCopy.organic = [...resultCopy.organic];
121
- }
122
- if (resultCopy.topStories) {
171
+ if (resultCopy.topStories)
123
172
  resultCopy.topStories = [...resultCopy.topStories];
124
- }
125
- // 5. Process the results efficiently
173
+ // Process the results efficiently
126
174
  const processResultTypes = ['organic', 'topStories'];
127
175
  for (const resultType of processResultTypes) {
128
176
  if (!resultCopy[resultType])
@@ -141,17 +189,18 @@ function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy =
141
189
  const highlights = [];
142
190
  // Process each highlight
143
191
  for (const highlight of result.highlights) {
144
- const highlightText = highlight.text;
145
- let startPos = content.indexOf(highlightText);
146
- let highlightLen = highlightText.length;
192
+ const { references } = trackReferencesInHighlight(highlight.text, result);
193
+ let startPos = content.indexOf(highlight.text);
194
+ let highlightLen = highlight.text.length;
147
195
  if (startPos === -1) {
148
196
  // Try with stripped whitespace
149
- const strippedHighlight = highlightText.trim();
197
+ const strippedHighlight = highlight.text.trim();
150
198
  startPos = content.indexOf(strippedHighlight);
151
199
  if (startPos === -1) {
152
200
  highlights.push({
153
201
  text: highlight.text,
154
202
  score: highlight.score,
203
+ references,
155
204
  });
156
205
  continue;
157
206
  }
@@ -178,9 +227,12 @@ function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy =
178
227
  highlights.push({
179
228
  text: expandedHighlightText,
180
229
  score: highlight.score,
230
+ references,
181
231
  });
182
232
  }
183
233
  resultCopy.highlights = highlights;
234
+ delete resultCopy.content;
235
+ delete resultCopy.references;
184
236
  return resultCopy;
185
237
  });
186
238
  }