@librechat/agents 2.4.317 → 2.4.319
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/events.cjs +3 -3
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/main.cjs +5 -2
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/ids.cjs +23 -0
- package/dist/cjs/messages/ids.cjs.map +1 -0
- package/dist/cjs/stream.cjs +8 -155
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +144 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -0
- package/dist/cjs/tools/search/content.cjs +140 -0
- package/dist/cjs/tools/search/content.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +17 -37
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +79 -29
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs +64 -13
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +13 -15
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +42 -12
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +35 -0
- package/dist/cjs/tools/search/utils.cjs.map +1 -0
- package/dist/esm/events.mjs +1 -1
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/ids.mjs +21 -0
- package/dist/esm/messages/ids.mjs.map +1 -0
- package/dist/esm/stream.mjs +7 -152
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +141 -0
- package/dist/esm/tools/handlers.mjs.map +1 -0
- package/dist/esm/tools/search/content.mjs +119 -0
- package/dist/esm/tools/search/content.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +18 -37
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +79 -29
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs +64 -13
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +12 -14
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +42 -12
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +32 -0
- package/dist/esm/tools/search/utils.mjs.map +1 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/messages/ids.d.ts +3 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/stream.d.ts +0 -8
- package/dist/types/tools/handlers.d.ts +8 -0
- package/dist/types/tools/search/content.d.ts +4 -0
- package/dist/types/tools/search/firecrawl.d.ts +6 -86
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/highlights.d.ts +1 -1
- package/dist/types/tools/search/search.d.ts +1 -1
- package/dist/types/tools/search/test.d.ts +1 -0
- package/dist/types/tools/search/tool.d.ts +12 -4
- package/dist/types/tools/search/types.d.ts +388 -53
- package/dist/types/tools/search/utils.d.ts +3 -0
- package/package.json +2 -1
- package/src/events.ts +49 -15
- package/src/index.ts +1 -0
- package/src/messages/ids.ts +26 -0
- package/src/messages/index.ts +1 -0
- package/src/scripts/search.ts +5 -3
- package/src/stream.ts +4 -186
- package/src/tools/handlers.ts +167 -0
- package/src/tools/search/content.test.ts +173 -0
- package/src/tools/search/content.ts +147 -0
- package/src/tools/search/firecrawl.ts +27 -144
- package/src/tools/search/format.ts +89 -31
- package/src/tools/search/highlights.ts +99 -17
- package/src/tools/search/output.md +2775 -0
- package/src/tools/search/search.ts +42 -54
- package/src/tools/search/test.html +884 -0
- package/src/tools/search/test.md +643 -0
- package/src/tools/search/test.ts +159 -0
- package/src/tools/search/tool.ts +52 -15
- package/src/tools/search/types.ts +439 -61
- package/src/tools/search/utils.ts +43 -0
|
@@ -101,6 +101,57 @@ function findBestBoundary(text, direction = 'backward') {
|
|
|
101
101
|
// No match found, use character boundary
|
|
102
102
|
return direction === 'backward' ? text.length : 0;
|
|
103
103
|
}
|
|
104
|
+
/**
|
|
105
|
+
* Tracks references used in a highlight without changing their numbers
|
|
106
|
+
*/
|
|
107
|
+
function trackReferencesInHighlight(text, sourceResult // Source containing the original references
|
|
108
|
+
) {
|
|
109
|
+
// Track used references
|
|
110
|
+
const references = [];
|
|
111
|
+
if (!text || text.length === 0 || !text.includes('#')) {
|
|
112
|
+
return { references }; // Early return
|
|
113
|
+
}
|
|
114
|
+
// Quick check for reference markers
|
|
115
|
+
if (!text.includes('link#') &&
|
|
116
|
+
!text.includes('image#') &&
|
|
117
|
+
!text.includes('video#')) {
|
|
118
|
+
return { references };
|
|
119
|
+
}
|
|
120
|
+
// Get references from the source if available
|
|
121
|
+
const sourceRefs = sourceResult.references || {
|
|
122
|
+
links: [],
|
|
123
|
+
images: [],
|
|
124
|
+
videos: [],
|
|
125
|
+
};
|
|
126
|
+
// Find references but don't modify text
|
|
127
|
+
const refRegex = /\((link|image|video)#(\d+)(?:\s+"([^"]*)")?\)/g;
|
|
128
|
+
let match;
|
|
129
|
+
while ((match = refRegex.exec(text)) !== null) {
|
|
130
|
+
const [, type, indexStr] = match;
|
|
131
|
+
const originalIndex = parseInt(indexStr, 10) - 1; // Convert to 0-based
|
|
132
|
+
// Get the source array for this type
|
|
133
|
+
const refType = type;
|
|
134
|
+
const sourceArray = sourceRefs[`${refType}s`];
|
|
135
|
+
// Skip if invalid reference
|
|
136
|
+
if (!sourceArray ||
|
|
137
|
+
originalIndex < 0 ||
|
|
138
|
+
originalIndex >= sourceArray.length) {
|
|
139
|
+
continue; // Skip invalid references
|
|
140
|
+
}
|
|
141
|
+
// Get original reference
|
|
142
|
+
const reference = sourceArray[originalIndex];
|
|
143
|
+
// Track if not already tracked
|
|
144
|
+
const alreadyTracked = references.some((ref) => ref.type === refType && ref.originalIndex === originalIndex);
|
|
145
|
+
if (!alreadyTracked) {
|
|
146
|
+
references.push({
|
|
147
|
+
type: refType,
|
|
148
|
+
originalIndex,
|
|
149
|
+
reference,
|
|
150
|
+
});
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
return { references };
|
|
154
|
+
}
|
|
104
155
|
/**
|
|
105
156
|
* Expand highlights in search results using smart boundary detection.
|
|
106
157
|
*
|
|
@@ -110,19 +161,16 @@ function findBestBoundary(text, direction = 'backward') {
|
|
|
110
161
|
* @param searchResults - Search results object
|
|
111
162
|
* @param mainExpandBy - Primary expansion size on each side (default: 300)
|
|
112
163
|
* @param separatorExpandBy - Additional range to look for separators (default: 150)
|
|
113
|
-
* @returns Copy of search results with expanded highlights
|
|
164
|
+
* @returns Copy of search results with expanded highlights and tracked references
|
|
114
165
|
*/
|
|
115
166
|
function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy = 150) {
|
|
116
|
-
//
|
|
167
|
+
// Avoid deep copy - only copy what we modify
|
|
117
168
|
const resultCopy = { ...searchResults };
|
|
118
|
-
|
|
119
|
-
if (resultCopy.organic) {
|
|
169
|
+
if (resultCopy.organic)
|
|
120
170
|
resultCopy.organic = [...resultCopy.organic];
|
|
121
|
-
|
|
122
|
-
if (resultCopy.topStories) {
|
|
171
|
+
if (resultCopy.topStories)
|
|
123
172
|
resultCopy.topStories = [...resultCopy.topStories];
|
|
124
|
-
|
|
125
|
-
// 5. Process the results efficiently
|
|
173
|
+
// Process the results efficiently
|
|
126
174
|
const processResultTypes = ['organic', 'topStories'];
|
|
127
175
|
for (const resultType of processResultTypes) {
|
|
128
176
|
if (!resultCopy[resultType])
|
|
@@ -141,17 +189,18 @@ function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy =
|
|
|
141
189
|
const highlights = [];
|
|
142
190
|
// Process each highlight
|
|
143
191
|
for (const highlight of result.highlights) {
|
|
144
|
-
const
|
|
145
|
-
let startPos = content.indexOf(
|
|
146
|
-
let highlightLen =
|
|
192
|
+
const { references } = trackReferencesInHighlight(highlight.text, result);
|
|
193
|
+
let startPos = content.indexOf(highlight.text);
|
|
194
|
+
let highlightLen = highlight.text.length;
|
|
147
195
|
if (startPos === -1) {
|
|
148
196
|
// Try with stripped whitespace
|
|
149
|
-
const strippedHighlight =
|
|
197
|
+
const strippedHighlight = highlight.text.trim();
|
|
150
198
|
startPos = content.indexOf(strippedHighlight);
|
|
151
199
|
if (startPos === -1) {
|
|
152
200
|
highlights.push({
|
|
153
201
|
text: highlight.text,
|
|
154
202
|
score: highlight.score,
|
|
203
|
+
references,
|
|
155
204
|
});
|
|
156
205
|
continue;
|
|
157
206
|
}
|
|
@@ -178,10 +227,12 @@ function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy =
|
|
|
178
227
|
highlights.push({
|
|
179
228
|
text: expandedHighlightText,
|
|
180
229
|
score: highlight.score,
|
|
230
|
+
references,
|
|
181
231
|
});
|
|
182
232
|
}
|
|
183
|
-
delete resultCopy.content;
|
|
184
233
|
resultCopy.highlights = highlights;
|
|
234
|
+
delete resultCopy.content;
|
|
235
|
+
delete resultCopy.references;
|
|
185
236
|
return resultCopy;
|
|
186
237
|
});
|
|
187
238
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"highlights.mjs","sources":["../../../../src/tools/search/highlights.ts"],"sourcesContent":["import type * as t from './types';\n\n// 2. Pre-compile all regular expressions (only do this once)\n// Group patterns by priority for early returns\nconst priorityPatterns = [\n // High priority patterns (structural)\n [\n { regex: /\\n\\n/g }, // Double newline (paragraph break)\n { regex: /\\n/g }, // Single newline\n { regex: /={3,}\\s*\\n|-{3,}\\s*\\n/g }, // Section separators\n ],\n // Medium priority (semantic)\n [\n { regex: /[.!?][\")\\]]?\\s/g }, // End of sentence\n { regex: /;\\s/g }, // Semicolon\n { regex: /:\\s/g }, // Colon\n ],\n // Low priority (any breaks)\n [\n { regex: /,\\s/g }, // Comma\n { regex: /\\s-\\s/g }, // Dash surrounded by spaces\n { regex: /\\s/g }, // Any space\n ],\n];\n\nfunction findFirstMatch(text: string, regex: RegExp): number {\n // Reset regex\n regex.lastIndex = 0;\n\n // For very long texts, try chunking\n if (text.length > 10000) {\n const chunkSize = 2000;\n let position = 0;\n\n while (position < text.length) {\n const chunk = text.substring(position, position + chunkSize);\n regex.lastIndex = 0;\n\n const match = regex.exec(chunk);\n if (match) {\n return position + match.index;\n }\n\n // Move to next chunk with some overlap\n position += chunkSize - 100;\n if (position >= text.length) break;\n }\n return -1;\n }\n\n // For shorter texts, normal regex search\n const match = regex.exec(text);\n return match ? match.index : -1;\n}\n\n// 3. Optimized boundary finding functions\nfunction findLastMatch(text: string, regex: RegExp): number {\n // Reset regex state\n regex.lastIndex = 0;\n\n let lastIndex = -1;\n let lastLength = 0;\n let match;\n\n // For very long texts, use a different approach to avoid regex engine slowdowns\n if (text.length > 10000) {\n // Try dividing the text into chunks for faster processing\n const chunkSize = 2000;\n let startPosition = Math.max(0, text.length - chunkSize);\n\n while (startPosition >= 0) {\n const chunk = text.substring(startPosition, startPosition + chunkSize);\n regex.lastIndex = 0;\n\n let chunkLastIndex = -1;\n let chunkLastLength = 0;\n\n while ((match = regex.exec(chunk)) !== null) {\n chunkLastIndex = match.index;\n chunkLastLength = match[0].length;\n }\n\n if (chunkLastIndex !== -1) {\n return startPosition + chunkLastIndex + chunkLastLength;\n }\n\n // Move to previous chunk with some overlap\n startPosition = Math.max(0, startPosition - chunkSize + 100) - 1;\n if (startPosition <= 0) break;\n }\n return -1;\n }\n\n // For shorter texts, normal regex search\n while ((match = regex.exec(text)) !== null) {\n lastIndex = match.index;\n lastLength = match[0].length;\n }\n\n return lastIndex === -1 ? -1 : lastIndex + lastLength;\n}\n\n// 4. Find the best boundary with priority groups\nfunction findBestBoundary(text: string, direction = 'backward'): number {\n if (!text || text.length === 0) return 0;\n\n // Try each priority group\n for (const patternGroup of priorityPatterns) {\n for (const pattern of patternGroup) {\n const position =\n direction === 'backward'\n ? findLastMatch(text, pattern.regex)\n : findFirstMatch(text, pattern.regex);\n\n if (position !== -1) {\n return position;\n }\n }\n }\n\n // No match found, use character boundary\n return direction === 'backward' ? text.length : 0;\n}\n\n/**\n * Expand highlights in search results using smart boundary detection.\n *\n * This implementation finds natural text boundaries like paragraphs, sentences,\n * and phrases to provide context while maintaining readability.\n *\n * @param searchResults - Search results object\n * @param mainExpandBy - Primary expansion size on each side (default: 300)\n * @param separatorExpandBy - Additional range to look for separators (default: 150)\n * @returns Copy of search results with expanded highlights\n */\nexport function expandHighlights(\n searchResults: t.SearchResultData,\n mainExpandBy = 300,\n separatorExpandBy = 150\n): t.SearchResultData {\n // 1. Avoid full deep copy - only copy what we modify\n const resultCopy = { ...searchResults };\n\n // Only deep copy the relevant arrays\n if (resultCopy.organic) {\n resultCopy.organic = [...resultCopy.organic];\n }\n if (resultCopy.topStories) {\n resultCopy.topStories = [...resultCopy.topStories];\n }\n\n // 5. Process the results efficiently\n const processResultTypes = ['organic', 'topStories'] as const;\n\n for (const resultType of processResultTypes) {\n if (!resultCopy[resultType as 'organic' | 'topStories']) continue;\n\n // Map results to new array with modified highlights\n resultCopy[resultType] = resultCopy[resultType]?.map((result) => {\n if (\n result.content == null ||\n result.content === '' ||\n !result.highlights ||\n result.highlights.length === 0\n ) {\n return result; // No modification needed\n }\n\n // Create a shallow copy with expanded highlights\n const resultCopy = { ...result };\n const content = result.content;\n const highlights = [];\n\n // Process each highlight\n for (const highlight of result.highlights) {\n const highlightText = highlight.text;\n\n let startPos = content.indexOf(highlightText);\n let highlightLen = highlightText.length;\n\n if (startPos === -1) {\n // Try with stripped whitespace\n const strippedHighlight = highlightText.trim();\n startPos = content.indexOf(strippedHighlight);\n\n if (startPos === -1) {\n highlights.push({\n text: highlight.text,\n score: highlight.score,\n });\n continue;\n }\n highlightLen = strippedHighlight.length;\n }\n\n // Calculate boundaries\n const mainStart = Math.max(0, startPos - mainExpandBy);\n const mainEnd = Math.min(\n content.length,\n startPos + highlightLen + mainExpandBy\n );\n\n const separatorStart = Math.max(0, mainStart - separatorExpandBy);\n const separatorEnd = Math.min(\n content.length,\n mainEnd + separatorExpandBy\n );\n\n // Extract text segments\n const headText = content.substring(separatorStart, mainStart);\n const tailText = content.substring(mainEnd, separatorEnd);\n\n // Find natural boundaries\n const bestHeadBoundary = findBestBoundary(headText, 'backward');\n const bestTailBoundary = findBestBoundary(tailText, 'forward');\n\n // Calculate final positions\n const finalStart = separatorStart + bestHeadBoundary;\n const finalEnd = mainEnd + bestTailBoundary;\n\n // Extract the expanded highlight\n const expandedHighlightText = content\n .substring(finalStart, finalEnd)\n .trim();\n highlights.push({\n text: expandedHighlightText,\n score: highlight.score,\n });\n }\n\n delete resultCopy.content;\n resultCopy.highlights = highlights;\n return resultCopy;\n });\n }\n\n return resultCopy;\n}\n"],"names":[],"mappings":"AAEA;AACA;AACA,MAAM,gBAAgB,GAAG;;AAEvB,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,OAAO,EAAE;AAClB,QAAA,EAAE,KAAK,EAAE,KAAK,EAAE;AAChB,QAAA,EAAE,KAAK,EAAE,wBAAwB,EAAE;AACpC,KAAA;;AAED,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,iBAAiB,EAAE;AAC5B,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AACjB,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AAClB,KAAA;;AAED,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AACjB,QAAA,EAAE,KAAK,EAAE,QAAQ,EAAE;AACnB,QAAA,EAAE,KAAK,EAAE,KAAK,EAAE;AACjB,KAAA;CACF;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,KAAa,EAAA;;AAEjD,IAAA,KAAK,CAAC,SAAS,GAAG,CAAC;;AAGnB,IAAA,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE;QACvB,MAAM,SAAS,GAAG,IAAI;QACtB,IAAI,QAAQ,GAAG,CAAC;AAEhB,QAAA,OAAO,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE;AAC7B,YAAA,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,QAAQ,GAAG,SAAS,CAAC;AAC5D,YAAA,KAAK,CAAC,SAAS,GAAG,CAAC;YAEnB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC;YAC/B,IAAI,KAAK,EAAE;AACT,gBAAA,OAAO,QAAQ,GAAG,KAAK,CAAC,KAAK;;;AAI/B,YAAA,QAAQ,IAAI,SAAS,GAAG,GAAG;AAC3B,YAAA,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM;gBAAE;;QAE/B,OAAO,EAAE;;;IAIX,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AAC9B,IAAA,OAAO,KAAK,GAAG,KAAK,CAAC,KAAK,GAAG,EAAE;AACjC;AAEA;AACA,SAAS,aAAa,CAAC,IAAY,EAAE,KAAa,EAAA;;AAEhD,IAAA,KAAK,CAAC,SAAS,GAAG,CAAC;AAEnB,IAAA,IAAI,SAAS,GAAG,EAAE;IAClB,IAAI,UAAU,GAAG,CAAC;AAClB,IAAA,IAAI,KAAK;;AAGT,IAAA,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE;;QAEvB,MAAM,SAAS,GAAG,IAAI;AACtB,QAAA,IAAI,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;AAExD,QAAA,OAAO,aAAa,IAAI,CAAC,EAAE;AACzB,YAAA,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,aAAa,EAAE,aAAa,GAAG,SAAS,CAAC;AACtE,YAAA,KAAK,CAAC,SAAS,GAAG,CAAC;AAEnB,YAAA,IAAI,cAAc,GAAG,EAAE;YACvB,IAAI,eAAe,GAAG,CAAC;AAEvB,YAAA,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,EAAE;AAC3C,gBAAA,cAAc,GAAG,KAAK,CAAC,KAAK;AAC5B,gBAAA,eAAe,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;;AAGnC,YAAA,IAAI,cAAc,KAAK,EAAE,EAAE;AACzB,gBAAA,OAAO,aAAa,GAAG,cAAc,GAAG,eAAe;;;AAIzD,YAAA,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,aAAa,GAAG,SAAS,GAAG,GAAG,CAAC,GAAG,CAAC;YAChE,IAAI,aAAa,IAAI,CAAC;gBAAE;;QAE1B,OAAO,EAAE;;;AAIX,IAAA,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE;AAC1C,QAAA,SAAS,GAAG,KAAK,CAAC,KAAK;AACvB,QAAA,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;;AAG9B,IAAA,OAAO,SAAS,KAAK,EAAE,GAAG,EAAE,GAAG,SAAS,GAAG,UAAU;AACvD;AAEA;AACA,SAAS,gBAAgB,CAAC,IAAY,EAAE,SAAS,GAAG,UAAU,EAAA;AAC5D,IAAA,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;AAAE,QAAA,OAAO,CAAC;;AAGxC,IAAA,KAAK,MAAM,YAAY,IAAI,gBAAgB,EAAE;AAC3C,QAAA,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE;AAClC,YAAA,MAAM,QAAQ,GACZ,SAAS,KAAK;kBACV,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK;kBACjC,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC;AAEzC,YAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;AACnB,gBAAA,OAAO,QAAQ;;;;;AAMrB,IAAA,OAAO,SAAS,KAAK,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC;AACnD;AAEA;;;;;;;;;;AAUG;AACG,SAAU,gBAAgB,CAC9B,aAAiC,EACjC,YAAY,GAAG,GAAG,EAClB,iBAAiB,GAAG,GAAG,EAAA;;AAGvB,IAAA,MAAM,UAAU,GAAG,EAAE,GAAG,aAAa,EAAE;;AAGvC,IAAA,IAAI,UAAU,CAAC,OAAO,EAAE;QACtB,UAAU,CAAC,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC;;AAE9C,IAAA,IAAI,UAAU,CAAC,UAAU,EAAE;QACzB,UAAU,CAAC,UAAU,GAAG,CAAC,GAAG,UAAU,CAAC,UAAU,CAAC;;;AAIpD,IAAA,MAAM,kBAAkB,GAAG,CAAC,SAAS,EAAE,YAAY,CAAU;AAE7D,IAAA,KAAK,MAAM,UAAU,IAAI,kBAAkB,EAAE;AAC3C,QAAA,IAAI,CAAC,UAAU,CAAC,UAAsC,CAAC;YAAE;;AAGzD,QAAA,UAAU,CAAC,UAAU,CAAC,GAAG,UAAU,CAAC,UAAU,CAAC,EAAE,GAAG,CAAC,CAAC,MAAM,KAAI;AAC9D,YAAA,IACE,MAAM,CAAC,OAAO,IAAI,IAAI;gBACtB,MAAM,CAAC,OAAO,KAAK,EAAE;gBACrB,CAAC,MAAM,CAAC,UAAU;AAClB,gBAAA,MAAM,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAC9B;gBACA,OAAO,MAAM,CAAC;;;AAIhB,YAAA,MAAM,UAAU,GAAG,EAAE,GAAG,MAAM,EAAE;AAChC,YAAA,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO;YAC9B,MAAM,UAAU,GAAG,EAAE;;AAGrB,YAAA,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,UAAU,EAAE;AACzC,gBAAA,MAAM,aAAa,GAAG,SAAS,CAAC,IAAI;gBAEpC,IAAI,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC;AAC7C,gBAAA,IAAI,YAAY,GAAG,aAAa,CAAC,MAAM;AAEvC,gBAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;;AAEnB,oBAAA,MAAM,iBAAiB,GAAG,aAAa,CAAC,IAAI,EAAE;AAC9C,oBAAA,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,CAAC;AAE7C,oBAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;wBACnB,UAAU,CAAC,IAAI,CAAC;4BACd,IAAI,EAAE,SAAS,CAAC,IAAI;4BACpB,KAAK,EAAE,SAAS,CAAC,KAAK;AACvB,yBAAA,CAAC;wBACF;;AAEF,oBAAA,YAAY,GAAG,iBAAiB,CAAC,MAAM;;;AAIzC,gBAAA,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,GAAG,YAAY,CAAC;AACtD,gBAAA,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CACtB,OAAO,CAAC,MAAM,EACd,QAAQ,GAAG,YAAY,GAAG,YAAY,CACvC;AAED,gBAAA,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,iBAAiB,CAAC;AACjE,gBAAA,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAC3B,OAAO,CAAC,MAAM,EACd,OAAO,GAAG,iBAAiB,CAC5B;;gBAGD,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC;gBAC7D,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,YAAY,CAAC;;gBAGzD,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,QAAQ,EAAE,UAAU,CAAC;gBAC/D,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,QAAQ,EAAE,SAAS,CAAC;;AAG9D,gBAAA,MAAM,UAAU,GAAG,cAAc,GAAG,gBAAgB;AACpD,gBAAA,MAAM,QAAQ,GAAG,OAAO,GAAG,gBAAgB;;gBAG3C,MAAM,qBAAqB,GAAG;AAC3B,qBAAA,SAAS,CAAC,UAAU,EAAE,QAAQ;AAC9B,qBAAA,IAAI,EAAE;gBACT,UAAU,CAAC,IAAI,CAAC;AACd,oBAAA,IAAI,EAAE,qBAAqB;oBAC3B,KAAK,EAAE,SAAS,CAAC,KAAK;AACvB,iBAAA,CAAC;;YAGJ,OAAO,UAAU,CAAC,OAAO;AACzB,YAAA,UAAU,CAAC,UAAU,GAAG,UAAU;AAClC,YAAA,OAAO,UAAU;AACnB,SAAC,CAAC;;AAGJ,IAAA,OAAO,UAAU;AACnB;;;;"}
|
|
1
|
+
{"version":3,"file":"highlights.mjs","sources":["../../../../src/tools/search/highlights.ts"],"sourcesContent":["import type * as t from './types';\n\n// 2. Pre-compile all regular expressions (only do this once)\n// Group patterns by priority for early returns\nconst priorityPatterns = [\n // High priority patterns (structural)\n [\n { regex: /\\n\\n/g }, // Double newline (paragraph break)\n { regex: /\\n/g }, // Single newline\n { regex: /={3,}\\s*\\n|-{3,}\\s*\\n/g }, // Section separators\n ],\n // Medium priority (semantic)\n [\n { regex: /[.!?][\")\\]]?\\s/g }, // End of sentence\n { regex: /;\\s/g }, // Semicolon\n { regex: /:\\s/g }, // Colon\n ],\n // Low priority (any breaks)\n [\n { regex: /,\\s/g }, // Comma\n { regex: /\\s-\\s/g }, // Dash surrounded by spaces\n { regex: /\\s/g }, // Any space\n ],\n];\n\nfunction findFirstMatch(text: string, regex: RegExp): number {\n // Reset regex\n regex.lastIndex = 0;\n\n // For very long texts, try chunking\n if (text.length > 10000) {\n const chunkSize = 2000;\n let position = 0;\n\n while (position < text.length) {\n const chunk = text.substring(position, position + chunkSize);\n regex.lastIndex = 0;\n\n const match = regex.exec(chunk);\n if (match) {\n return position + match.index;\n }\n\n // Move to next chunk with some overlap\n position += chunkSize - 100;\n if (position >= text.length) break;\n }\n return -1;\n }\n\n // For shorter texts, normal regex search\n const match = regex.exec(text);\n return match ? match.index : -1;\n}\n\n// 3. Optimized boundary finding functions\nfunction findLastMatch(text: string, regex: RegExp): number {\n // Reset regex state\n regex.lastIndex = 0;\n\n let lastIndex = -1;\n let lastLength = 0;\n let match;\n\n // For very long texts, use a different approach to avoid regex engine slowdowns\n if (text.length > 10000) {\n // Try dividing the text into chunks for faster processing\n const chunkSize = 2000;\n let startPosition = Math.max(0, text.length - chunkSize);\n\n while (startPosition >= 0) {\n const chunk = text.substring(startPosition, startPosition + chunkSize);\n regex.lastIndex = 0;\n\n let chunkLastIndex = -1;\n let chunkLastLength = 0;\n\n while ((match = regex.exec(chunk)) !== null) {\n chunkLastIndex = match.index;\n chunkLastLength = match[0].length;\n }\n\n if (chunkLastIndex !== -1) {\n return startPosition + chunkLastIndex + chunkLastLength;\n }\n\n // Move to previous chunk with some overlap\n startPosition = Math.max(0, startPosition - chunkSize + 100) - 1;\n if (startPosition <= 0) break;\n }\n return -1;\n }\n\n // For shorter texts, normal regex search\n while ((match = regex.exec(text)) !== null) {\n lastIndex = match.index;\n lastLength = match[0].length;\n }\n\n return lastIndex === -1 ? -1 : lastIndex + lastLength;\n}\n\n// 4. Find the best boundary with priority groups\nfunction findBestBoundary(text: string, direction = 'backward'): number {\n if (!text || text.length === 0) return 0;\n\n // Try each priority group\n for (const patternGroup of priorityPatterns) {\n for (const pattern of patternGroup) {\n const position =\n direction === 'backward'\n ? findLastMatch(text, pattern.regex)\n : findFirstMatch(text, pattern.regex);\n\n if (position !== -1) {\n return position;\n }\n }\n }\n\n // No match found, use character boundary\n return direction === 'backward' ? text.length : 0;\n}\n\n/**\n * Tracks references used in a highlight without changing their numbers\n */\nfunction trackReferencesInHighlight(\n text: string,\n sourceResult: t.ValidSource // Source containing the original references\n): {\n references: {\n type: 'link' | 'image' | 'video';\n originalIndex: number;\n reference: t.MediaReference; // Original reference object\n }[];\n} {\n // Track used references\n const references: {\n type: 'link' | 'image' | 'video';\n originalIndex: number;\n reference: t.MediaReference;\n }[] = [];\n\n if (!text || text.length === 0 || !text.includes('#')) {\n return { references }; // Early return\n }\n\n // Quick check for reference markers\n if (\n !text.includes('link#') &&\n !text.includes('image#') &&\n !text.includes('video#')\n ) {\n return { references };\n }\n\n // Get references from the source if available\n const sourceRefs = sourceResult.references || {\n links: [],\n images: [],\n videos: [],\n };\n\n // Find references but don't modify text\n const refRegex = /\\((link|image|video)#(\\d+)(?:\\s+\"([^\"]*)\")?\\)/g;\n let match;\n\n while ((match = refRegex.exec(text)) !== null) {\n const [, type, indexStr] = match;\n const originalIndex = parseInt(indexStr, 10) - 1; // Convert to 0-based\n\n // Get the source array for this type\n const refType = type as 'link' | 'image' | 'video';\n const sourceArray = sourceRefs[`${refType}s`] as\n | t.MediaReference[]\n | undefined;\n\n // Skip if invalid reference\n if (\n !sourceArray ||\n originalIndex < 0 ||\n originalIndex >= sourceArray.length\n ) {\n continue; // Skip invalid references\n }\n\n // Get original reference\n const reference = sourceArray[originalIndex];\n\n // Track if not already tracked\n const alreadyTracked = references.some(\n (ref) => ref.type === refType && ref.originalIndex === originalIndex\n );\n\n if (!alreadyTracked) {\n references.push({\n type: refType,\n originalIndex,\n reference,\n });\n }\n }\n\n return { references };\n}\n\n/**\n * Expand highlights in search results using smart boundary detection.\n *\n * This implementation finds natural text boundaries like paragraphs, sentences,\n * and phrases to provide context while maintaining readability.\n *\n * @param searchResults - Search results object\n * @param mainExpandBy - Primary expansion size on each side (default: 300)\n * @param separatorExpandBy - Additional range to look for separators (default: 150)\n * @returns Copy of search results with expanded highlights and tracked references\n */\nexport function expandHighlights(\n searchResults: t.SearchResultData,\n mainExpandBy = 300,\n separatorExpandBy = 150\n): t.SearchResultData {\n // Avoid deep copy - only copy what we modify\n const resultCopy = { ...searchResults };\n if (resultCopy.organic) resultCopy.organic = [...resultCopy.organic];\n if (resultCopy.topStories) resultCopy.topStories = [...resultCopy.topStories];\n\n // Process the results efficiently\n const processResultTypes = ['organic', 'topStories'] as const;\n\n for (const resultType of processResultTypes) {\n if (!resultCopy[resultType as 'organic' | 'topStories']) continue;\n\n // Map results to new array with modified highlights\n resultCopy[resultType] = resultCopy[resultType]?.map((result) => {\n if (\n result.content == null ||\n result.content === '' ||\n !result.highlights ||\n result.highlights.length === 0\n ) {\n return result; // No modification needed\n }\n\n // Create a shallow copy with expanded highlights\n const resultCopy = { ...result };\n const content = result.content;\n const highlights = [];\n // Process each highlight\n for (const highlight of result.highlights) {\n const { references } = trackReferencesInHighlight(\n highlight.text,\n result\n );\n\n let startPos = content.indexOf(highlight.text);\n let highlightLen = highlight.text.length;\n\n if (startPos === -1) {\n // Try with stripped whitespace\n const strippedHighlight = highlight.text.trim();\n startPos = content.indexOf(strippedHighlight);\n\n if (startPos === -1) {\n highlights.push({\n text: highlight.text,\n score: highlight.score,\n references,\n });\n continue;\n }\n highlightLen = strippedHighlight.length;\n }\n\n // Calculate boundaries\n const mainStart = Math.max(0, startPos - mainExpandBy);\n const mainEnd = Math.min(\n content.length,\n startPos + highlightLen + mainExpandBy\n );\n\n const separatorStart = Math.max(0, mainStart - separatorExpandBy);\n const separatorEnd = Math.min(\n content.length,\n mainEnd + separatorExpandBy\n );\n\n // Extract text segments\n const headText = content.substring(separatorStart, mainStart);\n const tailText = content.substring(mainEnd, separatorEnd);\n\n // Find natural boundaries\n const bestHeadBoundary = findBestBoundary(headText, 'backward');\n const bestTailBoundary = findBestBoundary(tailText, 'forward');\n\n // Calculate final positions\n const finalStart = separatorStart + bestHeadBoundary;\n const finalEnd = mainEnd + bestTailBoundary;\n\n // Extract the expanded highlight\n const expandedHighlightText = content\n .substring(finalStart, finalEnd)\n .trim();\n highlights.push({\n text: expandedHighlightText,\n score: highlight.score,\n references,\n });\n }\n\n resultCopy.highlights = highlights;\n delete resultCopy.content;\n delete resultCopy.references;\n return resultCopy;\n });\n }\n\n return resultCopy;\n}\n"],"names":[],"mappings":"AAEA;AACA;AACA,MAAM,gBAAgB,GAAG;;AAEvB,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,OAAO,EAAE;AAClB,QAAA,EAAE,KAAK,EAAE,KAAK,EAAE;AAChB,QAAA,EAAE,KAAK,EAAE,wBAAwB,EAAE;AACpC,KAAA;;AAED,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,iBAAiB,EAAE;AAC5B,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AACjB,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AAClB,KAAA;;AAED,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AACjB,QAAA,EAAE,KAAK,EAAE,QAAQ,EAAE;AACnB,QAAA,EAAE,KAAK,EAAE,KAAK,EAAE;AACjB,KAAA;CACF;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,KAAa,EAAA;;AAEjD,IAAA,KAAK,CAAC,SAAS,GAAG,CAAC;;AAGnB,IAAA,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE;QACvB,MAAM,SAAS,GAAG,IAAI;QACtB,IAAI,QAAQ,GAAG,CAAC;AAEhB,QAAA,OAAO,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE;AAC7B,YAAA,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,QAAQ,GAAG,SAAS,CAAC;AAC5D,YAAA,KAAK,CAAC,SAAS,GAAG,CAAC;YAEnB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC;YAC/B,IAAI,KAAK,EAAE;AACT,gBAAA,OAAO,QAAQ,GAAG,KAAK,CAAC,KAAK;;;AAI/B,YAAA,QAAQ,IAAI,SAAS,GAAG,GAAG;AAC3B,YAAA,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM;gBAAE;;QAE/B,OAAO,EAAE;;;IAIX,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AAC9B,IAAA,OAAO,KAAK,GAAG,KAAK,CAAC,KAAK,GAAG,EAAE;AACjC;AAEA;AACA,SAAS,aAAa,CAAC,IAAY,EAAE,KAAa,EAAA;;AAEhD,IAAA,KAAK,CAAC,SAAS,GAAG,CAAC;AAEnB,IAAA,IAAI,SAAS,GAAG,EAAE;IAClB,IAAI,UAAU,GAAG,CAAC;AAClB,IAAA,IAAI,KAAK;;AAGT,IAAA,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE;;QAEvB,MAAM,SAAS,GAAG,IAAI;AACtB,QAAA,IAAI,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;AAExD,QAAA,OAAO,aAAa,IAAI,CAAC,EAAE;AACzB,YAAA,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,aAAa,EAAE,aAAa,GAAG,SAAS,CAAC;AACtE,YAAA,KAAK,CAAC,SAAS,GAAG,CAAC;AAEnB,YAAA,IAAI,cAAc,GAAG,EAAE;YACvB,IAAI,eAAe,GAAG,CAAC;AAEvB,YAAA,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,EAAE;AAC3C,gBAAA,cAAc,GAAG,KAAK,CAAC,KAAK;AAC5B,gBAAA,eAAe,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;;AAGnC,YAAA,IAAI,cAAc,KAAK,EAAE,EAAE;AACzB,gBAAA,OAAO,aAAa,GAAG,cAAc,GAAG,eAAe;;;AAIzD,YAAA,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,aAAa,GAAG,SAAS,GAAG,GAAG,CAAC,GAAG,CAAC;YAChE,IAAI,aAAa,IAAI,CAAC;gBAAE;;QAE1B,OAAO,EAAE;;;AAIX,IAAA,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE;AAC1C,QAAA,SAAS,GAAG,KAAK,CAAC,KAAK;AACvB,QAAA,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;;AAG9B,IAAA,OAAO,SAAS,KAAK,EAAE,GAAG,EAAE,GAAG,SAAS,GAAG,UAAU;AACvD;AAEA;AACA,SAAS,gBAAgB,CAAC,IAAY,EAAE,SAAS,GAAG,UAAU,EAAA;AAC5D,IAAA,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;AAAE,QAAA,OAAO,CAAC;;AAGxC,IAAA,KAAK,MAAM,YAAY,IAAI,gBAAgB,EAAE;AAC3C,QAAA,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE;AAClC,YAAA,MAAM,QAAQ,GACZ,SAAS,KAAK;kBACV,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK;kBACjC,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC;AAEzC,YAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;AACnB,gBAAA,OAAO,QAAQ;;;;;AAMrB,IAAA,OAAO,SAAS,KAAK,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC;AACnD;AAEA;;AAEG;AACH,SAAS,0BAA0B,CACjC,IAAY,EACZ,YAA2B;;;IAS3B,MAAM,UAAU,GAIV,EAAE;AAER,IAAA,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;AACrD,QAAA,OAAO,EAAE,UAAU,EAAE,CAAC;;;AAIxB,IAAA,IACE,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;AACvB,QAAA,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;AACxB,QAAA,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EACxB;QACA,OAAO,EAAE,UAAU,EAAE;;;AAIvB,IAAA,MAAM,UAAU,GAAG,YAAY,CAAC,UAAU,IAAI;AAC5C,QAAA,KAAK,EAAE,EAAE;AACT,QAAA,MAAM,EAAE,EAAE;AACV,QAAA,MAAM,EAAE,EAAE;KACX;;IAGD,MAAM,QAAQ,GAAG,gDAAgD;AACjE,IAAA,IAAI,KAAK;AAET,IAAA,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE;QAC7C,MAAM,GAAG,IAAI,EAAE,QAAQ,CAAC,GAAG,KAAK;AAChC,QAAA,MAAM,aAAa,GAAG,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC;;QAGjD,MAAM,OAAO,GAAG,IAAkC;QAClD,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,OAAO,CAAA,CAAA,CAAG,CAE/B;;AAGb,QAAA,IACE,CAAC,WAAW;AACZ,YAAA,aAAa,GAAG,CAAC;AACjB,YAAA,aAAa,IAAI,WAAW,CAAC,MAAM,EACnC;AACA,YAAA,SAAS;;;AAIX,QAAA,MAAM,SAAS,GAAG,WAAW,CAAC,aAAa,CAAC;;QAG5C,MAAM,cAAc,GAAG,UAAU,CAAC,IAAI,CACpC,CAAC,GAAG,KAAK,GAAG,CAAC,IAAI,KAAK,OAAO,IAAI,GAAG,CAAC,aAAa,KAAK,aAAa,CACrE;QAED,IAAI,CAAC,cAAc,EAAE;YACnB,UAAU,CAAC,IAAI,CAAC;AACd,gBAAA,IAAI,EAAE,OAAO;gBACb,aAAa;gBACb,SAAS;AACV,aAAA,CAAC;;;IAIN,OAAO,EAAE,UAAU,EAAE;AACvB;AAEA;;;;;;;;;;AAUG;AACG,SAAU,gBAAgB,CAC9B,aAAiC,EACjC,YAAY,GAAG,GAAG,EAClB,iBAAiB,GAAG,GAAG,EAAA;;AAGvB,IAAA,MAAM,UAAU,GAAG,EAAE,GAAG,aAAa,EAAE;IACvC,IAAI,UAAU,CAAC,OAAO;QAAE,UAAU,CAAC,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC;IACpE,IAAI,UAAU,CAAC,UAAU;QAAE,UAAU,CAAC,UAAU,GAAG,CAAC,GAAG,UAAU,CAAC,UAAU,CAAC;;AAG7E,IAAA,MAAM,kBAAkB,GAAG,CAAC,SAAS,EAAE,YAAY,CAAU;AAE7D,IAAA,KAAK,MAAM,UAAU,IAAI,kBAAkB,EAAE;AAC3C,QAAA,IAAI,CAAC,UAAU,CAAC,UAAsC,CAAC;YAAE;;AAGzD,QAAA,UAAU,CAAC,UAAU,CAAC,GAAG,UAAU,CAAC,UAAU,CAAC,EAAE,GAAG,CAAC,CAAC,MAAM,KAAI;AAC9D,YAAA,IACE,MAAM,CAAC,OAAO,IAAI,IAAI;gBACtB,MAAM,CAAC,OAAO,KAAK,EAAE;gBACrB,CAAC,MAAM,CAAC,UAAU;AAClB,gBAAA,MAAM,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAC9B;gBACA,OAAO,MAAM,CAAC;;;AAIhB,YAAA,MAAM,UAAU,GAAG,EAAE,GAAG,MAAM,EAAE;AAChC,YAAA,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO;YAC9B,MAAM,UAAU,GAAG,EAAE;;AAErB,YAAA,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,UAAU,EAAE;AACzC,gBAAA,MAAM,EAAE,UAAU,EAAE,GAAG,0BAA0B,CAC/C,SAAS,CAAC,IAAI,EACd,MAAM,CACP;gBAED,IAAI,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC;AAC9C,gBAAA,IAAI,YAAY,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM;AAExC,gBAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;;oBAEnB,MAAM,iBAAiB,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE;AAC/C,oBAAA,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,CAAC;AAE7C,oBAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;wBACnB,UAAU,CAAC,IAAI,CAAC;4BACd,IAAI,EAAE,SAAS,CAAC,IAAI;4BACpB,KAAK,EAAE,SAAS,CAAC,KAAK;4BACtB,UAAU;AACX,yBAAA,CAAC;wBACF;;AAEF,oBAAA,YAAY,GAAG,iBAAiB,CAAC,MAAM;;;AAIzC,gBAAA,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,GAAG,YAAY,CAAC;AACtD,gBAAA,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CACtB,OAAO,CAAC,MAAM,EACd,QAAQ,GAAG,YAAY,GAAG,YAAY,CACvC;AAED,gBAAA,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,iBAAiB,CAAC;AACjE,gBAAA,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAC3B,OAAO,CAAC,MAAM,EACd,OAAO,GAAG,iBAAiB,CAC5B;;gBAGD,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC;gBAC7D,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,YAAY,CAAC;;gBAGzD,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,QAAQ,EAAE,UAAU,CAAC;gBAC/D,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,QAAQ,EAAE,SAAS,CAAC;;AAG9D,gBAAA,MAAM,UAAU,GAAG,cAAc,GAAG,gBAAgB;AACpD,gBAAA,MAAM,QAAQ,GAAG,OAAO,GAAG,gBAAgB;;gBAG3C,MAAM,qBAAqB,GAAG;AAC3B,qBAAA,SAAS,CAAC,UAAU,EAAE,QAAQ;AAC9B,qBAAA,IAAI,EAAE;gBACT,UAAU,CAAC,IAAI,CAAC;AACd,oBAAA,IAAI,EAAE,qBAAqB;oBAC3B,KAAK,EAAE,SAAS,CAAC,KAAK;oBACtB,UAAU;AACX,iBAAA,CAAC;;AAGJ,YAAA,UAAU,CAAC,UAAU,GAAG,UAAU;YAClC,OAAO,UAAU,CAAC,OAAO;YACzB,OAAO,UAAU,CAAC,UAAU;AAC5B,YAAA,OAAO,UAAU;AACnB,SAAC,CAAC;;AAGJ,IAAA,OAAO,UAAU;AACnB;;;;"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
2
|
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
|
3
|
-
import { getAttribution } from './
|
|
3
|
+
import { getAttribution } from './utils.mjs';
|
|
4
4
|
|
|
5
5
|
/* eslint-disable no-console */
|
|
6
6
|
const chunker = {
|
|
@@ -77,23 +77,23 @@ const createSerperAPI = (apiKey) => {
|
|
|
77
77
|
const config = {
|
|
78
78
|
apiKey: apiKey ?? process.env.SERPER_API_KEY,
|
|
79
79
|
apiUrl: 'https://google.serper.dev/search',
|
|
80
|
-
defaultLocation: 'us',
|
|
81
80
|
timeout: 10000,
|
|
82
81
|
};
|
|
83
82
|
if (config.apiKey == null || config.apiKey === '') {
|
|
84
83
|
throw new Error('SERPER_API_KEY is required for SerperAPI');
|
|
85
84
|
}
|
|
86
|
-
const getSources = async (query, numResults = 8,
|
|
85
|
+
const getSources = async ({ query, country, numResults = 8, }) => {
|
|
87
86
|
if (!query.trim()) {
|
|
88
87
|
return { success: false, error: 'Query cannot be empty' };
|
|
89
88
|
}
|
|
90
89
|
try {
|
|
91
|
-
const searchLocation = (storedLocation ?? config.defaultLocation).toLowerCase();
|
|
92
90
|
const payload = {
|
|
93
91
|
q: query,
|
|
94
92
|
num: Math.min(Math.max(1, numResults), 10),
|
|
95
|
-
gl: searchLocation,
|
|
96
93
|
};
|
|
94
|
+
if (country != null && country !== '') {
|
|
95
|
+
payload['gl'] = country.toLowerCase();
|
|
96
|
+
}
|
|
97
97
|
const response = await axios.post(config.apiUrl, payload, {
|
|
98
98
|
headers: {
|
|
99
99
|
'X-API-KEY': config.apiKey,
|
|
@@ -105,10 +105,10 @@ const createSerperAPI = (apiKey) => {
|
|
|
105
105
|
const results = {
|
|
106
106
|
organic: data.organic,
|
|
107
107
|
images: data.images ?? [],
|
|
108
|
-
topStories: data.topStories ?? [],
|
|
109
|
-
knowledgeGraph: data.knowledgeGraph,
|
|
110
108
|
answerBox: data.answerBox,
|
|
109
|
+
topStories: data.topStories ?? [],
|
|
111
110
|
peopleAlsoAsk: data.peopleAlsoAsk,
|
|
111
|
+
knowledgeGraph: data.knowledgeGraph,
|
|
112
112
|
relatedSearches: data.relatedSearches,
|
|
113
113
|
};
|
|
114
114
|
return { success: true, data: results };
|
|
@@ -129,7 +129,7 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
129
129
|
if (config.instanceUrl == null || config.instanceUrl === '') {
|
|
130
130
|
throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');
|
|
131
131
|
}
|
|
132
|
-
const getSources = async (query, numResults = 8,
|
|
132
|
+
const getSources = async ({ query, numResults = 8, }) => {
|
|
133
133
|
if (!query.trim()) {
|
|
134
134
|
return { success: false, error: 'Query cannot be empty' };
|
|
135
135
|
}
|
|
@@ -151,11 +151,7 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
151
151
|
language: 'all',
|
|
152
152
|
safesearch: 0,
|
|
153
153
|
engines: 'google,bing,duckduckgo',
|
|
154
|
-
max_results: Math.min(Math.max(1, numResults), 20),
|
|
155
154
|
};
|
|
156
|
-
if (storedLocation != null && storedLocation !== 'all') {
|
|
157
|
-
params.language = storedLocation;
|
|
158
|
-
}
|
|
159
155
|
const headers = {
|
|
160
156
|
'Content-Type': 'application/json',
|
|
161
157
|
};
|
|
@@ -237,9 +233,10 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
237
233
|
.then(([url, response]) => {
|
|
238
234
|
const attribution = getAttribution(url, response.data?.metadata);
|
|
239
235
|
if (response.success && response.data) {
|
|
240
|
-
const content = firecrawlScraper.extractContent(response);
|
|
236
|
+
const [content, references] = firecrawlScraper.extractContent(response);
|
|
241
237
|
return {
|
|
242
238
|
url,
|
|
239
|
+
references,
|
|
243
240
|
attribution,
|
|
244
241
|
content: chunker.cleanText(content),
|
|
245
242
|
};
|
|
@@ -302,10 +299,11 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
302
299
|
if (result.error === true) {
|
|
303
300
|
continue;
|
|
304
301
|
}
|
|
305
|
-
const { url, content, attribution, highlights } = result;
|
|
302
|
+
const { url, content, attribution, references, highlights } = result;
|
|
306
303
|
onContentScraped?.(url, {
|
|
307
304
|
content,
|
|
308
305
|
attribution,
|
|
306
|
+
references,
|
|
309
307
|
highlights,
|
|
310
308
|
});
|
|
311
309
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.mjs","sources":["../../../../src/tools/search/search.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\nimport { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';\nimport type * as t from './types';\nimport { getAttribution, FirecrawlScraper } from './firecrawl';\nimport { BaseReranker } from './rerankers';\n\nconst chunker = {\n cleanText: (text: string): string => {\n if (!text) return '';\n\n /** Normalized all line endings to '\\n' */\n const normalizedText = text.replace(/\\r\\n/g, '\\n').replace(/\\r/g, '\\n');\n\n /** Handle multiple backslashes followed by newlines\n * This replaces patterns like '\\\\\\\\\\\\n' with a single newline */\n const fixedBackslashes = normalizedText.replace(/\\\\+\\n/g, '\\n');\n\n /** Cleaned up consecutive newlines, tabs, and spaces around newlines */\n const cleanedNewlines = fixedBackslashes.replace(/[\\t ]*\\n[\\t \\n]*/g, '\\n');\n\n /** Cleaned up excessive spaces and tabs */\n const cleanedSpaces = cleanedNewlines.replace(/[ \\t]+/g, ' ');\n\n return cleanedSpaces.trim();\n },\n splitText: async (\n text: string,\n options?: {\n chunkSize?: number;\n chunkOverlap?: number;\n separators?: string[];\n }\n ): Promise<string[]> => {\n const chunkSize = options?.chunkSize ?? 150;\n const chunkOverlap = options?.chunkOverlap ?? 50;\n const separators = options?.separators || ['\\n\\n', '\\n'];\n\n const splitter = new RecursiveCharacterTextSplitter({\n separators,\n chunkSize,\n chunkOverlap,\n });\n\n return await splitter.splitText(text);\n },\n\n splitTexts: async (\n texts: string[],\n options?: {\n chunkSize?: number;\n chunkOverlap?: number;\n separators?: string[];\n }\n ): Promise<string[][]> => {\n // Split multiple texts\n const promises = texts.map((text) =>\n chunker.splitText(text, options).catch((error) => {\n console.error('Error splitting text:', error);\n return [text];\n })\n );\n return Promise.all(promises);\n },\n};\n\nconst createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {\n return (link: string, update?: Partial<t.ValidSource>): void => {\n const source = sourceMap.get(link);\n if (source) {\n sourceMap.set(link, {\n ...source,\n ...update,\n });\n }\n };\n};\n\nconst getHighlights = async ({\n query,\n content,\n reranker,\n topResults = 5,\n}: {\n content: string;\n query: string;\n reranker?: BaseReranker;\n topResults?: number;\n}): Promise<t.Highlight[] | undefined> => {\n if (!content) {\n console.warn('No content provided for highlights');\n return;\n }\n if (!reranker) {\n console.warn('No reranker provided for highlights');\n return;\n }\n\n try {\n const documents = await chunker.splitText(content);\n if (Array.isArray(documents)) {\n return await reranker.rerank(query, documents, topResults);\n } else {\n console.error(\n 'Expected documents to be an array, got:',\n typeof documents\n );\n return;\n }\n } catch (error) {\n console.error('Error in content processing:', error);\n return;\n }\n};\n\nconst createSerperAPI = (\n apiKey?: string\n): {\n getSources: (\n query: string,\n numResults?: number,\n storedLocation?: string\n ) => Promise<t.SearchResult>;\n} => {\n const config = {\n apiKey: apiKey ?? process.env.SERPER_API_KEY,\n apiUrl: 'https://google.serper.dev/search',\n defaultLocation: 'us',\n timeout: 10000,\n };\n\n if (config.apiKey == null || config.apiKey === '') {\n throw new Error('SERPER_API_KEY is required for SerperAPI');\n }\n\n const getSources = async (\n query: string,\n numResults: number = 8,\n storedLocation?: string\n ): Promise<t.SearchResult> => {\n if (!query.trim()) {\n return { success: false, error: 'Query cannot be empty' };\n }\n\n try {\n const searchLocation = (\n storedLocation ?? config.defaultLocation\n ).toLowerCase();\n\n const payload = {\n q: query,\n num: Math.min(Math.max(1, numResults), 10),\n gl: searchLocation,\n };\n\n const response = await axios.post(config.apiUrl, payload, {\n headers: {\n 'X-API-KEY': config.apiKey,\n 'Content-Type': 'application/json',\n },\n timeout: config.timeout,\n });\n\n const data = response.data;\n const results: t.SearchResultData = {\n organic: data.organic,\n images: data.images ?? [],\n topStories: data.topStories ?? [],\n knowledgeGraph: data.knowledgeGraph as t.KnowledgeGraphResult,\n answerBox: data.answerBox as t.AnswerBoxResult,\n peopleAlsoAsk: data.peopleAlsoAsk as t.PeopleAlsoAskResult[],\n relatedSearches: data.relatedSearches as string[],\n };\n\n return { success: true, data: results };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return { success: false, error: `API request failed: ${errorMessage}` };\n }\n };\n\n return { getSources };\n};\n\nconst createSearXNGAPI = (\n instanceUrl?: string,\n apiKey?: string\n): {\n getSources: (\n query: string,\n numResults?: number,\n storedLocation?: string\n ) => Promise<t.SearchResult>;\n} => {\n const config = {\n instanceUrl: instanceUrl ?? process.env.SEARXNG_INSTANCE_URL,\n apiKey: apiKey ?? process.env.SEARXNG_API_KEY,\n defaultLocation: 'all',\n timeout: 10000,\n };\n\n if (config.instanceUrl == null || config.instanceUrl === '') {\n throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');\n }\n\n const getSources = async (\n query: string,\n numResults: number = 8,\n storedLocation?: string\n ): Promise<t.SearchResult> => {\n if (!query.trim()) {\n return { success: false, error: 'Query cannot be empty' };\n }\n\n try {\n // Ensure the instance URL ends with /search\n if (config.instanceUrl == null || config.instanceUrl === '') {\n return { success: false, error: 'Instance URL is not defined' };\n }\n\n let searchUrl = config.instanceUrl;\n if (!searchUrl.endsWith('/search')) {\n searchUrl = searchUrl.replace(/\\/$/, '') + '/search';\n }\n\n // Prepare parameters for SearXNG\n const params: Record<string, string | number> = {\n q: query,\n format: 'json',\n pageno: 1,\n categories: 'general',\n language: 'all',\n safesearch: 0,\n engines: 'google,bing,duckduckgo',\n max_results: Math.min(Math.max(1, numResults), 20),\n };\n\n if (storedLocation != null && storedLocation !== 'all') {\n params.language = storedLocation;\n }\n\n const headers: Record<string, string> = {\n 'Content-Type': 'application/json',\n };\n\n if (config.apiKey != null && config.apiKey !== '') {\n headers['X-API-Key'] = config.apiKey;\n }\n\n const response = await axios.get(searchUrl, {\n headers,\n params,\n timeout: config.timeout,\n });\n\n const data = response.data;\n\n // Transform SearXNG results to match SerperAPI format\n const organicResults = (data.results ?? [])\n .slice(0, numResults)\n .map((result: t.SearXNGResult) => ({\n title: result.title ?? '',\n link: result.url ?? '',\n snippet: result.content ?? '',\n date: result.publishedDate ?? '',\n }));\n\n // Extract image results if available\n const imageResults = (data.results ?? [])\n .filter((result: t.SearXNGResult) => result.img_src)\n .slice(0, 6)\n .map((result: t.SearXNGResult) => ({\n title: result.title ?? '',\n imageUrl: result.img_src ?? '',\n }));\n\n // Format results to match SerperAPI structure\n const results: t.SearchResultData = {\n organic: organicResults,\n images: imageResults,\n topStories: [],\n // Use undefined instead of null for optional properties\n relatedSearches: data.suggestions ?? [],\n };\n\n return { success: true, data: results };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n success: false,\n error: `SearXNG API request failed: ${errorMessage}`,\n };\n }\n };\n\n return { getSources };\n};\n\nexport const createSearchAPI = (\n config: t.SearchConfig\n): {\n getSources: (\n query: string,\n numResults?: number,\n storedLocation?: string\n ) => Promise<t.SearchResult>;\n} => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n } = config;\n\n if (searchProvider.toLowerCase() === 'serper') {\n return createSerperAPI(serperApiKey);\n } else if (searchProvider.toLowerCase() === 'searxng') {\n return createSearXNGAPI(searxngInstanceUrl, searxngApiKey);\n } else {\n throw new Error(\n `Invalid search provider: ${searchProvider}. Must be 'serper' or 'searxng'`\n );\n }\n};\n\nexport const createSourceProcessor = (\n config: t.ProcessSourcesConfig = {},\n scraperInstance?: FirecrawlScraper\n): {\n processSources: (\n result: t.SearchResult,\n numElements: number,\n query: string,\n proMode?: boolean\n ) => Promise<t.SearchResultData>;\n topResults: number;\n} => {\n if (!scraperInstance) {\n throw new Error('Firecrawl scraper instance is required');\n }\n const {\n topResults = 5,\n // strategies = ['no_extraction'],\n // filterContent = true,\n reranker,\n } = config;\n\n const firecrawlScraper = scraperInstance;\n\n const webScraper = {\n scrapeMany: async ({\n query,\n links,\n }: {\n query: string;\n links: string[];\n }): Promise<Array<t.ScrapeResult>> => {\n console.log(`Scraping ${links.length} links with Firecrawl`);\n const promises: Array<Promise<t.ScrapeResult>> = [];\n try {\n for (const currentLink of links) {\n const promise: Promise<t.ScrapeResult> = firecrawlScraper\n .scrapeUrl(currentLink, {})\n .then(([url, response]) => {\n const attribution = getAttribution(url, response.data?.metadata);\n if (response.success && response.data) {\n const content = firecrawlScraper.extractContent(response);\n return {\n url,\n attribution,\n content: chunker.cleanText(content),\n };\n }\n\n return {\n url,\n attribution,\n error: true,\n content: `Failed to scrape ${url}: ${response.error ?? 'Unknown error'}`,\n };\n })\n .then(async (result) => {\n try {\n if (result.error != null) {\n console.error(\n `Error scraping ${result.url}: ${result.content}`\n );\n return {\n ...result,\n };\n }\n const highlights = await getHighlights({\n query,\n reranker,\n content: result.content,\n });\n return {\n ...result,\n highlights,\n };\n } catch (error) {\n console.error('Error processing scraped content:', error);\n return {\n ...result,\n };\n }\n })\n .catch((error) => {\n console.error(`Error scraping ${currentLink}:`, error);\n return {\n url: currentLink,\n error: true,\n content: `Failed to scrape ${currentLink}: ${error.message ?? 'Unknown error'}`,\n };\n });\n promises.push(promise);\n }\n return await Promise.all(promises);\n } catch (error) {\n console.error('Error in scrapeMany:', error);\n return [];\n }\n },\n };\n\n const fetchContents = async ({\n links,\n query,\n target,\n onContentScraped,\n }: {\n links: string[];\n query: string;\n target: number;\n onContentScraped?: (link: string, update?: Partial<t.ValidSource>) => void;\n }): Promise<void> => {\n const initialLinks = links.slice(0, target);\n // const remainingLinks = links.slice(target).reverse();\n const results = await webScraper.scrapeMany({ query, links: initialLinks });\n for (const result of results) {\n if (result.error === true) {\n continue;\n }\n const { url, content, attribution, highlights } = result;\n onContentScraped?.(url, {\n content,\n attribution,\n highlights,\n });\n }\n };\n\n const processSources = async (\n result: t.SearchResult,\n numElements: number,\n query: string,\n proMode: boolean = false\n ): Promise<t.SearchResultData> => {\n try {\n if (!result.data) {\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n };\n } else if (!result.data.organic) {\n return result.data;\n }\n\n if (!proMode) {\n const wikiSources = result.data.organic.filter((source) =>\n source.link.includes('wikipedia.org')\n );\n\n if (!wikiSources.length) {\n return result.data;\n }\n\n const wikiSourceMap = new Map<string, t.ValidSource>();\n wikiSourceMap.set(wikiSources[0].link, wikiSources[0]);\n const onContentScraped = createSourceUpdateCallback(wikiSourceMap);\n await fetchContents({\n query,\n target: 1,\n onContentScraped,\n links: [wikiSources[0].link],\n });\n\n for (let i = 0; i < result.data.organic.length; i++) {\n const source = result.data.organic[i];\n const updatedSource = wikiSourceMap.get(source.link);\n if (updatedSource) {\n result.data.organic[i] = {\n ...source,\n ...updatedSource,\n };\n }\n }\n\n return result.data;\n }\n\n const sourceMap = new Map<string, t.ValidSource>();\n const allLinks: string[] = [];\n\n for (const source of result.data.organic) {\n if (source.link) {\n allLinks.push(source.link);\n sourceMap.set(source.link, source);\n }\n }\n\n if (allLinks.length === 0) {\n return result.data;\n }\n\n const onContentScraped = createSourceUpdateCallback(sourceMap);\n await fetchContents({\n links: allLinks,\n query,\n onContentScraped,\n target: numElements,\n });\n\n for (let i = 0; i < result.data.organic.length; i++) {\n const source = result.data.organic[i];\n const updatedSource = sourceMap.get(source.link);\n if (updatedSource) {\n result.data.organic[i] = {\n ...source,\n ...updatedSource,\n };\n }\n }\n\n const successfulSources = result.data.organic\n .filter(\n (source) =>\n source.content != null && !source.content.startsWith('Failed')\n )\n .slice(0, numElements);\n\n if (successfulSources.length > 0) {\n result.data.organic = successfulSources;\n }\n return result.data;\n } catch (error) {\n console.error('Error in processSources:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n ...result.data,\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n\n return {\n processSources,\n topResults,\n };\n};\n"],"names":[],"mappings":";;;;AAAA;AAOA,MAAM,OAAO,GAAG;AACd,IAAA,SAAS,EAAE,CAAC,IAAY,KAAY;AAClC,QAAA,IAAI,CAAC,IAAI;AAAE,YAAA,OAAO,EAAE;;AAGpB,QAAA,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC;AAEvE;AACiE;QACjE,MAAM,gBAAgB,GAAG,cAAc,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;QAG/D,MAAM,eAAe,GAAG,gBAAgB,CAAC,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC;;QAG3E,MAAM,aAAa,GAAG,eAAe,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;AAE7D,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE;KAC5B;AACD,IAAA,SAAS,EAAE,OACT,IAAY,EACZ,OAIC,KACoB;AACrB,QAAA,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,GAAG;AAC3C,QAAA,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,EAAE;QAChD,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC;AAExD,QAAA,MAAM,QAAQ,GAAG,IAAI,8BAA8B,CAAC;YAClD,UAAU;YACV,SAAS;YACT,YAAY;AACb,SAAA,CAAC;AAEF,QAAA,OAAO,MAAM,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC;KACtC;AAED,IAAA,UAAU,EAAE,OACV,KAAe,EACf,OAIC,KACsB;;QAEvB,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,KAC9B,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,KAAI;AAC/C,YAAA,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC;SACd,CAAC,CACH;AACD,QAAA,OAAO,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;KAC7B;CACF;AAED,MAAM,0BAA0B,GAAG,CAAC,SAAqC,KAAI;AAC3E,IAAA,OAAO,CAAC,IAAY,EAAE,MAA+B,KAAU;QAC7D,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAClC,IAAI,MAAM,EAAE;AACV,YAAA,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE;AAClB,gBAAA,GAAG,MAAM;AACT,gBAAA,GAAG,MAAM;AACV,aAAA,CAAC;;AAEN,KAAC;AACH,CAAC;AAED,MAAM,aAAa,GAAG,OAAO,EAC3B,KAAK,EACL,OAAO,EACP,QAAQ,EACR,UAAU,GAAG,CAAC,GAMf,KAAwC;IACvC,IAAI,CAAC,OAAO,EAAE;AACZ,QAAA,OAAO,CAAC,IAAI,CAAC,oCAAoC,CAAC;QAClD;;IAEF,IAAI,CAAC,QAAQ,EAAE;AACb,QAAA,OAAO,CAAC,IAAI,CAAC,qCAAqC,CAAC;QACnD;;AAGF,IAAA,IAAI;QACF,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;AAClD,QAAA,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE;YAC5B,OAAO,MAAM,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,CAAC;;aACrD;YACL,OAAO,CAAC,KAAK,CACX,yCAAyC,EACzC,OAAO,SAAS,CACjB;YACD;;;IAEF,OAAO,KAAK,EAAE;AACd,QAAA,OAAO,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC;QACpD;;AAEJ,CAAC;AAED,MAAM,eAAe,GAAG,CACtB,MAAe,KAOb;AACF,IAAA,MAAM,MAAM,GAAG;AACb,QAAA,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc;AAC5C,QAAA,MAAM,EAAE,kCAAkC;AAC1C,QAAA,eAAe,EAAE,IAAI;AACrB,QAAA,OAAO,EAAE,KAAK;KACf;AAED,IAAA,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,MAAM,KAAK,EAAE,EAAE;AACjD,QAAA,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC;;IAG7D,MAAM,UAAU,GAAG,OACjB,KAAa,EACb,UAAA,GAAqB,CAAC,EACtB,cAAuB,KACI;AAC3B,QAAA,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE;YACjB,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;;AAG3D,QAAA,IAAI;AACF,YAAA,MAAM,cAAc,GAAG,CACrB,cAAc,IAAI,MAAM,CAAC,eAAe,EACxC,WAAW,EAAE;AAEf,YAAA,MAAM,OAAO,GAAG;AACd,gBAAA,CAAC,EAAE,KAAK;AACR,gBAAA,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC;AAC1C,gBAAA,EAAE,EAAE,cAAc;aACnB;AAED,YAAA,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE;AACxD,gBAAA,OAAO,EAAE;oBACP,WAAW,EAAE,MAAM,CAAC,MAAM;AAC1B,oBAAA,cAAc,EAAE,kBAAkB;AACnC,iBAAA;gBACD,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,aAAA,CAAC;AAEF,YAAA,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI;AAC1B,YAAA,MAAM,OAAO,GAAuB;gBAClC,OAAO,EAAE,IAAI,CAAC,OAAO;AACrB,gBAAA,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,EAAE;AACzB,gBAAA,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,EAAE;gBACjC,cAAc,EAAE,IAAI,CAAC,cAAwC;gBAC7D,SAAS,EAAE,IAAI,CAAC,SAA8B;gBAC9C,aAAa,EAAE,IAAI,CAAC,aAAwC;gBAC5D,eAAe,EAAE,IAAI,CAAC,eAA2B;aAClD;YAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;;QACvC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAuB,oBAAA,EAAA,YAAY,CAAE,CAAA,EAAE;;AAE3E,KAAC;IAED,OAAO,EAAE,UAAU,EAAE;AACvB,CAAC;AAED,MAAM,gBAAgB,GAAG,CACvB,WAAoB,EACpB,MAAe,KAOb;AACF,IAAA,MAAM,MAAM,GAAG;AACb,QAAA,WAAW,EAAE,WAAW,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB;AAC5D,QAAA,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe;AAC7C,QACA,OAAO,EAAE,KAAK;KACf;AAED,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;AAC3D,QAAA,MAAM,IAAI,KAAK,CAAC,kDAAkD,CAAC;;IAGrE,MAAM,UAAU,GAAG,OACjB,KAAa,EACb,UAAA,GAAqB,CAAC,EACtB,cAAuB,KACI;AAC3B,QAAA,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE;YACjB,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;;AAG3D,QAAA,IAAI;;AAEF,YAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;gBAC3D,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,6BAA6B,EAAE;;AAGjE,YAAA,IAAI,SAAS,GAAG,MAAM,CAAC,WAAW;YAClC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE;gBAClC,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,SAAS;;;AAItD,YAAA,MAAM,MAAM,GAAoC;AAC9C,gBAAA,CAAC,EAAE,KAAK;AACR,gBAAA,MAAM,EAAE,MAAM;AACd,gBAAA,MAAM,EAAE,CAAC;AACT,gBAAA,UAAU,EAAE,SAAS;AACrB,gBAAA,QAAQ,EAAE,KAAK;AACf,gBAAA,UAAU,EAAE,CAAC;AACb,gBAAA,OAAO,EAAE,wBAAwB;AACjC,gBAAA,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC;aACnD;YAED,IAAI,cAAc,IAAI,IAAI,IAAI,cAAc,KAAK,KAAK,EAAE;AACtD,gBAAA,MAAM,CAAC,QAAQ,GAAG,cAAc;;AAGlC,YAAA,MAAM,OAAO,GAA2B;AACtC,gBAAA,cAAc,EAAE,kBAAkB;aACnC;AAED,YAAA,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,MAAM,KAAK,EAAE,EAAE;AACjD,gBAAA,OAAO,CAAC,WAAW,CAAC,GAAG,MAAM,CAAC,MAAM;;YAGtC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,SAAS,EAAE;gBAC1C,OAAO;gBACP,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,aAAA,CAAC;AAEF,YAAA,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI;;YAG1B,MAAM,cAAc,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE;AACvC,iBAAA,KAAK,CAAC,CAAC,EAAE,UAAU;AACnB,iBAAA,GAAG,CAAC,CAAC,MAAuB,MAAM;AACjC,gBAAA,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE;AACzB,gBAAA,IAAI,EAAE,MAAM,CAAC,GAAG,IAAI,EAAE;AACtB,gBAAA,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;AAC7B,gBAAA,IAAI,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;AACjC,aAAA,CAAC,CAAC;;YAGL,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE;iBACrC,MAAM,CAAC,CAAC,MAAuB,KAAK,MAAM,CAAC,OAAO;AAClD,iBAAA,KAAK,CAAC,CAAC,EAAE,CAAC;AACV,iBAAA,GAAG,CAAC,CAAC,MAAuB,MAAM;AACjC,gBAAA,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE;AACzB,gBAAA,QAAQ,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;AAC/B,aAAA,CAAC,CAAC;;AAGL,YAAA,MAAM,OAAO,GAAuB;AAClC,gBAAA,OAAO,EAAE,cAAc;AACvB,gBAAA,MAAM,EAAE,YAAY;AACpB,gBAAA,UAAU,EAAE,EAAE;;AAEd,gBAAA,eAAe,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE;aACxC;YAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;;QACvC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,CAA+B,4BAAA,EAAA,YAAY,CAAE,CAAA;aACrD;;AAEL,KAAC;IAED,OAAO,EAAE,UAAU,EAAE;AACvB,CAAC;AAEY,MAAA,eAAe,GAAG,CAC7B,MAAsB,KAOpB;AACF,IAAA,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,GACd,GAAG,MAAM;AAEV,IAAA,IAAI,cAAc,CAAC,WAAW,EAAE,KAAK,QAAQ,EAAE;AAC7C,QAAA,OAAO,eAAe,CAAC,YAAY,CAAC;;AAC/B,SAAA,IAAI,cAAc,CAAC,WAAW,EAAE,KAAK,SAAS,EAAE;AACrD,QAAA,OAAO,gBAAgB,CAAC,kBAAkB,EAAE,aAAa,CAAC;;SACrD;AACL,QAAA,MAAM,IAAI,KAAK,CACb,4BAA4B,cAAc,CAAA,+BAAA,CAAiC,CAC5E;;AAEL;AAEa,MAAA,qBAAqB,GAAG,CACnC,SAAiC,EAAE,EACnC,eAAkC,KAShC;IACF,IAAI,CAAC,eAAe,EAAE;AACpB,QAAA,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC;;IAE3D,MAAM,EACJ,UAAU,GAAG,CAAC;;;IAGd,QAAQ,GACT,GAAG,MAAM;IAEV,MAAM,gBAAgB,GAAG,eAAe;AAExC,IAAA,MAAM,UAAU,GAAG;QACjB,UAAU,EAAE,OAAO,EACjB,KAAK,EACL,KAAK,GAIN,KAAoC;YACnC,OAAO,CAAC,GAAG,CAAC,CAAA,SAAA,EAAY,KAAK,CAAC,MAAM,CAAuB,qBAAA,CAAA,CAAC;YAC5D,MAAM,QAAQ,GAAmC,EAAE;AACnD,YAAA,IAAI;AACF,gBAAA,KAAK,MAAM,WAAW,IAAI,KAAK,EAAE;oBAC/B,MAAM,OAAO,GAA4B;AACtC,yBAAA,SAAS,CAAC,WAAW,EAAE,EAAE;yBACzB,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,CAAC,KAAI;AACxB,wBAAA,MAAM,WAAW,GAAG,cAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;wBAChE,IAAI,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,EAAE;4BACrC,MAAM,OAAO,GAAG,gBAAgB,CAAC,cAAc,CAAC,QAAQ,CAAC;4BACzD,OAAO;gCACL,GAAG;gCACH,WAAW;AACX,gCAAA,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;6BACpC;;wBAGH,OAAO;4BACL,GAAG;4BACH,WAAW;AACX,4BAAA,KAAK,EAAE,IAAI;4BACX,OAAO,EAAE,oBAAoB,GAAG,CAAA,EAAA,EAAK,QAAQ,CAAC,KAAK,IAAI,eAAe,CAAE,CAAA;yBACzE;AACH,qBAAC;AACA,yBAAA,IAAI,CAAC,OAAO,MAAM,KAAI;AACrB,wBAAA,IAAI;AACF,4BAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,EAAE;AACxB,gCAAA,OAAO,CAAC,KAAK,CACX,CAAA,eAAA,EAAkB,MAAM,CAAC,GAAG,CAAA,EAAA,EAAK,MAAM,CAAC,OAAO,CAAA,CAAE,CAClD;gCACD,OAAO;AACL,oCAAA,GAAG,MAAM;iCACV;;AAEH,4BAAA,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC;gCACrC,KAAK;gCACL,QAAQ;gCACR,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,6BAAA,CAAC;4BACF,OAAO;AACL,gCAAA,GAAG,MAAM;gCACT,UAAU;6BACX;;wBACD,OAAO,KAAK,EAAE;AACd,4BAAA,OAAO,CAAC,KAAK,CAAC,mCAAmC,EAAE,KAAK,CAAC;4BACzD,OAAO;AACL,gCAAA,GAAG,MAAM;6BACV;;AAEL,qBAAC;AACA,yBAAA,KAAK,CAAC,CAAC,KAAK,KAAI;wBACf,OAAO,CAAC,KAAK,CAAC,CAAA,eAAA,EAAkB,WAAW,CAAG,CAAA,CAAA,EAAE,KAAK,CAAC;wBACtD,OAAO;AACL,4BAAA,GAAG,EAAE,WAAW;AAChB,4BAAA,KAAK,EAAE,IAAI;4BACX,OAAO,EAAE,oBAAoB,WAAW,CAAA,EAAA,EAAK,KAAK,CAAC,OAAO,IAAI,eAAe,CAAE,CAAA;yBAChF;AACH,qBAAC,CAAC;AACJ,oBAAA,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC;;AAExB,gBAAA,OAAO,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;;YAClC,OAAO,KAAK,EAAE;AACd,gBAAA,OAAO,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;AAC5C,gBAAA,OAAO,EAAE;;SAEZ;KACF;AAED,IAAA,MAAM,aAAa,GAAG,OAAO,EAC3B,KAAK,EACL,KAAK,EACL,MAAM,EACN,gBAAgB,GAMjB,KAAmB;QAClB,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC;;AAE3C,QAAA,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;AAC3E,QAAA,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;AAC5B,YAAA,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,EAAE;gBACzB;;YAEF,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,GAAG,MAAM;YACxD,gBAAgB,GAAG,GAAG,EAAE;gBACtB,OAAO;gBACP,WAAW;gBACX,UAAU;AACX,aAAA,CAAC;;AAEN,KAAC;AAED,IAAA,MAAM,cAAc,GAAG,OACrB,MAAsB,EACtB,WAAmB,EACnB,KAAa,EACb,OAAmB,GAAA,KAAK,KACO;AAC/B,QAAA,IAAI;AACF,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;gBAChB,OAAO;AACL,oBAAA,OAAO,EAAE,EAAE;AACX,oBAAA,UAAU,EAAE,EAAE;AACd,oBAAA,MAAM,EAAE,EAAE;AACV,oBAAA,eAAe,EAAE,EAAE;iBACpB;;AACI,iBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE;gBAC/B,OAAO,MAAM,CAAC,IAAI;;YAGpB,IAAI,CAAC,OAAO,EAAE;gBACZ,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,KACpD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,CACtC;AAED,gBAAA,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE;oBACvB,OAAO,MAAM,CAAC,IAAI;;AAGpB,gBAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAyB;AACtD,gBAAA,aAAa,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC;AACtD,gBAAA,MAAM,gBAAgB,GAAG,0BAA0B,CAAC,aAAa,CAAC;AAClE,gBAAA,MAAM,aAAa,CAAC;oBAClB,KAAK;AACL,oBAAA,MAAM,EAAE,CAAC;oBACT,gBAAgB;oBAChB,KAAK,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7B,iBAAA,CAAC;AAEF,gBAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;oBACnD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;oBACrC,MAAM,aAAa,GAAG,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;oBACpD,IAAI,aAAa,EAAE;AACjB,wBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;AACvB,4BAAA,GAAG,MAAM;AACT,4BAAA,GAAG,aAAa;yBACjB;;;gBAIL,OAAO,MAAM,CAAC,IAAI;;AAGpB,YAAA,MAAM,SAAS,GAAG,IAAI,GAAG,EAAyB;YAClD,MAAM,QAAQ,GAAa,EAAE;YAE7B,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE;AACxC,gBAAA,IAAI,MAAM,CAAC,IAAI,EAAE;AACf,oBAAA,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC;oBAC1B,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC;;;AAItC,YAAA,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE;gBACzB,OAAO,MAAM,CAAC,IAAI;;AAGpB,YAAA,MAAM,gBAAgB,GAAG,0BAA0B,CAAC,SAAS,CAAC;AAC9D,YAAA,MAAM,aAAa,CAAC;AAClB,gBAAA,KAAK,EAAE,QAAQ;gBACf,KAAK;gBACL,gBAAgB;AAChB,gBAAA,MAAM,EAAE,WAAW;AACpB,aAAA,CAAC;AAEF,YAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;gBACrC,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;gBAChD,IAAI,aAAa,EAAE;AACjB,oBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;AACvB,wBAAA,GAAG,MAAM;AACT,wBAAA,GAAG,aAAa;qBACjB;;;AAIL,YAAA,MAAM,iBAAiB,GAAG,MAAM,CAAC,IAAI,CAAC;iBACnC,MAAM,CACL,CAAC,MAAM,KACL,MAAM,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC;AAEjE,iBAAA,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC;AAExB,YAAA,IAAI,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE;AAChC,gBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,GAAG,iBAAiB;;YAEzC,OAAO,MAAM,CAAC,IAAI;;QAClB,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAE,KAAK,CAAC;YAChD,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,eAAe,EAAE,EAAE;gBACnB,GAAG,MAAM,CAAC,IAAI;AACd,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;IAED,OAAO;QACL,cAAc;QACd,UAAU;KACX;AACH;;;;"}
|
|
1
|
+
{"version":3,"file":"search.mjs","sources":["../../../../src/tools/search/search.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\nimport { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';\nimport type * as t from './types';\nimport { FirecrawlScraper } from './firecrawl';\nimport { BaseReranker } from './rerankers';\nimport { getAttribution } from './utils';\n\nconst chunker = {\n cleanText: (text: string): string => {\n if (!text) return '';\n\n /** Normalized all line endings to '\\n' */\n const normalizedText = text.replace(/\\r\\n/g, '\\n').replace(/\\r/g, '\\n');\n\n /** Handle multiple backslashes followed by newlines\n * This replaces patterns like '\\\\\\\\\\\\n' with a single newline */\n const fixedBackslashes = normalizedText.replace(/\\\\+\\n/g, '\\n');\n\n /** Cleaned up consecutive newlines, tabs, and spaces around newlines */\n const cleanedNewlines = fixedBackslashes.replace(/[\\t ]*\\n[\\t \\n]*/g, '\\n');\n\n /** Cleaned up excessive spaces and tabs */\n const cleanedSpaces = cleanedNewlines.replace(/[ \\t]+/g, ' ');\n\n return cleanedSpaces.trim();\n },\n splitText: async (\n text: string,\n options?: {\n chunkSize?: number;\n chunkOverlap?: number;\n separators?: string[];\n }\n ): Promise<string[]> => {\n const chunkSize = options?.chunkSize ?? 150;\n const chunkOverlap = options?.chunkOverlap ?? 50;\n const separators = options?.separators || ['\\n\\n', '\\n'];\n\n const splitter = new RecursiveCharacterTextSplitter({\n separators,\n chunkSize,\n chunkOverlap,\n });\n\n return await splitter.splitText(text);\n },\n\n splitTexts: async (\n texts: string[],\n options?: {\n chunkSize?: number;\n chunkOverlap?: number;\n separators?: string[];\n }\n ): Promise<string[][]> => {\n // Split multiple texts\n const promises = texts.map((text) =>\n chunker.splitText(text, options).catch((error) => {\n console.error('Error splitting text:', error);\n return [text];\n })\n );\n return Promise.all(promises);\n },\n};\n\nconst createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {\n return (link: string, update?: Partial<t.ValidSource>): void => {\n const source = sourceMap.get(link);\n if (source) {\n sourceMap.set(link, {\n ...source,\n ...update,\n });\n }\n };\n};\n\nconst getHighlights = async ({\n query,\n content,\n reranker,\n topResults = 5,\n}: {\n content: string;\n query: string;\n reranker?: BaseReranker;\n topResults?: number;\n}): Promise<t.Highlight[] | undefined> => {\n if (!content) {\n console.warn('No content provided for highlights');\n return;\n }\n if (!reranker) {\n console.warn('No reranker provided for highlights');\n return;\n }\n\n try {\n const documents = await chunker.splitText(content);\n if (Array.isArray(documents)) {\n return await reranker.rerank(query, documents, topResults);\n } else {\n console.error(\n 'Expected documents to be an array, got:',\n typeof documents\n );\n return;\n }\n } catch (error) {\n console.error('Error in content processing:', error);\n return;\n }\n};\n\nconst createSerperAPI = (\n apiKey?: string\n): {\n getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;\n} => {\n const config = {\n apiKey: apiKey ?? process.env.SERPER_API_KEY,\n apiUrl: 'https://google.serper.dev/search',\n timeout: 10000,\n };\n\n if (config.apiKey == null || config.apiKey === '') {\n throw new Error('SERPER_API_KEY is required for SerperAPI');\n }\n\n const getSources = async ({\n query,\n country,\n numResults = 8,\n }: t.GetSourcesParams): Promise<t.SearchResult> => {\n if (!query.trim()) {\n return { success: false, error: 'Query cannot be empty' };\n }\n\n try {\n const payload: t.SerperSearchPayload = {\n q: query,\n num: Math.min(Math.max(1, numResults), 10),\n };\n\n if (country != null && country !== '') {\n payload['gl'] = country.toLowerCase();\n }\n\n const response = await axios.post<t.SerperResultData>(\n config.apiUrl,\n payload,\n {\n headers: {\n 'X-API-KEY': config.apiKey,\n 'Content-Type': 'application/json',\n },\n timeout: config.timeout,\n }\n );\n\n const data = response.data;\n const results: t.SearchResultData = {\n organic: data.organic,\n images: data.images ?? [],\n answerBox: data.answerBox,\n topStories: data.topStories ?? [],\n peopleAlsoAsk: data.peopleAlsoAsk,\n knowledgeGraph: data.knowledgeGraph,\n relatedSearches: data.relatedSearches,\n };\n\n return { success: true, data: results };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return { success: false, error: `API request failed: ${errorMessage}` };\n }\n };\n\n return { getSources };\n};\n\nconst createSearXNGAPI = (\n instanceUrl?: string,\n apiKey?: string\n): {\n getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;\n} => {\n const config = {\n instanceUrl: instanceUrl ?? process.env.SEARXNG_INSTANCE_URL,\n apiKey: apiKey ?? process.env.SEARXNG_API_KEY,\n defaultLocation: 'all',\n timeout: 10000,\n };\n\n if (config.instanceUrl == null || config.instanceUrl === '') {\n throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');\n }\n\n const getSources = async ({\n query,\n numResults = 8,\n }: t.GetSourcesParams): Promise<t.SearchResult> => {\n if (!query.trim()) {\n return { success: false, error: 'Query cannot be empty' };\n }\n\n try {\n // Ensure the instance URL ends with /search\n if (config.instanceUrl == null || config.instanceUrl === '') {\n return { success: false, error: 'Instance URL is not defined' };\n }\n\n let searchUrl = config.instanceUrl;\n if (!searchUrl.endsWith('/search')) {\n searchUrl = searchUrl.replace(/\\/$/, '') + '/search';\n }\n\n // Prepare parameters for SearXNG\n const params: t.SearxNGSearchPayload = {\n q: query,\n format: 'json',\n pageno: 1,\n categories: 'general',\n language: 'all',\n safesearch: 0,\n engines: 'google,bing,duckduckgo',\n };\n\n const headers: Record<string, string> = {\n 'Content-Type': 'application/json',\n };\n\n if (config.apiKey != null && config.apiKey !== '') {\n headers['X-API-Key'] = config.apiKey;\n }\n\n const response = await axios.get(searchUrl, {\n headers,\n params,\n timeout: config.timeout,\n });\n\n const data = response.data;\n\n // Transform SearXNG results to match SerperAPI format\n const organicResults = (data.results ?? [])\n .slice(0, numResults)\n .map((result: t.SearXNGResult) => ({\n title: result.title ?? '',\n link: result.url ?? '',\n snippet: result.content ?? '',\n date: result.publishedDate ?? '',\n }));\n\n // Extract image results if available\n const imageResults = (data.results ?? [])\n .filter((result: t.SearXNGResult) => result.img_src)\n .slice(0, 6)\n .map((result: t.SearXNGResult) => ({\n title: result.title ?? '',\n imageUrl: result.img_src ?? '',\n }));\n\n // Format results to match SerperAPI structure\n const results: t.SearchResultData = {\n organic: organicResults,\n images: imageResults,\n topStories: [],\n // Use undefined instead of null for optional properties\n relatedSearches: data.suggestions ?? [],\n };\n\n return { success: true, data: results };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n success: false,\n error: `SearXNG API request failed: ${errorMessage}`,\n };\n }\n };\n\n return { getSources };\n};\n\nexport const createSearchAPI = (\n config: t.SearchConfig\n): {\n getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;\n} => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n } = config;\n\n if (searchProvider.toLowerCase() === 'serper') {\n return createSerperAPI(serperApiKey);\n } else if (searchProvider.toLowerCase() === 'searxng') {\n return createSearXNGAPI(searxngInstanceUrl, searxngApiKey);\n } else {\n throw new Error(\n `Invalid search provider: ${searchProvider}. Must be 'serper' or 'searxng'`\n );\n }\n};\n\nexport const createSourceProcessor = (\n config: t.ProcessSourcesConfig = {},\n scraperInstance?: FirecrawlScraper\n): {\n processSources: (\n result: t.SearchResult,\n numElements: number,\n query: string,\n proMode?: boolean\n ) => Promise<t.SearchResultData>;\n topResults: number;\n} => {\n if (!scraperInstance) {\n throw new Error('Firecrawl scraper instance is required');\n }\n const {\n topResults = 5,\n // strategies = ['no_extraction'],\n // filterContent = true,\n reranker,\n } = config;\n\n const firecrawlScraper = scraperInstance;\n\n const webScraper = {\n scrapeMany: async ({\n query,\n links,\n }: {\n query: string;\n links: string[];\n }): Promise<Array<t.ScrapeResult>> => {\n console.log(`Scraping ${links.length} links with Firecrawl`);\n const promises: Array<Promise<t.ScrapeResult>> = [];\n try {\n for (const currentLink of links) {\n const promise: Promise<t.ScrapeResult> = firecrawlScraper\n .scrapeUrl(currentLink, {})\n .then(([url, response]) => {\n const attribution = getAttribution(url, response.data?.metadata);\n if (response.success && response.data) {\n const [content, references] =\n firecrawlScraper.extractContent(response);\n return {\n url,\n references,\n attribution,\n content: chunker.cleanText(content),\n } as t.ScrapeResult;\n }\n\n return {\n url,\n attribution,\n error: true,\n content: `Failed to scrape ${url}: ${response.error ?? 'Unknown error'}`,\n } as t.ScrapeResult;\n })\n .then(async (result) => {\n try {\n if (result.error != null) {\n console.error(\n `Error scraping ${result.url}: ${result.content}`\n );\n return {\n ...result,\n };\n }\n const highlights = await getHighlights({\n query,\n reranker,\n content: result.content,\n });\n return {\n ...result,\n highlights,\n };\n } catch (error) {\n console.error('Error processing scraped content:', error);\n return {\n ...result,\n };\n }\n })\n .catch((error) => {\n console.error(`Error scraping ${currentLink}:`, error);\n return {\n url: currentLink,\n error: true,\n content: `Failed to scrape ${currentLink}: ${error.message ?? 'Unknown error'}`,\n };\n });\n promises.push(promise);\n }\n return await Promise.all(promises);\n } catch (error) {\n console.error('Error in scrapeMany:', error);\n return [];\n }\n },\n };\n\n const fetchContents = async ({\n links,\n query,\n target,\n onContentScraped,\n }: {\n links: string[];\n query: string;\n target: number;\n onContentScraped?: (link: string, update?: Partial<t.ValidSource>) => void;\n }): Promise<void> => {\n const initialLinks = links.slice(0, target);\n // const remainingLinks = links.slice(target).reverse();\n const results = await webScraper.scrapeMany({ query, links: initialLinks });\n for (const result of results) {\n if (result.error === true) {\n continue;\n }\n const { url, content, attribution, references, highlights } = result;\n onContentScraped?.(url, {\n content,\n attribution,\n references,\n highlights,\n });\n }\n };\n\n const processSources = async (\n result: t.SearchResult,\n numElements: number,\n query: string,\n proMode: boolean = false\n ): Promise<t.SearchResultData> => {\n try {\n if (!result.data) {\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n };\n } else if (!result.data.organic) {\n return result.data;\n }\n\n if (!proMode) {\n const wikiSources = result.data.organic.filter((source) =>\n source.link.includes('wikipedia.org')\n );\n\n if (!wikiSources.length) {\n return result.data;\n }\n\n const wikiSourceMap = new Map<string, t.ValidSource>();\n wikiSourceMap.set(wikiSources[0].link, wikiSources[0]);\n const onContentScraped = createSourceUpdateCallback(wikiSourceMap);\n await fetchContents({\n query,\n target: 1,\n onContentScraped,\n links: [wikiSources[0].link],\n });\n\n for (let i = 0; i < result.data.organic.length; i++) {\n const source = result.data.organic[i];\n const updatedSource = wikiSourceMap.get(source.link);\n if (updatedSource) {\n result.data.organic[i] = {\n ...source,\n ...updatedSource,\n };\n }\n }\n\n return result.data;\n }\n\n const sourceMap = new Map<string, t.ValidSource>();\n const allLinks: string[] = [];\n\n for (const source of result.data.organic) {\n if (source.link) {\n allLinks.push(source.link);\n sourceMap.set(source.link, source);\n }\n }\n\n if (allLinks.length === 0) {\n return result.data;\n }\n\n const onContentScraped = createSourceUpdateCallback(sourceMap);\n await fetchContents({\n links: allLinks,\n query,\n onContentScraped,\n target: numElements,\n });\n\n for (let i = 0; i < result.data.organic.length; i++) {\n const source = result.data.organic[i];\n const updatedSource = sourceMap.get(source.link);\n if (updatedSource) {\n result.data.organic[i] = {\n ...source,\n ...updatedSource,\n };\n }\n }\n\n const successfulSources = result.data.organic\n .filter(\n (source) =>\n source.content != null && !source.content.startsWith('Failed')\n )\n .slice(0, numElements);\n\n if (successfulSources.length > 0) {\n result.data.organic = successfulSources;\n }\n return result.data;\n } catch (error) {\n console.error('Error in processSources:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n ...result.data,\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n\n return {\n processSources,\n topResults,\n };\n};\n"],"names":[],"mappings":";;;;AAAA;AAQA,MAAM,OAAO,GAAG;AACd,IAAA,SAAS,EAAE,CAAC,IAAY,KAAY;AAClC,QAAA,IAAI,CAAC,IAAI;AAAE,YAAA,OAAO,EAAE;;AAGpB,QAAA,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC;AAEvE;AACiE;QACjE,MAAM,gBAAgB,GAAG,cAAc,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;QAG/D,MAAM,eAAe,GAAG,gBAAgB,CAAC,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC;;QAG3E,MAAM,aAAa,GAAG,eAAe,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;AAE7D,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE;KAC5B;AACD,IAAA,SAAS,EAAE,OACT,IAAY,EACZ,OAIC,KACoB;AACrB,QAAA,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,GAAG;AAC3C,QAAA,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,EAAE;QAChD,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC;AAExD,QAAA,MAAM,QAAQ,GAAG,IAAI,8BAA8B,CAAC;YAClD,UAAU;YACV,SAAS;YACT,YAAY;AACb,SAAA,CAAC;AAEF,QAAA,OAAO,MAAM,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC;KACtC;AAED,IAAA,UAAU,EAAE,OACV,KAAe,EACf,OAIC,KACsB;;QAEvB,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,KAC9B,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,KAAI;AAC/C,YAAA,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC;SACd,CAAC,CACH;AACD,QAAA,OAAO,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;KAC7B;CACF;AAED,MAAM,0BAA0B,GAAG,CAAC,SAAqC,KAAI;AAC3E,IAAA,OAAO,CAAC,IAAY,EAAE,MAA+B,KAAU;QAC7D,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAClC,IAAI,MAAM,EAAE;AACV,YAAA,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE;AAClB,gBAAA,GAAG,MAAM;AACT,gBAAA,GAAG,MAAM;AACV,aAAA,CAAC;;AAEN,KAAC;AACH,CAAC;AAED,MAAM,aAAa,GAAG,OAAO,EAC3B,KAAK,EACL,OAAO,EACP,QAAQ,EACR,UAAU,GAAG,CAAC,GAMf,KAAwC;IACvC,IAAI,CAAC,OAAO,EAAE;AACZ,QAAA,OAAO,CAAC,IAAI,CAAC,oCAAoC,CAAC;QAClD;;IAEF,IAAI,CAAC,QAAQ,EAAE;AACb,QAAA,OAAO,CAAC,IAAI,CAAC,qCAAqC,CAAC;QACnD;;AAGF,IAAA,IAAI;QACF,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;AAClD,QAAA,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE;YAC5B,OAAO,MAAM,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,CAAC;;aACrD;YACL,OAAO,CAAC,KAAK,CACX,yCAAyC,EACzC,OAAO,SAAS,CACjB;YACD;;;IAEF,OAAO,KAAK,EAAE;AACd,QAAA,OAAO,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC;QACpD;;AAEJ,CAAC;AAED,MAAM,eAAe,GAAG,CACtB,MAAe,KAGb;AACF,IAAA,MAAM,MAAM,GAAG;AACb,QAAA,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc;AAC5C,QAAA,MAAM,EAAE,kCAAkC;AAC1C,QAAA,OAAO,EAAE,KAAK;KACf;AAED,IAAA,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,MAAM,KAAK,EAAE,EAAE;AACjD,QAAA,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC;;AAG7D,IAAA,MAAM,UAAU,GAAG,OAAO,EACxB,KAAK,EACL,OAAO,EACP,UAAU,GAAG,CAAC,GACK,KAA6B;AAChD,QAAA,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE;YACjB,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;;AAG3D,QAAA,IAAI;AACF,YAAA,MAAM,OAAO,GAA0B;AACrC,gBAAA,CAAC,EAAE,KAAK;AACR,gBAAA,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC;aAC3C;YAED,IAAI,OAAO,IAAI,IAAI,IAAI,OAAO,KAAK,EAAE,EAAE;gBACrC,OAAO,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,WAAW,EAAE;;AAGvC,YAAA,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,MAAM,CAAC,MAAM,EACb,OAAO,EACP;AACE,gBAAA,OAAO,EAAE;oBACP,WAAW,EAAE,MAAM,CAAC,MAAM;AAC1B,oBAAA,cAAc,EAAE,kBAAkB;AACnC,iBAAA;gBACD,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,aAAA,CACF;AAED,YAAA,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI;AAC1B,YAAA,MAAM,OAAO,GAAuB;gBAClC,OAAO,EAAE,IAAI,CAAC,OAAO;AACrB,gBAAA,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,EAAE;gBACzB,SAAS,EAAE,IAAI,CAAC,SAAS;AACzB,gBAAA,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,EAAE;gBACjC,aAAa,EAAE,IAAI,CAAC,aAAa;gBACjC,cAAc,EAAE,IAAI,CAAC,cAAc;gBACnC,eAAe,EAAE,IAAI,CAAC,eAAe;aACtC;YAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;;QACvC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAuB,oBAAA,EAAA,YAAY,CAAE,CAAA,EAAE;;AAE3E,KAAC;IAED,OAAO,EAAE,UAAU,EAAE;AACvB,CAAC;AAED,MAAM,gBAAgB,GAAG,CACvB,WAAoB,EACpB,MAAe,KAGb;AACF,IAAA,MAAM,MAAM,GAAG;AACb,QAAA,WAAW,EAAE,WAAW,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB;AAC5D,QAAA,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe;AAC7C,QACA,OAAO,EAAE,KAAK;KACf;AAED,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;AAC3D,QAAA,MAAM,IAAI,KAAK,CAAC,kDAAkD,CAAC;;AAGrE,IAAA,MAAM,UAAU,GAAG,OAAO,EACxB,KAAK,EACL,UAAU,GAAG,CAAC,GACK,KAA6B;AAChD,QAAA,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE;YACjB,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;;AAG3D,QAAA,IAAI;;AAEF,YAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;gBAC3D,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,6BAA6B,EAAE;;AAGjE,YAAA,IAAI,SAAS,GAAG,MAAM,CAAC,WAAW;YAClC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE;gBAClC,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,SAAS;;;AAItD,YAAA,MAAM,MAAM,GAA2B;AACrC,gBAAA,CAAC,EAAE,KAAK;AACR,gBAAA,MAAM,EAAE,MAAM;AACd,gBAAA,MAAM,EAAE,CAAC;AACT,gBAAA,UAAU,EAAE,SAAS;AACrB,gBAAA,QAAQ,EAAE,KAAK;AACf,gBAAA,UAAU,EAAE,CAAC;AACb,gBAAA,OAAO,EAAE,wBAAwB;aAClC;AAED,YAAA,MAAM,OAAO,GAA2B;AACtC,gBAAA,cAAc,EAAE,kBAAkB;aACnC;AAED,YAAA,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,MAAM,KAAK,EAAE,EAAE;AACjD,gBAAA,OAAO,CAAC,WAAW,CAAC,GAAG,MAAM,CAAC,MAAM;;YAGtC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,SAAS,EAAE;gBAC1C,OAAO;gBACP,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,aAAA,CAAC;AAEF,YAAA,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI;;YAG1B,MAAM,cAAc,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE;AACvC,iBAAA,KAAK,CAAC,CAAC,EAAE,UAAU;AACnB,iBAAA,GAAG,CAAC,CAAC,MAAuB,MAAM;AACjC,gBAAA,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE;AACzB,gBAAA,IAAI,EAAE,MAAM,CAAC,GAAG,IAAI,EAAE;AACtB,gBAAA,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;AAC7B,gBAAA,IAAI,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;AACjC,aAAA,CAAC,CAAC;;YAGL,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE;iBACrC,MAAM,CAAC,CAAC,MAAuB,KAAK,MAAM,CAAC,OAAO;AAClD,iBAAA,KAAK,CAAC,CAAC,EAAE,CAAC;AACV,iBAAA,GAAG,CAAC,CAAC,MAAuB,MAAM;AACjC,gBAAA,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE;AACzB,gBAAA,QAAQ,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;AAC/B,aAAA,CAAC,CAAC;;AAGL,YAAA,MAAM,OAAO,GAAuB;AAClC,gBAAA,OAAO,EAAE,cAAc;AACvB,gBAAA,MAAM,EAAE,YAAY;AACpB,gBAAA,UAAU,EAAE,EAAE;;AAEd,gBAAA,eAAe,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE;aACxC;YAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;;QACvC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,CAA+B,4BAAA,EAAA,YAAY,CAAE,CAAA;aACrD;;AAEL,KAAC;IAED,OAAO,EAAE,UAAU,EAAE;AACvB,CAAC;AAEY,MAAA,eAAe,GAAG,CAC7B,MAAsB,KAGpB;AACF,IAAA,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,GACd,GAAG,MAAM;AAEV,IAAA,IAAI,cAAc,CAAC,WAAW,EAAE,KAAK,QAAQ,EAAE;AAC7C,QAAA,OAAO,eAAe,CAAC,YAAY,CAAC;;AAC/B,SAAA,IAAI,cAAc,CAAC,WAAW,EAAE,KAAK,SAAS,EAAE;AACrD,QAAA,OAAO,gBAAgB,CAAC,kBAAkB,EAAE,aAAa,CAAC;;SACrD;AACL,QAAA,MAAM,IAAI,KAAK,CACb,4BAA4B,cAAc,CAAA,+BAAA,CAAiC,CAC5E;;AAEL;AAEa,MAAA,qBAAqB,GAAG,CACnC,SAAiC,EAAE,EACnC,eAAkC,KAShC;IACF,IAAI,CAAC,eAAe,EAAE;AACpB,QAAA,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC;;IAE3D,MAAM,EACJ,UAAU,GAAG,CAAC;;;IAGd,QAAQ,GACT,GAAG,MAAM;IAEV,MAAM,gBAAgB,GAAG,eAAe;AAExC,IAAA,MAAM,UAAU,GAAG;QACjB,UAAU,EAAE,OAAO,EACjB,KAAK,EACL,KAAK,GAIN,KAAoC;YACnC,OAAO,CAAC,GAAG,CAAC,CAAA,SAAA,EAAY,KAAK,CAAC,MAAM,CAAuB,qBAAA,CAAA,CAAC;YAC5D,MAAM,QAAQ,GAAmC,EAAE;AACnD,YAAA,IAAI;AACF,gBAAA,KAAK,MAAM,WAAW,IAAI,KAAK,EAAE;oBAC/B,MAAM,OAAO,GAA4B;AACtC,yBAAA,SAAS,CAAC,WAAW,EAAE,EAAE;yBACzB,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,CAAC,KAAI;AACxB,wBAAA,MAAM,WAAW,GAAG,cAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;wBAChE,IAAI,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,EAAE;AACrC,4BAAA,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,GACzB,gBAAgB,CAAC,cAAc,CAAC,QAAQ,CAAC;4BAC3C,OAAO;gCACL,GAAG;gCACH,UAAU;gCACV,WAAW;AACX,gCAAA,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;6BAClB;;wBAGrB,OAAO;4BACL,GAAG;4BACH,WAAW;AACX,4BAAA,KAAK,EAAE,IAAI;4BACX,OAAO,EAAE,oBAAoB,GAAG,CAAA,EAAA,EAAK,QAAQ,CAAC,KAAK,IAAI,eAAe,CAAE,CAAA;yBACvD;AACrB,qBAAC;AACA,yBAAA,IAAI,CAAC,OAAO,MAAM,KAAI;AACrB,wBAAA,IAAI;AACF,4BAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,EAAE;AACxB,gCAAA,OAAO,CAAC,KAAK,CACX,CAAA,eAAA,EAAkB,MAAM,CAAC,GAAG,CAAA,EAAA,EAAK,MAAM,CAAC,OAAO,CAAA,CAAE,CAClD;gCACD,OAAO;AACL,oCAAA,GAAG,MAAM;iCACV;;AAEH,4BAAA,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC;gCACrC,KAAK;gCACL,QAAQ;gCACR,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,6BAAA,CAAC;4BACF,OAAO;AACL,gCAAA,GAAG,MAAM;gCACT,UAAU;6BACX;;wBACD,OAAO,KAAK,EAAE;AACd,4BAAA,OAAO,CAAC,KAAK,CAAC,mCAAmC,EAAE,KAAK,CAAC;4BACzD,OAAO;AACL,gCAAA,GAAG,MAAM;6BACV;;AAEL,qBAAC;AACA,yBAAA,KAAK,CAAC,CAAC,KAAK,KAAI;wBACf,OAAO,CAAC,KAAK,CAAC,CAAA,eAAA,EAAkB,WAAW,CAAG,CAAA,CAAA,EAAE,KAAK,CAAC;wBACtD,OAAO;AACL,4BAAA,GAAG,EAAE,WAAW;AAChB,4BAAA,KAAK,EAAE,IAAI;4BACX,OAAO,EAAE,oBAAoB,WAAW,CAAA,EAAA,EAAK,KAAK,CAAC,OAAO,IAAI,eAAe,CAAE,CAAA;yBAChF;AACH,qBAAC,CAAC;AACJ,oBAAA,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC;;AAExB,gBAAA,OAAO,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;;YAClC,OAAO,KAAK,EAAE;AACd,gBAAA,OAAO,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;AAC5C,gBAAA,OAAO,EAAE;;SAEZ;KACF;AAED,IAAA,MAAM,aAAa,GAAG,OAAO,EAC3B,KAAK,EACL,KAAK,EACL,MAAM,EACN,gBAAgB,GAMjB,KAAmB;QAClB,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC;;AAE3C,QAAA,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;AAC3E,QAAA,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;AAC5B,YAAA,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,EAAE;gBACzB;;AAEF,YAAA,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,UAAU,EAAE,GAAG,MAAM;YACpE,gBAAgB,GAAG,GAAG,EAAE;gBACtB,OAAO;gBACP,WAAW;gBACX,UAAU;gBACV,UAAU;AACX,aAAA,CAAC;;AAEN,KAAC;AAED,IAAA,MAAM,cAAc,GAAG,OACrB,MAAsB,EACtB,WAAmB,EACnB,KAAa,EACb,OAAmB,GAAA,KAAK,KACO;AAC/B,QAAA,IAAI;AACF,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;gBAChB,OAAO;AACL,oBAAA,OAAO,EAAE,EAAE;AACX,oBAAA,UAAU,EAAE,EAAE;AACd,oBAAA,MAAM,EAAE,EAAE;AACV,oBAAA,eAAe,EAAE,EAAE;iBACpB;;AACI,iBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE;gBAC/B,OAAO,MAAM,CAAC,IAAI;;YAGpB,IAAI,CAAC,OAAO,EAAE;gBACZ,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,KACpD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,CACtC;AAED,gBAAA,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE;oBACvB,OAAO,MAAM,CAAC,IAAI;;AAGpB,gBAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAyB;AACtD,gBAAA,aAAa,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC;AACtD,gBAAA,MAAM,gBAAgB,GAAG,0BAA0B,CAAC,aAAa,CAAC;AAClE,gBAAA,MAAM,aAAa,CAAC;oBAClB,KAAK;AACL,oBAAA,MAAM,EAAE,CAAC;oBACT,gBAAgB;oBAChB,KAAK,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7B,iBAAA,CAAC;AAEF,gBAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;oBACnD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;oBACrC,MAAM,aAAa,GAAG,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;oBACpD,IAAI,aAAa,EAAE;AACjB,wBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;AACvB,4BAAA,GAAG,MAAM;AACT,4BAAA,GAAG,aAAa;yBACjB;;;gBAIL,OAAO,MAAM,CAAC,IAAI;;AAGpB,YAAA,MAAM,SAAS,GAAG,IAAI,GAAG,EAAyB;YAClD,MAAM,QAAQ,GAAa,EAAE;YAE7B,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE;AACxC,gBAAA,IAAI,MAAM,CAAC,IAAI,EAAE;AACf,oBAAA,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC;oBAC1B,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC;;;AAItC,YAAA,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE;gBACzB,OAAO,MAAM,CAAC,IAAI;;AAGpB,YAAA,MAAM,gBAAgB,GAAG,0BAA0B,CAAC,SAAS,CAAC;AAC9D,YAAA,MAAM,aAAa,CAAC;AAClB,gBAAA,KAAK,EAAE,QAAQ;gBACf,KAAK;gBACL,gBAAgB;AAChB,gBAAA,MAAM,EAAE,WAAW;AACpB,aAAA,CAAC;AAEF,YAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;gBACrC,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;gBAChD,IAAI,aAAa,EAAE;AACjB,oBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;AACvB,wBAAA,GAAG,MAAM;AACT,wBAAA,GAAG,aAAa;qBACjB;;;AAIL,YAAA,MAAM,iBAAiB,GAAG,MAAM,CAAC,IAAI,CAAC;iBACnC,MAAM,CACL,CAAC,MAAM,KACL,MAAM,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC;AAEjE,iBAAA,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC;AAExB,YAAA,IAAI,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE;AAChC,gBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,GAAG,iBAAiB;;YAEzC,OAAO,MAAM,CAAC,IAAI;;QAClB,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAE,KAAK,CAAC;YAChD,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,eAAe,EAAE,EAAE;gBACnB,GAAG,MAAM,CAAC,IAAI;AACd,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;IAED,OAAO;QACL,cAAc;QACd,UAAU;KACX;AACH;;;;"}
|
|
@@ -8,8 +8,7 @@ import { createReranker } from './rerankers.mjs';
|
|
|
8
8
|
import { Constants } from '../../common/enum.mjs';
|
|
9
9
|
|
|
10
10
|
/* eslint-disable no-console */
|
|
11
|
-
const
|
|
12
|
-
query: z.string().describe(`
|
|
11
|
+
const DEFAULT_QUERY_DESCRIPTION = `
|
|
13
12
|
GUIDELINES:
|
|
14
13
|
- Start broad, then narrow: Begin with key concepts, then refine with specifics
|
|
15
14
|
- Think like sources: Use terminology experts would use in the field
|
|
@@ -27,10 +26,35 @@ TECHNIQUES (combine for power searches):
|
|
|
27
26
|
- SPECIFIC QUESTIONS: Use who/what/when/where/why/how
|
|
28
27
|
- DOMAIN TERMS: Include technical terminology for specialized topics
|
|
29
28
|
- CONCISE TERMS: Prioritize keywords over sentences
|
|
30
|
-
`.trim()
|
|
31
|
-
|
|
29
|
+
`.trim();
|
|
30
|
+
const DEFAULT_COUNTRY_DESCRIPTION = `Country code to localize search results.
|
|
31
|
+
Use standard 2-letter country codes: "us", "uk", "ca", "de", "fr", "jp", "br", etc.
|
|
32
|
+
Provide this when the search should return results specific to a particular country.
|
|
33
|
+
Examples:
|
|
34
|
+
- "us" for United States (default)
|
|
35
|
+
- "de" for Germany
|
|
36
|
+
- "in" for India
|
|
37
|
+
`.trim();
|
|
38
|
+
/**
|
|
39
|
+
* Creates a search tool with a schema that dynamically includes the country field
|
|
40
|
+
* only when the searchProvider is 'serper'.
|
|
41
|
+
*
|
|
42
|
+
* @param config - The search tool configuration
|
|
43
|
+
* @returns A DynamicStructuredTool with a schema that depends on the searchProvider
|
|
44
|
+
*/
|
|
32
45
|
const createSearchTool = (config = {}) => {
|
|
33
46
|
const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, firecrawlApiKey, firecrawlApiUrl, firecrawlFormats = ['markdown', 'html'], jinaApiKey, cohereApiKey, onSearchResults: _onSearchResults, } = config;
|
|
47
|
+
const querySchema = z.string().describe(DEFAULT_QUERY_DESCRIPTION);
|
|
48
|
+
const schemaObject = {
|
|
49
|
+
query: querySchema,
|
|
50
|
+
};
|
|
51
|
+
if (searchProvider === 'serper') {
|
|
52
|
+
schemaObject.country = z
|
|
53
|
+
.string()
|
|
54
|
+
.optional()
|
|
55
|
+
.describe(DEFAULT_COUNTRY_DESCRIPTION);
|
|
56
|
+
}
|
|
57
|
+
const SearchToolSchema = z.object(schemaObject);
|
|
34
58
|
const searchAPI = createSearchAPI({
|
|
35
59
|
searchProvider,
|
|
36
60
|
serperApiKey,
|
|
@@ -53,9 +77,9 @@ const createSearchTool = (config = {}) => {
|
|
|
53
77
|
const sourceProcessor = createSourceProcessor({
|
|
54
78
|
reranker: selectedReranker,
|
|
55
79
|
topResults}, firecrawlScraper);
|
|
56
|
-
const search = async ({ query, proMode = true, maxSources = 5, onSearchResults, }) => {
|
|
80
|
+
const search = async ({ query, country, proMode = true, maxSources = 5, onSearchResults, }) => {
|
|
57
81
|
try {
|
|
58
|
-
const sources = await searchAPI.getSources(query);
|
|
82
|
+
const sources = await searchAPI.getSources({ query, country });
|
|
59
83
|
onSearchResults?.(sources);
|
|
60
84
|
if (!sources.success) {
|
|
61
85
|
throw new Error(sources.error ?? 'Search failed');
|
|
@@ -74,9 +98,12 @@ const createSearchTool = (config = {}) => {
|
|
|
74
98
|
};
|
|
75
99
|
}
|
|
76
100
|
};
|
|
77
|
-
return tool(async (
|
|
101
|
+
return tool(async (params, runnableConfig) => {
|
|
102
|
+
const { query, country: _c } = params;
|
|
103
|
+
const country = typeof _c === 'string' && _c ? _c : undefined;
|
|
78
104
|
const searchResult = await search({
|
|
79
105
|
query,
|
|
106
|
+
country,
|
|
80
107
|
onSearchResults: _onSearchResults
|
|
81
108
|
? (result) => {
|
|
82
109
|
_onSearchResults(result, runnableConfig);
|
|
@@ -84,16 +111,19 @@ const createSearchTool = (config = {}) => {
|
|
|
84
111
|
: undefined,
|
|
85
112
|
});
|
|
86
113
|
const turn = runnableConfig.toolCall?.turn ?? 0;
|
|
87
|
-
const output = formatResultsForLLM(turn, searchResult);
|
|
88
|
-
|
|
114
|
+
const { output, references } = formatResultsForLLM(turn, searchResult);
|
|
115
|
+
const data = { turn, ...searchResult, references };
|
|
116
|
+
return [output, { [Constants.WEB_SEARCH]: data }];
|
|
89
117
|
}, {
|
|
90
118
|
name: Constants.WEB_SEARCH,
|
|
91
119
|
description: `
|
|
92
|
-
Real-time search. Results have required
|
|
120
|
+
Real-time search. Results have required citation anchors.
|
|
121
|
+
|
|
122
|
+
Note: Use ONCE per reply unless instructed otherwise.
|
|
93
123
|
|
|
94
124
|
Anchors:
|
|
95
|
-
- \\
|
|
96
|
-
- X = turn, Y = item
|
|
125
|
+
- \\ue202turnXtypeY
|
|
126
|
+
- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx
|
|
97
127
|
|
|
98
128
|
Special Markers:
|
|
99
129
|
- \\ue203...\\ue204 — highlight start/end of cited text (for Standalone or Group citations)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool.mjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as t from './types';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\nconst
|
|
1
|
+
{"version":3,"file":"tool.mjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as t from './types';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\nconst DEFAULT_QUERY_DESCRIPTION = `\nGUIDELINES:\n- Start broad, then narrow: Begin with key concepts, then refine with specifics\n- Think like sources: Use terminology experts would use in the field\n- Consider perspective: Frame queries from different viewpoints for better results\n- Quality over quantity: A precise 3-4 word query often beats lengthy sentences\n\nTECHNIQUES (combine for power searches):\n- EXACT PHRASES: Use quotes (\"climate change report\")\n- EXCLUDE TERMS: Use minus to remove unwanted results (-wikipedia)\n- SITE-SPECIFIC: Restrict to websites (site:edu research)\n- FILETYPE: Find specific documents (filetype:pdf study)\n- OR OPERATOR: Find alternatives (electric OR hybrid cars)\n- DATE RANGE: Recent information (data after:2020)\n- WILDCARDS: Use * for unknown terms (how to * bread)\n- SPECIFIC QUESTIONS: Use who/what/when/where/why/how\n- DOMAIN TERMS: Include technical terminology for specialized topics\n- CONCISE TERMS: Prioritize keywords over sentences\n`.trim();\n\nconst DEFAULT_COUNTRY_DESCRIPTION = `Country code to localize search results.\nUse standard 2-letter country codes: \"us\", \"uk\", \"ca\", \"de\", \"fr\", \"jp\", \"br\", etc.\nProvide this when the search should return results specific to a particular country.\nExamples:\n- \"us\" for United States (default)\n- \"de\" for Germany\n- \"in\" for India\n`.trim();\n\n/**\n * Creates a search tool with a schema that dynamically includes the country field\n * only when the searchProvider is 'serper'.\n *\n * @param config - The search tool configuration\n * @returns A DynamicStructuredTool with a schema that depends on the searchProvider\n */\nexport const createSearchTool = (\n config: t.SearchToolConfig = {}\n): DynamicStructuredTool<typeof SearchToolSchema> => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n rerankerType = 'cohere',\n topResults = 5,\n strategies = ['no_extraction'],\n filterContent = true,\n firecrawlApiKey,\n firecrawlApiUrl,\n firecrawlFormats = ['markdown', 'html'],\n jinaApiKey,\n cohereApiKey,\n onSearchResults: _onSearchResults,\n } = config;\n\n const querySchema = z.string().describe(DEFAULT_QUERY_DESCRIPTION);\n const schemaObject: {\n query: z.ZodString;\n country?: z.ZodOptional<z.ZodString>;\n } = {\n query: querySchema,\n };\n\n if (searchProvider === 'serper') {\n schemaObject.country = z\n .string()\n .optional()\n .describe(DEFAULT_COUNTRY_DESCRIPTION);\n }\n\n const SearchToolSchema = z.object(schemaObject);\n\n const searchAPI = createSearchAPI({\n searchProvider,\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n });\n\n const firecrawlScraper = createFirecrawlScraper({\n apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,\n apiUrl: firecrawlApiUrl,\n formats: firecrawlFormats,\n });\n\n const selectedReranker = createReranker({\n rerankerType,\n jinaApiKey,\n cohereApiKey,\n });\n\n if (!selectedReranker) {\n console.warn('No reranker selected. Using default ranking.');\n }\n\n const sourceProcessor = createSourceProcessor(\n {\n reranker: selectedReranker,\n topResults,\n strategies,\n filterContent,\n },\n firecrawlScraper\n );\n\n const search = async ({\n query,\n country,\n proMode = true,\n maxSources = 5,\n onSearchResults,\n }: {\n query: string;\n country?: string;\n maxSources?: number;\n proMode?: boolean;\n onSearchResults?: (sources: t.SearchResult) => void;\n }): Promise<t.SearchResultData> => {\n try {\n const sources = await searchAPI.getSources({ query, country });\n onSearchResults?.(sources);\n\n if (!sources.success) {\n throw new Error(sources.error ?? 'Search failed');\n }\n\n const processedSources = await sourceProcessor.processSources(\n sources,\n maxSources,\n query,\n proMode\n );\n return expandHighlights(processedSources);\n } catch (error) {\n console.error('Error in search:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n\n return tool<typeof SearchToolSchema>(\n async (params, runnableConfig) => {\n const { query, country: _c } = params;\n const country = typeof _c === 'string' && _c ? _c : undefined;\n const searchResult = await search({\n query,\n country,\n onSearchResults: _onSearchResults\n ? (result): void => {\n _onSearchResults(result, runnableConfig);\n }\n : undefined,\n });\n const turn = runnableConfig.toolCall?.turn ?? 0;\n const { output, references } = formatResultsForLLM(turn, searchResult);\n const data: t.SearchResultData = { turn, ...searchResult, references };\n return [output, { [Constants.WEB_SEARCH]: data }];\n },\n {\n name: Constants.WEB_SEARCH,\n description: `\nReal-time search. Results have required citation anchors.\n\nNote: Use ONCE per reply unless instructed otherwise.\n\nAnchors:\n- \\\\ue202turnXtypeY\n- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx\n\nSpecial Markers:\n- \\\\ue203...\\\\ue204 — highlight start/end of cited text (for Standalone or Group citations)\n- \\\\ue200...\\\\ue201 — group block (e.g. \\\\ue200\\\\ue202turn0search1\\\\ue202turn0news2\\\\ue201)\n\n**CITE EVERY NON-OBVIOUS FACT/QUOTE:**\nUse anchor marker(s) immediately after the statement:\n- Standalone: \"Pure functions produce same output. \\\\ue202turn0search0\"\n- Standalone (multiple): \"Today's News \\\\ue202turn0search0\\\\ue202turn0news0\"\n- Highlight: \"\\\\ue203Highlight text.\\\\ue204\\\\ue202turn0news1\"\n- Group: \"Sources. \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Group Highlight: \"\\\\ue203Highlight for group.\\\\ue204 \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Image: \"See photo \\\\ue202turn0image0.\"\n\n**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**\n`.trim(),\n schema: SearchToolSchema,\n responseFormat: Constants.CONTENT_AND_ARTIFACT,\n }\n );\n};\n"],"names":[],"mappings":";;;;;;;;;AAAA;AAWA,MAAM,yBAAyB,GAAG;;;;;;;;;;;;;;;;;;CAkBjC,CAAC,IAAI,EAAE;AAER,MAAM,2BAA2B,GAAG,CAAA;;;;;;;CAOnC,CAAC,IAAI,EAAE;AAER;;;;;;AAMG;MACU,gBAAgB,GAAG,CAC9B,MAA6B,GAAA,EAAE,KACmB;IAClD,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,EACb,YAAY,GAAG,QAAQ,EACvB,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,eAAe,CAAC,EAC9B,aAAa,GAAG,IAAI,EACpB,eAAe,EACf,eAAe,EACf,gBAAgB,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,EACvC,UAAU,EACV,YAAY,EACZ,eAAe,EAAE,gBAAgB,GAClC,GAAG,MAAM;IAEV,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC;AAClE,IAAA,MAAM,YAAY,GAGd;AACF,QAAA,KAAK,EAAE,WAAW;KACnB;AAED,IAAA,IAAI,cAAc,KAAK,QAAQ,EAAE;QAC/B,YAAY,CAAC,OAAO,GAAG;AACpB,aAAA,MAAM;AACN,aAAA,QAAQ;aACR,QAAQ,CAAC,2BAA2B,CAAC;;IAG1C,MAAM,gBAAgB,GAAG,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC;IAE/C,MAAM,SAAS,GAAG,eAAe,CAAC;QAChC,cAAc;QACd,YAAY;QACZ,kBAAkB;QAClB,aAAa;AACd,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAG,sBAAsB,CAAC;AAC9C,QAAA,MAAM,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB;AACxD,QAAA,MAAM,EAAE,eAAe;AACvB,QAAA,OAAO,EAAE,gBAAgB;AAC1B,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAG,cAAc,CAAC;QACtC,YAAY;QACZ,UAAU;QACV,YAAY;AACb,KAAA,CAAC;IAEF,IAAI,CAAC,gBAAgB,EAAE;AACrB,QAAA,OAAO,CAAC,IAAI,CAAC,8CAA8C,CAAC;;IAG9D,MAAM,eAAe,GAAG,qBAAqB,CAC3C;AACE,QAAA,QAAQ,EAAE,gBAAgB;QAC1B,WAGD,EACD,gBAAgB,CACjB;IAED,MAAM,MAAM,GAAG,OAAO,EACpB,KAAK,EACL,OAAO,EACP,OAAO,GAAG,IAAI,EACd,UAAU,GAAG,CAAC,EACd,eAAe,GAOhB,KAAiC;AAChC,QAAA,IAAI;AACF,YAAA,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;AAC9D,YAAA,eAAe,GAAG,OAAO,CAAC;AAE1B,YAAA,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE;gBACpB,MAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,IAAI,eAAe,CAAC;;AAGnD,YAAA,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAAC,cAAc,CAC3D,OAAO,EACP,UAAU,EACV,KAAK,EACL,OAAO,CACR;AACD,YAAA,OAAO,gBAAgB,CAAC,gBAAgB,CAAC;;QACzC,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,kBAAkB,EAAE,KAAK,CAAC;YACxC,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,eAAe,EAAE,EAAE;AACnB,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;IAED,OAAO,IAAI,CACT,OAAO,MAAM,EAAE,cAAc,KAAI;QAC/B,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE,GAAG,MAAM;AACrC,QAAA,MAAM,OAAO,GAAG,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,GAAG,EAAE,GAAG,SAAS;AAC7D,QAAA,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC;YAChC,KAAK;YACL,OAAO;AACP,YAAA,eAAe,EAAE;AACf,kBAAE,CAAC,MAAM,KAAU;AACjB,oBAAA,gBAAgB,CAAC,MAAM,EAAE,cAAc,CAAC;;AAE1C,kBAAE,SAAS;AACd,SAAA,CAAC;QACF,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC;AAC/C,QAAA,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,mBAAmB,CAAC,IAAI,EAAE,YAAY,CAAC;QACtE,MAAM,IAAI,GAAuB,EAAE,IAAI,EAAE,GAAG,YAAY,EAAE,UAAU,EAAE;AACtE,QAAA,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,SAAS,CAAC,UAAU,GAAG,IAAI,EAAE,CAAC;AACnD,KAAC,EACD;QACE,IAAI,EAAE,SAAS,CAAC,UAAU;AAC1B,QAAA,WAAW,EAAE;;;;;;;;;;;;;;;;;;;;;;;AAuBlB,CAAA,CAAC,IAAI,EAAE;AACF,QAAA,MAAM,EAAE,gBAAgB;QACxB,cAAc,EAAE,SAAS,CAAC,oBAAoB;AAC/C,KAAA,CACF;AACH;;;;"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
const getDomainName = (link, metadata) => {
|
|
2
|
+
try {
|
|
3
|
+
const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');
|
|
4
|
+
const domain = new URL(url).hostname.replace(/^www\./, '');
|
|
5
|
+
if (domain) {
|
|
6
|
+
return domain;
|
|
7
|
+
}
|
|
8
|
+
}
|
|
9
|
+
catch (e) {
|
|
10
|
+
// URL parsing failed
|
|
11
|
+
console.error('Error parsing URL:', e);
|
|
12
|
+
}
|
|
13
|
+
return;
|
|
14
|
+
};
|
|
15
|
+
function getAttribution(link, metadata) {
|
|
16
|
+
if (!metadata)
|
|
17
|
+
return getDomainName(link, metadata);
|
|
18
|
+
const possibleAttributions = [
|
|
19
|
+
metadata.ogSiteName,
|
|
20
|
+
metadata['og:site_name'],
|
|
21
|
+
metadata.title?.split('|').pop()?.trim(),
|
|
22
|
+
metadata['twitter:site']?.replace(/^@/, ''),
|
|
23
|
+
];
|
|
24
|
+
const attribution = possibleAttributions.find((attr) => attr != null && typeof attr === 'string' && attr.trim() !== '');
|
|
25
|
+
if (attribution != null) {
|
|
26
|
+
return attribution;
|
|
27
|
+
}
|
|
28
|
+
return getDomainName(link, metadata);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
export { getAttribution, getDomainName };
|
|
32
|
+
//# sourceMappingURL=utils.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.mjs","sources":["../../../../src/tools/search/utils.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport type * as t from './types';\n\nexport const getDomainName = (\n link: string,\n metadata?: t.ScrapeMetadata\n): string | undefined => {\n try {\n const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');\n const domain = new URL(url).hostname.replace(/^www\\./, '');\n if (domain) {\n return domain;\n }\n } catch (e) {\n // URL parsing failed\n console.error('Error parsing URL:', e);\n }\n\n return;\n};\n\nexport function getAttribution(\n link: string,\n metadata?: t.ScrapeMetadata\n): string | undefined {\n if (!metadata) return getDomainName(link, metadata);\n\n const possibleAttributions = [\n metadata.ogSiteName,\n metadata['og:site_name'],\n metadata.title?.split('|').pop()?.trim(),\n metadata['twitter:site']?.replace(/^@/, ''),\n ];\n\n const attribution = possibleAttributions.find(\n (attr) => attr != null && typeof attr === 'string' && attr.trim() !== ''\n );\n if (attribution != null) {\n return attribution;\n }\n\n return getDomainName(link, metadata);\n}\n"],"names":[],"mappings":"MAGa,aAAa,GAAG,CAC3B,IAAY,EACZ,QAA2B,KACL;AACtB,IAAA,IAAI;AACF,QAAA,MAAM,GAAG,GAAG,QAAQ,EAAE,SAAS,IAAI,QAAQ,EAAE,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC;AAChE,QAAA,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1D,IAAI,MAAM,EAAE;AACV,YAAA,OAAO,MAAM;;;IAEf,OAAO,CAAC,EAAE;;AAEV,QAAA,OAAO,CAAC,KAAK,CAAC,oBAAoB,EAAE,CAAC,CAAC;;IAGxC;AACF;AAEgB,SAAA,cAAc,CAC5B,IAAY,EACZ,QAA2B,EAAA;AAE3B,IAAA,IAAI,CAAC,QAAQ;AAAE,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AAEnD,IAAA,MAAM,oBAAoB,GAAG;AAC3B,QAAA,QAAQ,CAAC,UAAU;QACnB,QAAQ,CAAC,cAAc,CAAC;AACxB,QAAA,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE;QACxC,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;KAC5C;IAED,MAAM,WAAW,GAAG,oBAAoB,CAAC,IAAI,CAC3C,CAAC,IAAI,KAAK,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CACzE;AACD,IAAA,IAAI,WAAW,IAAI,IAAI,EAAE;AACvB,QAAA,OAAO,WAAW;;AAGpB,IAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AACtC;;;;"}
|
package/dist/types/index.d.ts
CHANGED