@librechat/agents 2.4.317 → 2.4.318
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/content.cjs +140 -0
- package/dist/cjs/tools/search/content.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +17 -37
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +79 -29
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs +64 -13
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +13 -15
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +44 -12
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +35 -0
- package/dist/cjs/tools/search/utils.cjs.map +1 -0
- package/dist/esm/tools/search/content.mjs +119 -0
- package/dist/esm/tools/search/content.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +18 -37
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +79 -29
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs +64 -13
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +12 -14
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +44 -12
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +32 -0
- package/dist/esm/tools/search/utils.mjs.map +1 -0
- package/dist/types/tools/search/content.d.ts +4 -0
- package/dist/types/tools/search/firecrawl.d.ts +6 -86
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/highlights.d.ts +1 -1
- package/dist/types/tools/search/search.d.ts +1 -1
- package/dist/types/tools/search/test.d.ts +1 -0
- package/dist/types/tools/search/tool.d.ts +12 -4
- package/dist/types/tools/search/types.d.ts +380 -46
- package/dist/types/tools/search/utils.d.ts +3 -0
- package/package.json +2 -1
- package/src/scripts/search.ts +5 -3
- package/src/tools/search/content.test.ts +173 -0
- package/src/tools/search/content.ts +147 -0
- package/src/tools/search/firecrawl.ts +27 -144
- package/src/tools/search/format.ts +89 -31
- package/src/tools/search/highlights.ts +99 -17
- package/src/tools/search/output.md +2775 -0
- package/src/tools/search/search.ts +42 -54
- package/src/tools/search/test.html +884 -0
- package/src/tools/search/test.md +643 -0
- package/src/tools/search/test.ts +159 -0
- package/src/tools/search/tool.ts +54 -15
- package/src/tools/search/types.ts +430 -52
- package/src/tools/search/utils.ts +43 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"highlights.cjs","sources":["../../../../src/tools/search/highlights.ts"],"sourcesContent":["import type * as t from './types';\n\n// 2. Pre-compile all regular expressions (only do this once)\n// Group patterns by priority for early returns\nconst priorityPatterns = [\n // High priority patterns (structural)\n [\n { regex: /\\n\\n/g }, // Double newline (paragraph break)\n { regex: /\\n/g }, // Single newline\n { regex: /={3,}\\s*\\n|-{3,}\\s*\\n/g }, // Section separators\n ],\n // Medium priority (semantic)\n [\n { regex: /[.!?][\")\\]]?\\s/g }, // End of sentence\n { regex: /;\\s/g }, // Semicolon\n { regex: /:\\s/g }, // Colon\n ],\n // Low priority (any breaks)\n [\n { regex: /,\\s/g }, // Comma\n { regex: /\\s-\\s/g }, // Dash surrounded by spaces\n { regex: /\\s/g }, // Any space\n ],\n];\n\nfunction findFirstMatch(text: string, regex: RegExp): number {\n // Reset regex\n regex.lastIndex = 0;\n\n // For very long texts, try chunking\n if (text.length > 10000) {\n const chunkSize = 2000;\n let position = 0;\n\n while (position < text.length) {\n const chunk = text.substring(position, position + chunkSize);\n regex.lastIndex = 0;\n\n const match = regex.exec(chunk);\n if (match) {\n return position + match.index;\n }\n\n // Move to next chunk with some overlap\n position += chunkSize - 100;\n if (position >= text.length) break;\n }\n return -1;\n }\n\n // For shorter texts, normal regex search\n const match = regex.exec(text);\n return match ? match.index : -1;\n}\n\n// 3. Optimized boundary finding functions\nfunction findLastMatch(text: string, regex: RegExp): number {\n // Reset regex state\n regex.lastIndex = 0;\n\n let lastIndex = -1;\n let lastLength = 0;\n let match;\n\n // For very long texts, use a different approach to avoid regex engine slowdowns\n if (text.length > 10000) {\n // Try dividing the text into chunks for faster processing\n const chunkSize = 2000;\n let startPosition = Math.max(0, text.length - chunkSize);\n\n while (startPosition >= 0) {\n const chunk = text.substring(startPosition, startPosition + chunkSize);\n regex.lastIndex = 0;\n\n let chunkLastIndex = -1;\n let chunkLastLength = 0;\n\n while ((match = regex.exec(chunk)) !== null) {\n chunkLastIndex = match.index;\n chunkLastLength = match[0].length;\n }\n\n if (chunkLastIndex !== -1) {\n return startPosition + chunkLastIndex + chunkLastLength;\n }\n\n // Move to previous chunk with some overlap\n startPosition = Math.max(0, startPosition - chunkSize + 100) - 1;\n if (startPosition <= 0) break;\n }\n return -1;\n }\n\n // For shorter texts, normal regex search\n while ((match = regex.exec(text)) !== null) {\n lastIndex = match.index;\n lastLength = match[0].length;\n }\n\n return lastIndex === -1 ? -1 : lastIndex + lastLength;\n}\n\n// 4. Find the best boundary with priority groups\nfunction findBestBoundary(text: string, direction = 'backward'): number {\n if (!text || text.length === 0) return 0;\n\n // Try each priority group\n for (const patternGroup of priorityPatterns) {\n for (const pattern of patternGroup) {\n const position =\n direction === 'backward'\n ? findLastMatch(text, pattern.regex)\n : findFirstMatch(text, pattern.regex);\n\n if (position !== -1) {\n return position;\n }\n }\n }\n\n // No match found, use character boundary\n return direction === 'backward' ? text.length : 0;\n}\n\n/**\n * Expand highlights in search results using smart boundary detection.\n *\n * This implementation finds natural text boundaries like paragraphs, sentences,\n * and phrases to provide context while maintaining readability.\n *\n * @param searchResults - Search results object\n * @param mainExpandBy - Primary expansion size on each side (default: 300)\n * @param separatorExpandBy - Additional range to look for separators (default: 150)\n * @returns Copy of search results with expanded highlights\n */\nexport function expandHighlights(\n searchResults: t.SearchResultData,\n mainExpandBy = 300,\n separatorExpandBy = 150\n): t.SearchResultData {\n // 1. Avoid full deep copy - only copy what we modify\n const resultCopy = { ...searchResults };\n\n // Only deep copy the relevant arrays\n if (resultCopy.organic) {\n resultCopy.organic = [...resultCopy.organic];\n }\n if (resultCopy.topStories) {\n resultCopy.topStories = [...resultCopy.topStories];\n }\n\n // 5. Process the results efficiently\n const processResultTypes = ['organic', 'topStories'] as const;\n\n for (const resultType of processResultTypes) {\n if (!resultCopy[resultType as 'organic' | 'topStories']) continue;\n\n // Map results to new array with modified highlights\n resultCopy[resultType] = resultCopy[resultType]?.map((result) => {\n if (\n result.content == null ||\n result.content === '' ||\n !result.highlights ||\n result.highlights.length === 0\n ) {\n return result; // No modification needed\n }\n\n // Create a shallow copy with expanded highlights\n const resultCopy = { ...result };\n const content = result.content;\n const highlights = [];\n\n // Process each highlight\n for (const highlight of result.highlights) {\n const highlightText = highlight.text;\n\n let startPos = content.indexOf(highlightText);\n let highlightLen = highlightText.length;\n\n if (startPos === -1) {\n // Try with stripped whitespace\n const strippedHighlight = highlightText.trim();\n startPos = content.indexOf(strippedHighlight);\n\n if (startPos === -1) {\n highlights.push({\n text: highlight.text,\n score: highlight.score,\n });\n continue;\n }\n highlightLen = strippedHighlight.length;\n }\n\n // Calculate boundaries\n const mainStart = Math.max(0, startPos - mainExpandBy);\n const mainEnd = Math.min(\n content.length,\n startPos + highlightLen + mainExpandBy\n );\n\n const separatorStart = Math.max(0, mainStart - separatorExpandBy);\n const separatorEnd = Math.min(\n content.length,\n mainEnd + separatorExpandBy\n );\n\n // Extract text segments\n const headText = content.substring(separatorStart, mainStart);\n const tailText = content.substring(mainEnd, separatorEnd);\n\n // Find natural boundaries\n const bestHeadBoundary = findBestBoundary(headText, 'backward');\n const bestTailBoundary = findBestBoundary(tailText, 'forward');\n\n // Calculate final positions\n const finalStart = separatorStart + bestHeadBoundary;\n const finalEnd = mainEnd + bestTailBoundary;\n\n // Extract the expanded highlight\n const expandedHighlightText = content\n .substring(finalStart, finalEnd)\n .trim();\n highlights.push({\n text: expandedHighlightText,\n score: highlight.score,\n });\n }\n\n delete resultCopy.content;\n resultCopy.highlights = highlights;\n return resultCopy;\n });\n }\n\n return resultCopy;\n}\n"],"names":[],"mappings":";;AAEA;AACA;AACA,MAAM,gBAAgB,GAAG;;AAEvB,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,OAAO,EAAE;AAClB,QAAA,EAAE,KAAK,EAAE,KAAK,EAAE;AAChB,QAAA,EAAE,KAAK,EAAE,wBAAwB,EAAE;AACpC,KAAA;;AAED,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,iBAAiB,EAAE;AAC5B,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AACjB,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AAClB,KAAA;;AAED,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AACjB,QAAA,EAAE,KAAK,EAAE,QAAQ,EAAE;AACnB,QAAA,EAAE,KAAK,EAAE,KAAK,EAAE;AACjB,KAAA;CACF;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,KAAa,EAAA;;AAEjD,IAAA,KAAK,CAAC,SAAS,GAAG,CAAC;;AAGnB,IAAA,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE;QACvB,MAAM,SAAS,GAAG,IAAI;QACtB,IAAI,QAAQ,GAAG,CAAC;AAEhB,QAAA,OAAO,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE;AAC7B,YAAA,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,QAAQ,GAAG,SAAS,CAAC;AAC5D,YAAA,KAAK,CAAC,SAAS,GAAG,CAAC;YAEnB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC;YAC/B,IAAI,KAAK,EAAE;AACT,gBAAA,OAAO,QAAQ,GAAG,KAAK,CAAC,KAAK;;;AAI/B,YAAA,QAAQ,IAAI,SAAS,GAAG,GAAG;AAC3B,YAAA,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM;gBAAE;;QAE/B,OAAO,EAAE;;;IAIX,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AAC9B,IAAA,OAAO,KAAK,GAAG,KAAK,CAAC,KAAK,GAAG,EAAE;AACjC;AAEA;AACA,SAAS,aAAa,CAAC,IAAY,EAAE,KAAa,EAAA;;AAEhD,IAAA,KAAK,CAAC,SAAS,GAAG,CAAC;AAEnB,IAAA,IAAI,SAAS,GAAG,EAAE;IAClB,IAAI,UAAU,GAAG,CAAC;AAClB,IAAA,IAAI,KAAK;;AAGT,IAAA,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE;;QAEvB,MAAM,SAAS,GAAG,IAAI;AACtB,QAAA,IAAI,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;AAExD,QAAA,OAAO,aAAa,IAAI,CAAC,EAAE;AACzB,YAAA,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,aAAa,EAAE,aAAa,GAAG,SAAS,CAAC;AACtE,YAAA,KAAK,CAAC,SAAS,GAAG,CAAC;AAEnB,YAAA,IAAI,cAAc,GAAG,EAAE;YACvB,IAAI,eAAe,GAAG,CAAC;AAEvB,YAAA,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,EAAE;AAC3C,gBAAA,cAAc,GAAG,KAAK,CAAC,KAAK;AAC5B,gBAAA,eAAe,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;;AAGnC,YAAA,IAAI,cAAc,KAAK,EAAE,EAAE;AACzB,gBAAA,OAAO,aAAa,GAAG,cAAc,GAAG,eAAe;;;AAIzD,YAAA,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,aAAa,GAAG,SAAS,GAAG,GAAG,CAAC,GAAG,CAAC;YAChE,IAAI,aAAa,IAAI,CAAC;gBAAE;;QAE1B,OAAO,EAAE;;;AAIX,IAAA,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE;AAC1C,QAAA,SAAS,GAAG,KAAK,CAAC,KAAK;AACvB,QAAA,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;;AAG9B,IAAA,OAAO,SAAS,KAAK,EAAE,GAAG,EAAE,GAAG,SAAS,GAAG,UAAU;AACvD;AAEA;AACA,SAAS,gBAAgB,CAAC,IAAY,EAAE,SAAS,GAAG,UAAU,EAAA;AAC5D,IAAA,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;AAAE,QAAA,OAAO,CAAC;;AAGxC,IAAA,KAAK,MAAM,YAAY,IAAI,gBAAgB,EAAE;AAC3C,QAAA,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE;AAClC,YAAA,MAAM,QAAQ,GACZ,SAAS,KAAK;kBACV,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK;kBACjC,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC;AAEzC,YAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;AACnB,gBAAA,OAAO,QAAQ;;;;;AAMrB,IAAA,OAAO,SAAS,KAAK,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC;AACnD;AAEA;;;;;;;;;;AAUG;AACG,SAAU,gBAAgB,CAC9B,aAAiC,EACjC,YAAY,GAAG,GAAG,EAClB,iBAAiB,GAAG,GAAG,EAAA;;AAGvB,IAAA,MAAM,UAAU,GAAG,EAAE,GAAG,aAAa,EAAE;;AAGvC,IAAA,IAAI,UAAU,CAAC,OAAO,EAAE;QACtB,UAAU,CAAC,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC;;AAE9C,IAAA,IAAI,UAAU,CAAC,UAAU,EAAE;QACzB,UAAU,CAAC,UAAU,GAAG,CAAC,GAAG,UAAU,CAAC,UAAU,CAAC;;;AAIpD,IAAA,MAAM,kBAAkB,GAAG,CAAC,SAAS,EAAE,YAAY,CAAU;AAE7D,IAAA,KAAK,MAAM,UAAU,IAAI,kBAAkB,EAAE;AAC3C,QAAA,IAAI,CAAC,UAAU,CAAC,UAAsC,CAAC;YAAE;;AAGzD,QAAA,UAAU,CAAC,UAAU,CAAC,GAAG,UAAU,CAAC,UAAU,CAAC,EAAE,GAAG,CAAC,CAAC,MAAM,KAAI;AAC9D,YAAA,IACE,MAAM,CAAC,OAAO,IAAI,IAAI;gBACtB,MAAM,CAAC,OAAO,KAAK,EAAE;gBACrB,CAAC,MAAM,CAAC,UAAU;AAClB,gBAAA,MAAM,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAC9B;gBACA,OAAO,MAAM,CAAC;;;AAIhB,YAAA,MAAM,UAAU,GAAG,EAAE,GAAG,MAAM,EAAE;AAChC,YAAA,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO;YAC9B,MAAM,UAAU,GAAG,EAAE;;AAGrB,YAAA,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,UAAU,EAAE;AACzC,gBAAA,MAAM,aAAa,GAAG,SAAS,CAAC,IAAI;gBAEpC,IAAI,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC;AAC7C,gBAAA,IAAI,YAAY,GAAG,aAAa,CAAC,MAAM;AAEvC,gBAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;;AAEnB,oBAAA,MAAM,iBAAiB,GAAG,aAAa,CAAC,IAAI,EAAE;AAC9C,oBAAA,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,CAAC;AAE7C,oBAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;wBACnB,UAAU,CAAC,IAAI,CAAC;4BACd,IAAI,EAAE,SAAS,CAAC,IAAI;4BACpB,KAAK,EAAE,SAAS,CAAC,KAAK;AACvB,yBAAA,CAAC;wBACF;;AAEF,oBAAA,YAAY,GAAG,iBAAiB,CAAC,MAAM;;;AAIzC,gBAAA,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,GAAG,YAAY,CAAC;AACtD,gBAAA,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CACtB,OAAO,CAAC,MAAM,EACd,QAAQ,GAAG,YAAY,GAAG,YAAY,CACvC;AAED,gBAAA,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,iBAAiB,CAAC;AACjE,gBAAA,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAC3B,OAAO,CAAC,MAAM,EACd,OAAO,GAAG,iBAAiB,CAC5B;;gBAGD,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC;gBAC7D,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,YAAY,CAAC;;gBAGzD,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,QAAQ,EAAE,UAAU,CAAC;gBAC/D,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,QAAQ,EAAE,SAAS,CAAC;;AAG9D,gBAAA,MAAM,UAAU,GAAG,cAAc,GAAG,gBAAgB;AACpD,gBAAA,MAAM,QAAQ,GAAG,OAAO,GAAG,gBAAgB;;gBAG3C,MAAM,qBAAqB,GAAG;AAC3B,qBAAA,SAAS,CAAC,UAAU,EAAE,QAAQ;AAC9B,qBAAA,IAAI,EAAE;gBACT,UAAU,CAAC,IAAI,CAAC;AACd,oBAAA,IAAI,EAAE,qBAAqB;oBAC3B,KAAK,EAAE,SAAS,CAAC,KAAK;AACvB,iBAAA,CAAC;;YAGJ,OAAO,UAAU,CAAC,OAAO;AACzB,YAAA,UAAU,CAAC,UAAU,GAAG,UAAU;AAClC,YAAA,OAAO,UAAU;AACnB,SAAC,CAAC;;AAGJ,IAAA,OAAO,UAAU;AACnB;;;;"}
|
|
1
|
+
{"version":3,"file":"highlights.cjs","sources":["../../../../src/tools/search/highlights.ts"],"sourcesContent":["import type * as t from './types';\n\n// 2. Pre-compile all regular expressions (only do this once)\n// Group patterns by priority for early returns\nconst priorityPatterns = [\n // High priority patterns (structural)\n [\n { regex: /\\n\\n/g }, // Double newline (paragraph break)\n { regex: /\\n/g }, // Single newline\n { regex: /={3,}\\s*\\n|-{3,}\\s*\\n/g }, // Section separators\n ],\n // Medium priority (semantic)\n [\n { regex: /[.!?][\")\\]]?\\s/g }, // End of sentence\n { regex: /;\\s/g }, // Semicolon\n { regex: /:\\s/g }, // Colon\n ],\n // Low priority (any breaks)\n [\n { regex: /,\\s/g }, // Comma\n { regex: /\\s-\\s/g }, // Dash surrounded by spaces\n { regex: /\\s/g }, // Any space\n ],\n];\n\nfunction findFirstMatch(text: string, regex: RegExp): number {\n // Reset regex\n regex.lastIndex = 0;\n\n // For very long texts, try chunking\n if (text.length > 10000) {\n const chunkSize = 2000;\n let position = 0;\n\n while (position < text.length) {\n const chunk = text.substring(position, position + chunkSize);\n regex.lastIndex = 0;\n\n const match = regex.exec(chunk);\n if (match) {\n return position + match.index;\n }\n\n // Move to next chunk with some overlap\n position += chunkSize - 100;\n if (position >= text.length) break;\n }\n return -1;\n }\n\n // For shorter texts, normal regex search\n const match = regex.exec(text);\n return match ? match.index : -1;\n}\n\n// 3. Optimized boundary finding functions\nfunction findLastMatch(text: string, regex: RegExp): number {\n // Reset regex state\n regex.lastIndex = 0;\n\n let lastIndex = -1;\n let lastLength = 0;\n let match;\n\n // For very long texts, use a different approach to avoid regex engine slowdowns\n if (text.length > 10000) {\n // Try dividing the text into chunks for faster processing\n const chunkSize = 2000;\n let startPosition = Math.max(0, text.length - chunkSize);\n\n while (startPosition >= 0) {\n const chunk = text.substring(startPosition, startPosition + chunkSize);\n regex.lastIndex = 0;\n\n let chunkLastIndex = -1;\n let chunkLastLength = 0;\n\n while ((match = regex.exec(chunk)) !== null) {\n chunkLastIndex = match.index;\n chunkLastLength = match[0].length;\n }\n\n if (chunkLastIndex !== -1) {\n return startPosition + chunkLastIndex + chunkLastLength;\n }\n\n // Move to previous chunk with some overlap\n startPosition = Math.max(0, startPosition - chunkSize + 100) - 1;\n if (startPosition <= 0) break;\n }\n return -1;\n }\n\n // For shorter texts, normal regex search\n while ((match = regex.exec(text)) !== null) {\n lastIndex = match.index;\n lastLength = match[0].length;\n }\n\n return lastIndex === -1 ? -1 : lastIndex + lastLength;\n}\n\n// 4. Find the best boundary with priority groups\nfunction findBestBoundary(text: string, direction = 'backward'): number {\n if (!text || text.length === 0) return 0;\n\n // Try each priority group\n for (const patternGroup of priorityPatterns) {\n for (const pattern of patternGroup) {\n const position =\n direction === 'backward'\n ? findLastMatch(text, pattern.regex)\n : findFirstMatch(text, pattern.regex);\n\n if (position !== -1) {\n return position;\n }\n }\n }\n\n // No match found, use character boundary\n return direction === 'backward' ? text.length : 0;\n}\n\n/**\n * Tracks references used in a highlight without changing their numbers\n */\nfunction trackReferencesInHighlight(\n text: string,\n sourceResult: t.ValidSource // Source containing the original references\n): {\n references: {\n type: 'link' | 'image' | 'video';\n originalIndex: number;\n reference: t.MediaReference; // Original reference object\n }[];\n} {\n // Track used references\n const references: {\n type: 'link' | 'image' | 'video';\n originalIndex: number;\n reference: t.MediaReference;\n }[] = [];\n\n if (!text || text.length === 0 || !text.includes('#')) {\n return { references }; // Early return\n }\n\n // Quick check for reference markers\n if (\n !text.includes('link#') &&\n !text.includes('image#') &&\n !text.includes('video#')\n ) {\n return { references };\n }\n\n // Get references from the source if available\n const sourceRefs = sourceResult.references || {\n links: [],\n images: [],\n videos: [],\n };\n\n // Find references but don't modify text\n const refRegex = /\\((link|image|video)#(\\d+)(?:\\s+\"([^\"]*)\")?\\)/g;\n let match;\n\n while ((match = refRegex.exec(text)) !== null) {\n const [, type, indexStr] = match;\n const originalIndex = parseInt(indexStr, 10) - 1; // Convert to 0-based\n\n // Get the source array for this type\n const refType = type as 'link' | 'image' | 'video';\n const sourceArray = sourceRefs[`${refType}s`] as\n | t.MediaReference[]\n | undefined;\n\n // Skip if invalid reference\n if (\n !sourceArray ||\n originalIndex < 0 ||\n originalIndex >= sourceArray.length\n ) {\n continue; // Skip invalid references\n }\n\n // Get original reference\n const reference = sourceArray[originalIndex];\n\n // Track if not already tracked\n const alreadyTracked = references.some(\n (ref) => ref.type === refType && ref.originalIndex === originalIndex\n );\n\n if (!alreadyTracked) {\n references.push({\n type: refType,\n originalIndex,\n reference,\n });\n }\n }\n\n return { references };\n}\n\n/**\n * Expand highlights in search results using smart boundary detection.\n *\n * This implementation finds natural text boundaries like paragraphs, sentences,\n * and phrases to provide context while maintaining readability.\n *\n * @param searchResults - Search results object\n * @param mainExpandBy - Primary expansion size on each side (default: 300)\n * @param separatorExpandBy - Additional range to look for separators (default: 150)\n * @returns Copy of search results with expanded highlights and tracked references\n */\nexport function expandHighlights(\n searchResults: t.SearchResultData,\n mainExpandBy = 300,\n separatorExpandBy = 150\n): t.SearchResultData {\n // Avoid deep copy - only copy what we modify\n const resultCopy = { ...searchResults };\n if (resultCopy.organic) resultCopy.organic = [...resultCopy.organic];\n if (resultCopy.topStories) resultCopy.topStories = [...resultCopy.topStories];\n\n // Process the results efficiently\n const processResultTypes = ['organic', 'topStories'] as const;\n\n for (const resultType of processResultTypes) {\n if (!resultCopy[resultType as 'organic' | 'topStories']) continue;\n\n // Map results to new array with modified highlights\n resultCopy[resultType] = resultCopy[resultType]?.map((result) => {\n if (\n result.content == null ||\n result.content === '' ||\n !result.highlights ||\n result.highlights.length === 0\n ) {\n return result; // No modification needed\n }\n\n // Create a shallow copy with expanded highlights\n const resultCopy = { ...result };\n const content = result.content;\n const highlights = [];\n // Process each highlight\n for (const highlight of result.highlights) {\n const { references } = trackReferencesInHighlight(\n highlight.text,\n result\n );\n\n let startPos = content.indexOf(highlight.text);\n let highlightLen = highlight.text.length;\n\n if (startPos === -1) {\n // Try with stripped whitespace\n const strippedHighlight = highlight.text.trim();\n startPos = content.indexOf(strippedHighlight);\n\n if (startPos === -1) {\n highlights.push({\n text: highlight.text,\n score: highlight.score,\n references,\n });\n continue;\n }\n highlightLen = strippedHighlight.length;\n }\n\n // Calculate boundaries\n const mainStart = Math.max(0, startPos - mainExpandBy);\n const mainEnd = Math.min(\n content.length,\n startPos + highlightLen + mainExpandBy\n );\n\n const separatorStart = Math.max(0, mainStart - separatorExpandBy);\n const separatorEnd = Math.min(\n content.length,\n mainEnd + separatorExpandBy\n );\n\n // Extract text segments\n const headText = content.substring(separatorStart, mainStart);\n const tailText = content.substring(mainEnd, separatorEnd);\n\n // Find natural boundaries\n const bestHeadBoundary = findBestBoundary(headText, 'backward');\n const bestTailBoundary = findBestBoundary(tailText, 'forward');\n\n // Calculate final positions\n const finalStart = separatorStart + bestHeadBoundary;\n const finalEnd = mainEnd + bestTailBoundary;\n\n // Extract the expanded highlight\n const expandedHighlightText = content\n .substring(finalStart, finalEnd)\n .trim();\n highlights.push({\n text: expandedHighlightText,\n score: highlight.score,\n references,\n });\n }\n\n resultCopy.highlights = highlights;\n delete resultCopy.content;\n delete resultCopy.references;\n return resultCopy;\n });\n }\n\n return resultCopy;\n}\n"],"names":[],"mappings":";;AAEA;AACA;AACA,MAAM,gBAAgB,GAAG;;AAEvB,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,OAAO,EAAE;AAClB,QAAA,EAAE,KAAK,EAAE,KAAK,EAAE;AAChB,QAAA,EAAE,KAAK,EAAE,wBAAwB,EAAE;AACpC,KAAA;;AAED,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,iBAAiB,EAAE;AAC5B,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AACjB,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AAClB,KAAA;;AAED,IAAA;AACE,QAAA,EAAE,KAAK,EAAE,MAAM,EAAE;AACjB,QAAA,EAAE,KAAK,EAAE,QAAQ,EAAE;AACnB,QAAA,EAAE,KAAK,EAAE,KAAK,EAAE;AACjB,KAAA;CACF;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,KAAa,EAAA;;AAEjD,IAAA,KAAK,CAAC,SAAS,GAAG,CAAC;;AAGnB,IAAA,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE;QACvB,MAAM,SAAS,GAAG,IAAI;QACtB,IAAI,QAAQ,GAAG,CAAC;AAEhB,QAAA,OAAO,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE;AAC7B,YAAA,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,QAAQ,GAAG,SAAS,CAAC;AAC5D,YAAA,KAAK,CAAC,SAAS,GAAG,CAAC;YAEnB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC;YAC/B,IAAI,KAAK,EAAE;AACT,gBAAA,OAAO,QAAQ,GAAG,KAAK,CAAC,KAAK;;;AAI/B,YAAA,QAAQ,IAAI,SAAS,GAAG,GAAG;AAC3B,YAAA,IAAI,QAAQ,IAAI,IAAI,CAAC,MAAM;gBAAE;;QAE/B,OAAO,EAAE;;;IAIX,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AAC9B,IAAA,OAAO,KAAK,GAAG,KAAK,CAAC,KAAK,GAAG,EAAE;AACjC;AAEA;AACA,SAAS,aAAa,CAAC,IAAY,EAAE,KAAa,EAAA;;AAEhD,IAAA,KAAK,CAAC,SAAS,GAAG,CAAC;AAEnB,IAAA,IAAI,SAAS,GAAG,EAAE;IAClB,IAAI,UAAU,GAAG,CAAC;AAClB,IAAA,IAAI,KAAK;;AAGT,IAAA,IAAI,IAAI,CAAC,MAAM,GAAG,KAAK,EAAE;;QAEvB,MAAM,SAAS,GAAG,IAAI;AACtB,QAAA,IAAI,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;AAExD,QAAA,OAAO,aAAa,IAAI,CAAC,EAAE;AACzB,YAAA,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,aAAa,EAAE,aAAa,GAAG,SAAS,CAAC;AACtE,YAAA,KAAK,CAAC,SAAS,GAAG,CAAC;AAEnB,YAAA,IAAI,cAAc,GAAG,EAAE;YACvB,IAAI,eAAe,GAAG,CAAC;AAEvB,YAAA,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,EAAE;AAC3C,gBAAA,cAAc,GAAG,KAAK,CAAC,KAAK;AAC5B,gBAAA,eAAe,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;;AAGnC,YAAA,IAAI,cAAc,KAAK,EAAE,EAAE;AACzB,gBAAA,OAAO,aAAa,GAAG,cAAc,GAAG,eAAe;;;AAIzD,YAAA,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,aAAa,GAAG,SAAS,GAAG,GAAG,CAAC,GAAG,CAAC;YAChE,IAAI,aAAa,IAAI,CAAC;gBAAE;;QAE1B,OAAO,EAAE;;;AAIX,IAAA,OAAO,CAAC,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE;AAC1C,QAAA,SAAS,GAAG,KAAK,CAAC,KAAK;AACvB,QAAA,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;;AAG9B,IAAA,OAAO,SAAS,KAAK,EAAE,GAAG,EAAE,GAAG,SAAS,GAAG,UAAU;AACvD;AAEA;AACA,SAAS,gBAAgB,CAAC,IAAY,EAAE,SAAS,GAAG,UAAU,EAAA;AAC5D,IAAA,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;AAAE,QAAA,OAAO,CAAC;;AAGxC,IAAA,KAAK,MAAM,YAAY,IAAI,gBAAgB,EAAE;AAC3C,QAAA,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE;AAClC,YAAA,MAAM,QAAQ,GACZ,SAAS,KAAK;kBACV,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK;kBACjC,cAAc,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC;AAEzC,YAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;AACnB,gBAAA,OAAO,QAAQ;;;;;AAMrB,IAAA,OAAO,SAAS,KAAK,UAAU,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC;AACnD;AAEA;;AAEG;AACH,SAAS,0BAA0B,CACjC,IAAY,EACZ,YAA2B;;;IAS3B,MAAM,UAAU,GAIV,EAAE;AAER,IAAA,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;AACrD,QAAA,OAAO,EAAE,UAAU,EAAE,CAAC;;;AAIxB,IAAA,IACE,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;AACvB,QAAA,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;AACxB,QAAA,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EACxB;QACA,OAAO,EAAE,UAAU,EAAE;;;AAIvB,IAAA,MAAM,UAAU,GAAG,YAAY,CAAC,UAAU,IAAI;AAC5C,QAAA,KAAK,EAAE,EAAE;AACT,QAAA,MAAM,EAAE,EAAE;AACV,QAAA,MAAM,EAAE,EAAE;KACX;;IAGD,MAAM,QAAQ,GAAG,gDAAgD;AACjE,IAAA,IAAI,KAAK;AAET,IAAA,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,IAAI,EAAE;QAC7C,MAAM,GAAG,IAAI,EAAE,QAAQ,CAAC,GAAG,KAAK;AAChC,QAAA,MAAM,aAAa,GAAG,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC;;QAGjD,MAAM,OAAO,GAAG,IAAkC;QAClD,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,OAAO,CAAA,CAAA,CAAG,CAE/B;;AAGb,QAAA,IACE,CAAC,WAAW;AACZ,YAAA,aAAa,GAAG,CAAC;AACjB,YAAA,aAAa,IAAI,WAAW,CAAC,MAAM,EACnC;AACA,YAAA,SAAS;;;AAIX,QAAA,MAAM,SAAS,GAAG,WAAW,CAAC,aAAa,CAAC;;QAG5C,MAAM,cAAc,GAAG,UAAU,CAAC,IAAI,CACpC,CAAC,GAAG,KAAK,GAAG,CAAC,IAAI,KAAK,OAAO,IAAI,GAAG,CAAC,aAAa,KAAK,aAAa,CACrE;QAED,IAAI,CAAC,cAAc,EAAE;YACnB,UAAU,CAAC,IAAI,CAAC;AACd,gBAAA,IAAI,EAAE,OAAO;gBACb,aAAa;gBACb,SAAS;AACV,aAAA,CAAC;;;IAIN,OAAO,EAAE,UAAU,EAAE;AACvB;AAEA;;;;;;;;;;AAUG;AACG,SAAU,gBAAgB,CAC9B,aAAiC,EACjC,YAAY,GAAG,GAAG,EAClB,iBAAiB,GAAG,GAAG,EAAA;;AAGvB,IAAA,MAAM,UAAU,GAAG,EAAE,GAAG,aAAa,EAAE;IACvC,IAAI,UAAU,CAAC,OAAO;QAAE,UAAU,CAAC,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC;IACpE,IAAI,UAAU,CAAC,UAAU;QAAE,UAAU,CAAC,UAAU,GAAG,CAAC,GAAG,UAAU,CAAC,UAAU,CAAC;;AAG7E,IAAA,MAAM,kBAAkB,GAAG,CAAC,SAAS,EAAE,YAAY,CAAU;AAE7D,IAAA,KAAK,MAAM,UAAU,IAAI,kBAAkB,EAAE;AAC3C,QAAA,IAAI,CAAC,UAAU,CAAC,UAAsC,CAAC;YAAE;;AAGzD,QAAA,UAAU,CAAC,UAAU,CAAC,GAAG,UAAU,CAAC,UAAU,CAAC,EAAE,GAAG,CAAC,CAAC,MAAM,KAAI;AAC9D,YAAA,IACE,MAAM,CAAC,OAAO,IAAI,IAAI;gBACtB,MAAM,CAAC,OAAO,KAAK,EAAE;gBACrB,CAAC,MAAM,CAAC,UAAU;AAClB,gBAAA,MAAM,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAC9B;gBACA,OAAO,MAAM,CAAC;;;AAIhB,YAAA,MAAM,UAAU,GAAG,EAAE,GAAG,MAAM,EAAE;AAChC,YAAA,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO;YAC9B,MAAM,UAAU,GAAG,EAAE;;AAErB,YAAA,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,UAAU,EAAE;AACzC,gBAAA,MAAM,EAAE,UAAU,EAAE,GAAG,0BAA0B,CAC/C,SAAS,CAAC,IAAI,EACd,MAAM,CACP;gBAED,IAAI,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC;AAC9C,gBAAA,IAAI,YAAY,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM;AAExC,gBAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;;oBAEnB,MAAM,iBAAiB,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE;AAC/C,oBAAA,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,CAAC;AAE7C,oBAAA,IAAI,QAAQ,KAAK,EAAE,EAAE;wBACnB,UAAU,CAAC,IAAI,CAAC;4BACd,IAAI,EAAE,SAAS,CAAC,IAAI;4BACpB,KAAK,EAAE,SAAS,CAAC,KAAK;4BACtB,UAAU;AACX,yBAAA,CAAC;wBACF;;AAEF,oBAAA,YAAY,GAAG,iBAAiB,CAAC,MAAM;;;AAIzC,gBAAA,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,GAAG,YAAY,CAAC;AACtD,gBAAA,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CACtB,OAAO,CAAC,MAAM,EACd,QAAQ,GAAG,YAAY,GAAG,YAAY,CACvC;AAED,gBAAA,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,iBAAiB,CAAC;AACjE,gBAAA,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAC3B,OAAO,CAAC,MAAM,EACd,OAAO,GAAG,iBAAiB,CAC5B;;gBAGD,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,cAAc,EAAE,SAAS,CAAC;gBAC7D,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,YAAY,CAAC;;gBAGzD,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,QAAQ,EAAE,UAAU,CAAC;gBAC/D,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,QAAQ,EAAE,SAAS,CAAC;;AAG9D,gBAAA,MAAM,UAAU,GAAG,cAAc,GAAG,gBAAgB;AACpD,gBAAA,MAAM,QAAQ,GAAG,OAAO,GAAG,gBAAgB;;gBAG3C,MAAM,qBAAqB,GAAG;AAC3B,qBAAA,SAAS,CAAC,UAAU,EAAE,QAAQ;AAC9B,qBAAA,IAAI,EAAE;gBACT,UAAU,CAAC,IAAI,CAAC;AACd,oBAAA,IAAI,EAAE,qBAAqB;oBAC3B,KAAK,EAAE,SAAS,CAAC,KAAK;oBACtB,UAAU;AACX,iBAAA,CAAC;;AAGJ,YAAA,UAAU,CAAC,UAAU,GAAG,UAAU;YAClC,OAAO,UAAU,CAAC,OAAO;YACzB,OAAO,UAAU,CAAC,UAAU;AAC5B,YAAA,OAAO,UAAU;AACnB,SAAC,CAAC;;AAGJ,IAAA,OAAO,UAAU;AACnB;;;;"}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
var axios = require('axios');
|
|
4
4
|
var textsplitters = require('@langchain/textsplitters');
|
|
5
|
-
var
|
|
5
|
+
var utils = require('./utils.cjs');
|
|
6
6
|
|
|
7
7
|
/* eslint-disable no-console */
|
|
8
8
|
const chunker = {
|
|
@@ -79,23 +79,23 @@ const createSerperAPI = (apiKey) => {
|
|
|
79
79
|
const config = {
|
|
80
80
|
apiKey: apiKey ?? process.env.SERPER_API_KEY,
|
|
81
81
|
apiUrl: 'https://google.serper.dev/search',
|
|
82
|
-
defaultLocation: 'us',
|
|
83
82
|
timeout: 10000,
|
|
84
83
|
};
|
|
85
84
|
if (config.apiKey == null || config.apiKey === '') {
|
|
86
85
|
throw new Error('SERPER_API_KEY is required for SerperAPI');
|
|
87
86
|
}
|
|
88
|
-
const getSources = async (query, numResults = 8,
|
|
87
|
+
const getSources = async ({ query, country, numResults = 8, }) => {
|
|
89
88
|
if (!query.trim()) {
|
|
90
89
|
return { success: false, error: 'Query cannot be empty' };
|
|
91
90
|
}
|
|
92
91
|
try {
|
|
93
|
-
const searchLocation = (storedLocation ?? config.defaultLocation).toLowerCase();
|
|
94
92
|
const payload = {
|
|
95
93
|
q: query,
|
|
96
94
|
num: Math.min(Math.max(1, numResults), 10),
|
|
97
|
-
gl: searchLocation,
|
|
98
95
|
};
|
|
96
|
+
if (country != null && country !== '') {
|
|
97
|
+
payload['gl'] = country.toLowerCase();
|
|
98
|
+
}
|
|
99
99
|
const response = await axios.post(config.apiUrl, payload, {
|
|
100
100
|
headers: {
|
|
101
101
|
'X-API-KEY': config.apiKey,
|
|
@@ -107,10 +107,10 @@ const createSerperAPI = (apiKey) => {
|
|
|
107
107
|
const results = {
|
|
108
108
|
organic: data.organic,
|
|
109
109
|
images: data.images ?? [],
|
|
110
|
-
topStories: data.topStories ?? [],
|
|
111
|
-
knowledgeGraph: data.knowledgeGraph,
|
|
112
110
|
answerBox: data.answerBox,
|
|
111
|
+
topStories: data.topStories ?? [],
|
|
113
112
|
peopleAlsoAsk: data.peopleAlsoAsk,
|
|
113
|
+
knowledgeGraph: data.knowledgeGraph,
|
|
114
114
|
relatedSearches: data.relatedSearches,
|
|
115
115
|
};
|
|
116
116
|
return { success: true, data: results };
|
|
@@ -131,7 +131,7 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
131
131
|
if (config.instanceUrl == null || config.instanceUrl === '') {
|
|
132
132
|
throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');
|
|
133
133
|
}
|
|
134
|
-
const getSources = async (query, numResults = 8,
|
|
134
|
+
const getSources = async ({ query, numResults = 8, }) => {
|
|
135
135
|
if (!query.trim()) {
|
|
136
136
|
return { success: false, error: 'Query cannot be empty' };
|
|
137
137
|
}
|
|
@@ -153,11 +153,7 @@ const createSearXNGAPI = (instanceUrl, apiKey) => {
|
|
|
153
153
|
language: 'all',
|
|
154
154
|
safesearch: 0,
|
|
155
155
|
engines: 'google,bing,duckduckgo',
|
|
156
|
-
max_results: Math.min(Math.max(1, numResults), 20),
|
|
157
156
|
};
|
|
158
|
-
if (storedLocation != null && storedLocation !== 'all') {
|
|
159
|
-
params.language = storedLocation;
|
|
160
|
-
}
|
|
161
157
|
const headers = {
|
|
162
158
|
'Content-Type': 'application/json',
|
|
163
159
|
};
|
|
@@ -237,11 +233,12 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
237
233
|
const promise = firecrawlScraper
|
|
238
234
|
.scrapeUrl(currentLink, {})
|
|
239
235
|
.then(([url, response]) => {
|
|
240
|
-
const attribution =
|
|
236
|
+
const attribution = utils.getAttribution(url, response.data?.metadata);
|
|
241
237
|
if (response.success && response.data) {
|
|
242
|
-
const content = firecrawlScraper.extractContent(response);
|
|
238
|
+
const [content, references] = firecrawlScraper.extractContent(response);
|
|
243
239
|
return {
|
|
244
240
|
url,
|
|
241
|
+
references,
|
|
245
242
|
attribution,
|
|
246
243
|
content: chunker.cleanText(content),
|
|
247
244
|
};
|
|
@@ -304,10 +301,11 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
304
301
|
if (result.error === true) {
|
|
305
302
|
continue;
|
|
306
303
|
}
|
|
307
|
-
const { url, content, attribution, highlights } = result;
|
|
304
|
+
const { url, content, attribution, references, highlights } = result;
|
|
308
305
|
onContentScraped?.(url, {
|
|
309
306
|
content,
|
|
310
307
|
attribution,
|
|
308
|
+
references,
|
|
311
309
|
highlights,
|
|
312
310
|
});
|
|
313
311
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.cjs","sources":["../../../../src/tools/search/search.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\nimport { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';\nimport type * as t from './types';\nimport { getAttribution, FirecrawlScraper } from './firecrawl';\nimport { BaseReranker } from './rerankers';\n\nconst chunker = {\n cleanText: (text: string): string => {\n if (!text) return '';\n\n /** Normalized all line endings to '\\n' */\n const normalizedText = text.replace(/\\r\\n/g, '\\n').replace(/\\r/g, '\\n');\n\n /** Handle multiple backslashes followed by newlines\n * This replaces patterns like '\\\\\\\\\\\\n' with a single newline */\n const fixedBackslashes = normalizedText.replace(/\\\\+\\n/g, '\\n');\n\n /** Cleaned up consecutive newlines, tabs, and spaces around newlines */\n const cleanedNewlines = fixedBackslashes.replace(/[\\t ]*\\n[\\t \\n]*/g, '\\n');\n\n /** Cleaned up excessive spaces and tabs */\n const cleanedSpaces = cleanedNewlines.replace(/[ \\t]+/g, ' ');\n\n return cleanedSpaces.trim();\n },\n splitText: async (\n text: string,\n options?: {\n chunkSize?: number;\n chunkOverlap?: number;\n separators?: string[];\n }\n ): Promise<string[]> => {\n const chunkSize = options?.chunkSize ?? 150;\n const chunkOverlap = options?.chunkOverlap ?? 50;\n const separators = options?.separators || ['\\n\\n', '\\n'];\n\n const splitter = new RecursiveCharacterTextSplitter({\n separators,\n chunkSize,\n chunkOverlap,\n });\n\n return await splitter.splitText(text);\n },\n\n splitTexts: async (\n texts: string[],\n options?: {\n chunkSize?: number;\n chunkOverlap?: number;\n separators?: string[];\n }\n ): Promise<string[][]> => {\n // Split multiple texts\n const promises = texts.map((text) =>\n chunker.splitText(text, options).catch((error) => {\n console.error('Error splitting text:', error);\n return [text];\n })\n );\n return Promise.all(promises);\n },\n};\n\nconst createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {\n return (link: string, update?: Partial<t.ValidSource>): void => {\n const source = sourceMap.get(link);\n if (source) {\n sourceMap.set(link, {\n ...source,\n ...update,\n });\n }\n };\n};\n\nconst getHighlights = async ({\n query,\n content,\n reranker,\n topResults = 5,\n}: {\n content: string;\n query: string;\n reranker?: BaseReranker;\n topResults?: number;\n}): Promise<t.Highlight[] | undefined> => {\n if (!content) {\n console.warn('No content provided for highlights');\n return;\n }\n if (!reranker) {\n console.warn('No reranker provided for highlights');\n return;\n }\n\n try {\n const documents = await chunker.splitText(content);\n if (Array.isArray(documents)) {\n return await reranker.rerank(query, documents, topResults);\n } else {\n console.error(\n 'Expected documents to be an array, got:',\n typeof documents\n );\n return;\n }\n } catch (error) {\n console.error('Error in content processing:', error);\n return;\n }\n};\n\nconst createSerperAPI = (\n apiKey?: string\n): {\n getSources: (\n query: string,\n numResults?: number,\n storedLocation?: string\n ) => Promise<t.SearchResult>;\n} => {\n const config = {\n apiKey: apiKey ?? process.env.SERPER_API_KEY,\n apiUrl: 'https://google.serper.dev/search',\n defaultLocation: 'us',\n timeout: 10000,\n };\n\n if (config.apiKey == null || config.apiKey === '') {\n throw new Error('SERPER_API_KEY is required for SerperAPI');\n }\n\n const getSources = async (\n query: string,\n numResults: number = 8,\n storedLocation?: string\n ): Promise<t.SearchResult> => {\n if (!query.trim()) {\n return { success: false, error: 'Query cannot be empty' };\n }\n\n try {\n const searchLocation = (\n storedLocation ?? config.defaultLocation\n ).toLowerCase();\n\n const payload = {\n q: query,\n num: Math.min(Math.max(1, numResults), 10),\n gl: searchLocation,\n };\n\n const response = await axios.post(config.apiUrl, payload, {\n headers: {\n 'X-API-KEY': config.apiKey,\n 'Content-Type': 'application/json',\n },\n timeout: config.timeout,\n });\n\n const data = response.data;\n const results: t.SearchResultData = {\n organic: data.organic,\n images: data.images ?? [],\n topStories: data.topStories ?? [],\n knowledgeGraph: data.knowledgeGraph as t.KnowledgeGraphResult,\n answerBox: data.answerBox as t.AnswerBoxResult,\n peopleAlsoAsk: data.peopleAlsoAsk as t.PeopleAlsoAskResult[],\n relatedSearches: data.relatedSearches as string[],\n };\n\n return { success: true, data: results };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return { success: false, error: `API request failed: ${errorMessage}` };\n }\n };\n\n return { getSources };\n};\n\nconst createSearXNGAPI = (\n instanceUrl?: string,\n apiKey?: string\n): {\n getSources: (\n query: string,\n numResults?: number,\n storedLocation?: string\n ) => Promise<t.SearchResult>;\n} => {\n const config = {\n instanceUrl: instanceUrl ?? process.env.SEARXNG_INSTANCE_URL,\n apiKey: apiKey ?? process.env.SEARXNG_API_KEY,\n defaultLocation: 'all',\n timeout: 10000,\n };\n\n if (config.instanceUrl == null || config.instanceUrl === '') {\n throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');\n }\n\n const getSources = async (\n query: string,\n numResults: number = 8,\n storedLocation?: string\n ): Promise<t.SearchResult> => {\n if (!query.trim()) {\n return { success: false, error: 'Query cannot be empty' };\n }\n\n try {\n // Ensure the instance URL ends with /search\n if (config.instanceUrl == null || config.instanceUrl === '') {\n return { success: false, error: 'Instance URL is not defined' };\n }\n\n let searchUrl = config.instanceUrl;\n if (!searchUrl.endsWith('/search')) {\n searchUrl = searchUrl.replace(/\\/$/, '') + '/search';\n }\n\n // Prepare parameters for SearXNG\n const params: Record<string, string | number> = {\n q: query,\n format: 'json',\n pageno: 1,\n categories: 'general',\n language: 'all',\n safesearch: 0,\n engines: 'google,bing,duckduckgo',\n max_results: Math.min(Math.max(1, numResults), 20),\n };\n\n if (storedLocation != null && storedLocation !== 'all') {\n params.language = storedLocation;\n }\n\n const headers: Record<string, string> = {\n 'Content-Type': 'application/json',\n };\n\n if (config.apiKey != null && config.apiKey !== '') {\n headers['X-API-Key'] = config.apiKey;\n }\n\n const response = await axios.get(searchUrl, {\n headers,\n params,\n timeout: config.timeout,\n });\n\n const data = response.data;\n\n // Transform SearXNG results to match SerperAPI format\n const organicResults = (data.results ?? [])\n .slice(0, numResults)\n .map((result: t.SearXNGResult) => ({\n title: result.title ?? '',\n link: result.url ?? '',\n snippet: result.content ?? '',\n date: result.publishedDate ?? '',\n }));\n\n // Extract image results if available\n const imageResults = (data.results ?? [])\n .filter((result: t.SearXNGResult) => result.img_src)\n .slice(0, 6)\n .map((result: t.SearXNGResult) => ({\n title: result.title ?? '',\n imageUrl: result.img_src ?? '',\n }));\n\n // Format results to match SerperAPI structure\n const results: t.SearchResultData = {\n organic: organicResults,\n images: imageResults,\n topStories: [],\n // Use undefined instead of null for optional properties\n relatedSearches: data.suggestions ?? [],\n };\n\n return { success: true, data: results };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n success: false,\n error: `SearXNG API request failed: ${errorMessage}`,\n };\n }\n };\n\n return { getSources };\n};\n\nexport const createSearchAPI = (\n config: t.SearchConfig\n): {\n getSources: (\n query: string,\n numResults?: number,\n storedLocation?: string\n ) => Promise<t.SearchResult>;\n} => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n } = config;\n\n if (searchProvider.toLowerCase() === 'serper') {\n return createSerperAPI(serperApiKey);\n } else if (searchProvider.toLowerCase() === 'searxng') {\n return createSearXNGAPI(searxngInstanceUrl, searxngApiKey);\n } else {\n throw new Error(\n `Invalid search provider: ${searchProvider}. Must be 'serper' or 'searxng'`\n );\n }\n};\n\nexport const createSourceProcessor = (\n config: t.ProcessSourcesConfig = {},\n scraperInstance?: FirecrawlScraper\n): {\n processSources: (\n result: t.SearchResult,\n numElements: number,\n query: string,\n proMode?: boolean\n ) => Promise<t.SearchResultData>;\n topResults: number;\n} => {\n if (!scraperInstance) {\n throw new Error('Firecrawl scraper instance is required');\n }\n const {\n topResults = 5,\n // strategies = ['no_extraction'],\n // filterContent = true,\n reranker,\n } = config;\n\n const firecrawlScraper = scraperInstance;\n\n const webScraper = {\n scrapeMany: async ({\n query,\n links,\n }: {\n query: string;\n links: string[];\n }): Promise<Array<t.ScrapeResult>> => {\n console.log(`Scraping ${links.length} links with Firecrawl`);\n const promises: Array<Promise<t.ScrapeResult>> = [];\n try {\n for (const currentLink of links) {\n const promise: Promise<t.ScrapeResult> = firecrawlScraper\n .scrapeUrl(currentLink, {})\n .then(([url, response]) => {\n const attribution = getAttribution(url, response.data?.metadata);\n if (response.success && response.data) {\n const content = firecrawlScraper.extractContent(response);\n return {\n url,\n attribution,\n content: chunker.cleanText(content),\n };\n }\n\n return {\n url,\n attribution,\n error: true,\n content: `Failed to scrape ${url}: ${response.error ?? 'Unknown error'}`,\n };\n })\n .then(async (result) => {\n try {\n if (result.error != null) {\n console.error(\n `Error scraping ${result.url}: ${result.content}`\n );\n return {\n ...result,\n };\n }\n const highlights = await getHighlights({\n query,\n reranker,\n content: result.content,\n });\n return {\n ...result,\n highlights,\n };\n } catch (error) {\n console.error('Error processing scraped content:', error);\n return {\n ...result,\n };\n }\n })\n .catch((error) => {\n console.error(`Error scraping ${currentLink}:`, error);\n return {\n url: currentLink,\n error: true,\n content: `Failed to scrape ${currentLink}: ${error.message ?? 'Unknown error'}`,\n };\n });\n promises.push(promise);\n }\n return await Promise.all(promises);\n } catch (error) {\n console.error('Error in scrapeMany:', error);\n return [];\n }\n },\n };\n\n const fetchContents = async ({\n links,\n query,\n target,\n onContentScraped,\n }: {\n links: string[];\n query: string;\n target: number;\n onContentScraped?: (link: string, update?: Partial<t.ValidSource>) => void;\n }): Promise<void> => {\n const initialLinks = links.slice(0, target);\n // const remainingLinks = links.slice(target).reverse();\n const results = await webScraper.scrapeMany({ query, links: initialLinks });\n for (const result of results) {\n if (result.error === true) {\n continue;\n }\n const { url, content, attribution, highlights } = result;\n onContentScraped?.(url, {\n content,\n attribution,\n highlights,\n });\n }\n };\n\n const processSources = async (\n result: t.SearchResult,\n numElements: number,\n query: string,\n proMode: boolean = false\n ): Promise<t.SearchResultData> => {\n try {\n if (!result.data) {\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n };\n } else if (!result.data.organic) {\n return result.data;\n }\n\n if (!proMode) {\n const wikiSources = result.data.organic.filter((source) =>\n source.link.includes('wikipedia.org')\n );\n\n if (!wikiSources.length) {\n return result.data;\n }\n\n const wikiSourceMap = new Map<string, t.ValidSource>();\n wikiSourceMap.set(wikiSources[0].link, wikiSources[0]);\n const onContentScraped = createSourceUpdateCallback(wikiSourceMap);\n await fetchContents({\n query,\n target: 1,\n onContentScraped,\n links: [wikiSources[0].link],\n });\n\n for (let i = 0; i < result.data.organic.length; i++) {\n const source = result.data.organic[i];\n const updatedSource = wikiSourceMap.get(source.link);\n if (updatedSource) {\n result.data.organic[i] = {\n ...source,\n ...updatedSource,\n };\n }\n }\n\n return result.data;\n }\n\n const sourceMap = new Map<string, t.ValidSource>();\n const allLinks: string[] = [];\n\n for (const source of result.data.organic) {\n if (source.link) {\n allLinks.push(source.link);\n sourceMap.set(source.link, source);\n }\n }\n\n if (allLinks.length === 0) {\n return result.data;\n }\n\n const onContentScraped = createSourceUpdateCallback(sourceMap);\n await fetchContents({\n links: allLinks,\n query,\n onContentScraped,\n target: numElements,\n });\n\n for (let i = 0; i < result.data.organic.length; i++) {\n const source = result.data.organic[i];\n const updatedSource = sourceMap.get(source.link);\n if (updatedSource) {\n result.data.organic[i] = {\n ...source,\n ...updatedSource,\n };\n }\n }\n\n const successfulSources = result.data.organic\n .filter(\n (source) =>\n source.content != null && !source.content.startsWith('Failed')\n )\n .slice(0, numElements);\n\n if (successfulSources.length > 0) {\n result.data.organic = successfulSources;\n }\n return result.data;\n } catch (error) {\n console.error('Error in processSources:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n ...result.data,\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n\n return {\n processSources,\n topResults,\n };\n};\n"],"names":["RecursiveCharacterTextSplitter","getAttribution"],"mappings":";;;;;;AAAA;AAOA,MAAM,OAAO,GAAG;AACd,IAAA,SAAS,EAAE,CAAC,IAAY,KAAY;AAClC,QAAA,IAAI,CAAC,IAAI;AAAE,YAAA,OAAO,EAAE;;AAGpB,QAAA,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC;AAEvE;AACiE;QACjE,MAAM,gBAAgB,GAAG,cAAc,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;QAG/D,MAAM,eAAe,GAAG,gBAAgB,CAAC,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC;;QAG3E,MAAM,aAAa,GAAG,eAAe,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;AAE7D,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE;KAC5B;AACD,IAAA,SAAS,EAAE,OACT,IAAY,EACZ,OAIC,KACoB;AACrB,QAAA,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,GAAG;AAC3C,QAAA,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,EAAE;QAChD,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC;AAExD,QAAA,MAAM,QAAQ,GAAG,IAAIA,4CAA8B,CAAC;YAClD,UAAU;YACV,SAAS;YACT,YAAY;AACb,SAAA,CAAC;AAEF,QAAA,OAAO,MAAM,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC;KACtC;AAED,IAAA,UAAU,EAAE,OACV,KAAe,EACf,OAIC,KACsB;;QAEvB,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,KAC9B,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,KAAI;AAC/C,YAAA,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC;SACd,CAAC,CACH;AACD,QAAA,OAAO,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;KAC7B;CACF;AAED,MAAM,0BAA0B,GAAG,CAAC,SAAqC,KAAI;AAC3E,IAAA,OAAO,CAAC,IAAY,EAAE,MAA+B,KAAU;QAC7D,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAClC,IAAI,MAAM,EAAE;AACV,YAAA,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE;AAClB,gBAAA,GAAG,MAAM;AACT,gBAAA,GAAG,MAAM;AACV,aAAA,CAAC;;AAEN,KAAC;AACH,CAAC;AAED,MAAM,aAAa,GAAG,OAAO,EAC3B,KAAK,EACL,OAAO,EACP,QAAQ,EACR,UAAU,GAAG,CAAC,GAMf,KAAwC;IACvC,IAAI,CAAC,OAAO,EAAE;AACZ,QAAA,OAAO,CAAC,IAAI,CAAC,oCAAoC,CAAC;QAClD;;IAEF,IAAI,CAAC,QAAQ,EAAE;AACb,QAAA,OAAO,CAAC,IAAI,CAAC,qCAAqC,CAAC;QACnD;;AAGF,IAAA,IAAI;QACF,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;AAClD,QAAA,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE;YAC5B,OAAO,MAAM,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,CAAC;;aACrD;YACL,OAAO,CAAC,KAAK,CACX,yCAAyC,EACzC,OAAO,SAAS,CACjB;YACD;;;IAEF,OAAO,KAAK,EAAE;AACd,QAAA,OAAO,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC;QACpD;;AAEJ,CAAC;AAED,MAAM,eAAe,GAAG,CACtB,MAAe,KAOb;AACF,IAAA,MAAM,MAAM,GAAG;AACb,QAAA,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc;AAC5C,QAAA,MAAM,EAAE,kCAAkC;AAC1C,QAAA,eAAe,EAAE,IAAI;AACrB,QAAA,OAAO,EAAE,KAAK;KACf;AAED,IAAA,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,MAAM,KAAK,EAAE,EAAE;AACjD,QAAA,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC;;IAG7D,MAAM,UAAU,GAAG,OACjB,KAAa,EACb,UAAA,GAAqB,CAAC,EACtB,cAAuB,KACI;AAC3B,QAAA,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE;YACjB,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;;AAG3D,QAAA,IAAI;AACF,YAAA,MAAM,cAAc,GAAG,CACrB,cAAc,IAAI,MAAM,CAAC,eAAe,EACxC,WAAW,EAAE;AAEf,YAAA,MAAM,OAAO,GAAG;AACd,gBAAA,CAAC,EAAE,KAAK;AACR,gBAAA,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC;AAC1C,gBAAA,EAAE,EAAE,cAAc;aACnB;AAED,YAAA,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE;AACxD,gBAAA,OAAO,EAAE;oBACP,WAAW,EAAE,MAAM,CAAC,MAAM;AAC1B,oBAAA,cAAc,EAAE,kBAAkB;AACnC,iBAAA;gBACD,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,aAAA,CAAC;AAEF,YAAA,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI;AAC1B,YAAA,MAAM,OAAO,GAAuB;gBAClC,OAAO,EAAE,IAAI,CAAC,OAAO;AACrB,gBAAA,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,EAAE;AACzB,gBAAA,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,EAAE;gBACjC,cAAc,EAAE,IAAI,CAAC,cAAwC;gBAC7D,SAAS,EAAE,IAAI,CAAC,SAA8B;gBAC9C,aAAa,EAAE,IAAI,CAAC,aAAwC;gBAC5D,eAAe,EAAE,IAAI,CAAC,eAA2B;aAClD;YAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;;QACvC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAuB,oBAAA,EAAA,YAAY,CAAE,CAAA,EAAE;;AAE3E,KAAC;IAED,OAAO,EAAE,UAAU,EAAE;AACvB,CAAC;AAED,MAAM,gBAAgB,GAAG,CACvB,WAAoB,EACpB,MAAe,KAOb;AACF,IAAA,MAAM,MAAM,GAAG;AACb,QAAA,WAAW,EAAE,WAAW,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB;AAC5D,QAAA,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe;AAC7C,QACA,OAAO,EAAE,KAAK;KACf;AAED,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;AAC3D,QAAA,MAAM,IAAI,KAAK,CAAC,kDAAkD,CAAC;;IAGrE,MAAM,UAAU,GAAG,OACjB,KAAa,EACb,UAAA,GAAqB,CAAC,EACtB,cAAuB,KACI;AAC3B,QAAA,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE;YACjB,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;;AAG3D,QAAA,IAAI;;AAEF,YAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;gBAC3D,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,6BAA6B,EAAE;;AAGjE,YAAA,IAAI,SAAS,GAAG,MAAM,CAAC,WAAW;YAClC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE;gBAClC,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,SAAS;;;AAItD,YAAA,MAAM,MAAM,GAAoC;AAC9C,gBAAA,CAAC,EAAE,KAAK;AACR,gBAAA,MAAM,EAAE,MAAM;AACd,gBAAA,MAAM,EAAE,CAAC;AACT,gBAAA,UAAU,EAAE,SAAS;AACrB,gBAAA,QAAQ,EAAE,KAAK;AACf,gBAAA,UAAU,EAAE,CAAC;AACb,gBAAA,OAAO,EAAE,wBAAwB;AACjC,gBAAA,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC;aACnD;YAED,IAAI,cAAc,IAAI,IAAI,IAAI,cAAc,KAAK,KAAK,EAAE;AACtD,gBAAA,MAAM,CAAC,QAAQ,GAAG,cAAc;;AAGlC,YAAA,MAAM,OAAO,GAA2B;AACtC,gBAAA,cAAc,EAAE,kBAAkB;aACnC;AAED,YAAA,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,MAAM,KAAK,EAAE,EAAE;AACjD,gBAAA,OAAO,CAAC,WAAW,CAAC,GAAG,MAAM,CAAC,MAAM;;YAGtC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,SAAS,EAAE;gBAC1C,OAAO;gBACP,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,aAAA,CAAC;AAEF,YAAA,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI;;YAG1B,MAAM,cAAc,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE;AACvC,iBAAA,KAAK,CAAC,CAAC,EAAE,UAAU;AACnB,iBAAA,GAAG,CAAC,CAAC,MAAuB,MAAM;AACjC,gBAAA,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE;AACzB,gBAAA,IAAI,EAAE,MAAM,CAAC,GAAG,IAAI,EAAE;AACtB,gBAAA,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;AAC7B,gBAAA,IAAI,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;AACjC,aAAA,CAAC,CAAC;;YAGL,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE;iBACrC,MAAM,CAAC,CAAC,MAAuB,KAAK,MAAM,CAAC,OAAO;AAClD,iBAAA,KAAK,CAAC,CAAC,EAAE,CAAC;AACV,iBAAA,GAAG,CAAC,CAAC,MAAuB,MAAM;AACjC,gBAAA,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE;AACzB,gBAAA,QAAQ,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;AAC/B,aAAA,CAAC,CAAC;;AAGL,YAAA,MAAM,OAAO,GAAuB;AAClC,gBAAA,OAAO,EAAE,cAAc;AACvB,gBAAA,MAAM,EAAE,YAAY;AACpB,gBAAA,UAAU,EAAE,EAAE;;AAEd,gBAAA,eAAe,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE;aACxC;YAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;;QACvC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,CAA+B,4BAAA,EAAA,YAAY,CAAE,CAAA;aACrD;;AAEL,KAAC;IAED,OAAO,EAAE,UAAU,EAAE;AACvB,CAAC;AAEY,MAAA,eAAe,GAAG,CAC7B,MAAsB,KAOpB;AACF,IAAA,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,GACd,GAAG,MAAM;AAEV,IAAA,IAAI,cAAc,CAAC,WAAW,EAAE,KAAK,QAAQ,EAAE;AAC7C,QAAA,OAAO,eAAe,CAAC,YAAY,CAAC;;AAC/B,SAAA,IAAI,cAAc,CAAC,WAAW,EAAE,KAAK,SAAS,EAAE;AACrD,QAAA,OAAO,gBAAgB,CAAC,kBAAkB,EAAE,aAAa,CAAC;;SACrD;AACL,QAAA,MAAM,IAAI,KAAK,CACb,4BAA4B,cAAc,CAAA,+BAAA,CAAiC,CAC5E;;AAEL;AAEa,MAAA,qBAAqB,GAAG,CACnC,SAAiC,EAAE,EACnC,eAAkC,KAShC;IACF,IAAI,CAAC,eAAe,EAAE;AACpB,QAAA,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC;;IAE3D,MAAM,EACJ,UAAU,GAAG,CAAC;;;IAGd,QAAQ,GACT,GAAG,MAAM;IAEV,MAAM,gBAAgB,GAAG,eAAe;AAExC,IAAA,MAAM,UAAU,GAAG;QACjB,UAAU,EAAE,OAAO,EACjB,KAAK,EACL,KAAK,GAIN,KAAoC;YACnC,OAAO,CAAC,GAAG,CAAC,CAAA,SAAA,EAAY,KAAK,CAAC,MAAM,CAAuB,qBAAA,CAAA,CAAC;YAC5D,MAAM,QAAQ,GAAmC,EAAE;AACnD,YAAA,IAAI;AACF,gBAAA,KAAK,MAAM,WAAW,IAAI,KAAK,EAAE;oBAC/B,MAAM,OAAO,GAA4B;AACtC,yBAAA,SAAS,CAAC,WAAW,EAAE,EAAE;yBACzB,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,CAAC,KAAI;AACxB,wBAAA,MAAM,WAAW,GAAGC,wBAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;wBAChE,IAAI,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,EAAE;4BACrC,MAAM,OAAO,GAAG,gBAAgB,CAAC,cAAc,CAAC,QAAQ,CAAC;4BACzD,OAAO;gCACL,GAAG;gCACH,WAAW;AACX,gCAAA,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;6BACpC;;wBAGH,OAAO;4BACL,GAAG;4BACH,WAAW;AACX,4BAAA,KAAK,EAAE,IAAI;4BACX,OAAO,EAAE,oBAAoB,GAAG,CAAA,EAAA,EAAK,QAAQ,CAAC,KAAK,IAAI,eAAe,CAAE,CAAA;yBACzE;AACH,qBAAC;AACA,yBAAA,IAAI,CAAC,OAAO,MAAM,KAAI;AACrB,wBAAA,IAAI;AACF,4BAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,EAAE;AACxB,gCAAA,OAAO,CAAC,KAAK,CACX,CAAA,eAAA,EAAkB,MAAM,CAAC,GAAG,CAAA,EAAA,EAAK,MAAM,CAAC,OAAO,CAAA,CAAE,CAClD;gCACD,OAAO;AACL,oCAAA,GAAG,MAAM;iCACV;;AAEH,4BAAA,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC;gCACrC,KAAK;gCACL,QAAQ;gCACR,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,6BAAA,CAAC;4BACF,OAAO;AACL,gCAAA,GAAG,MAAM;gCACT,UAAU;6BACX;;wBACD,OAAO,KAAK,EAAE;AACd,4BAAA,OAAO,CAAC,KAAK,CAAC,mCAAmC,EAAE,KAAK,CAAC;4BACzD,OAAO;AACL,gCAAA,GAAG,MAAM;6BACV;;AAEL,qBAAC;AACA,yBAAA,KAAK,CAAC,CAAC,KAAK,KAAI;wBACf,OAAO,CAAC,KAAK,CAAC,CAAA,eAAA,EAAkB,WAAW,CAAG,CAAA,CAAA,EAAE,KAAK,CAAC;wBACtD,OAAO;AACL,4BAAA,GAAG,EAAE,WAAW;AAChB,4BAAA,KAAK,EAAE,IAAI;4BACX,OAAO,EAAE,oBAAoB,WAAW,CAAA,EAAA,EAAK,KAAK,CAAC,OAAO,IAAI,eAAe,CAAE,CAAA;yBAChF;AACH,qBAAC,CAAC;AACJ,oBAAA,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC;;AAExB,gBAAA,OAAO,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;;YAClC,OAAO,KAAK,EAAE;AACd,gBAAA,OAAO,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;AAC5C,gBAAA,OAAO,EAAE;;SAEZ;KACF;AAED,IAAA,MAAM,aAAa,GAAG,OAAO,EAC3B,KAAK,EACL,KAAK,EACL,MAAM,EACN,gBAAgB,GAMjB,KAAmB;QAClB,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC;;AAE3C,QAAA,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;AAC3E,QAAA,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;AAC5B,YAAA,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,EAAE;gBACzB;;YAEF,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,GAAG,MAAM;YACxD,gBAAgB,GAAG,GAAG,EAAE;gBACtB,OAAO;gBACP,WAAW;gBACX,UAAU;AACX,aAAA,CAAC;;AAEN,KAAC;AAED,IAAA,MAAM,cAAc,GAAG,OACrB,MAAsB,EACtB,WAAmB,EACnB,KAAa,EACb,OAAmB,GAAA,KAAK,KACO;AAC/B,QAAA,IAAI;AACF,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;gBAChB,OAAO;AACL,oBAAA,OAAO,EAAE,EAAE;AACX,oBAAA,UAAU,EAAE,EAAE;AACd,oBAAA,MAAM,EAAE,EAAE;AACV,oBAAA,eAAe,EAAE,EAAE;iBACpB;;AACI,iBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE;gBAC/B,OAAO,MAAM,CAAC,IAAI;;YAGpB,IAAI,CAAC,OAAO,EAAE;gBACZ,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,KACpD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,CACtC;AAED,gBAAA,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE;oBACvB,OAAO,MAAM,CAAC,IAAI;;AAGpB,gBAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAyB;AACtD,gBAAA,aAAa,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC;AACtD,gBAAA,MAAM,gBAAgB,GAAG,0BAA0B,CAAC,aAAa,CAAC;AAClE,gBAAA,MAAM,aAAa,CAAC;oBAClB,KAAK;AACL,oBAAA,MAAM,EAAE,CAAC;oBACT,gBAAgB;oBAChB,KAAK,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7B,iBAAA,CAAC;AAEF,gBAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;oBACnD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;oBACrC,MAAM,aAAa,GAAG,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;oBACpD,IAAI,aAAa,EAAE;AACjB,wBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;AACvB,4BAAA,GAAG,MAAM;AACT,4BAAA,GAAG,aAAa;yBACjB;;;gBAIL,OAAO,MAAM,CAAC,IAAI;;AAGpB,YAAA,MAAM,SAAS,GAAG,IAAI,GAAG,EAAyB;YAClD,MAAM,QAAQ,GAAa,EAAE;YAE7B,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE;AACxC,gBAAA,IAAI,MAAM,CAAC,IAAI,EAAE;AACf,oBAAA,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC;oBAC1B,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC;;;AAItC,YAAA,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE;gBACzB,OAAO,MAAM,CAAC,IAAI;;AAGpB,YAAA,MAAM,gBAAgB,GAAG,0BAA0B,CAAC,SAAS,CAAC;AAC9D,YAAA,MAAM,aAAa,CAAC;AAClB,gBAAA,KAAK,EAAE,QAAQ;gBACf,KAAK;gBACL,gBAAgB;AAChB,gBAAA,MAAM,EAAE,WAAW;AACpB,aAAA,CAAC;AAEF,YAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;gBACrC,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;gBAChD,IAAI,aAAa,EAAE;AACjB,oBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;AACvB,wBAAA,GAAG,MAAM;AACT,wBAAA,GAAG,aAAa;qBACjB;;;AAIL,YAAA,MAAM,iBAAiB,GAAG,MAAM,CAAC,IAAI,CAAC;iBACnC,MAAM,CACL,CAAC,MAAM,KACL,MAAM,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC;AAEjE,iBAAA,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC;AAExB,YAAA,IAAI,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE;AAChC,gBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,GAAG,iBAAiB;;YAEzC,OAAO,MAAM,CAAC,IAAI;;QAClB,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAE,KAAK,CAAC;YAChD,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,eAAe,EAAE,EAAE;gBACnB,GAAG,MAAM,CAAC,IAAI;AACd,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;IAED,OAAO;QACL,cAAc;QACd,UAAU;KACX;AACH;;;;;"}
|
|
1
|
+
{"version":3,"file":"search.cjs","sources":["../../../../src/tools/search/search.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\nimport { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';\nimport type * as t from './types';\nimport { FirecrawlScraper } from './firecrawl';\nimport { BaseReranker } from './rerankers';\nimport { getAttribution } from './utils';\n\nconst chunker = {\n cleanText: (text: string): string => {\n if (!text) return '';\n\n /** Normalized all line endings to '\\n' */\n const normalizedText = text.replace(/\\r\\n/g, '\\n').replace(/\\r/g, '\\n');\n\n /** Handle multiple backslashes followed by newlines\n * This replaces patterns like '\\\\\\\\\\\\n' with a single newline */\n const fixedBackslashes = normalizedText.replace(/\\\\+\\n/g, '\\n');\n\n /** Cleaned up consecutive newlines, tabs, and spaces around newlines */\n const cleanedNewlines = fixedBackslashes.replace(/[\\t ]*\\n[\\t \\n]*/g, '\\n');\n\n /** Cleaned up excessive spaces and tabs */\n const cleanedSpaces = cleanedNewlines.replace(/[ \\t]+/g, ' ');\n\n return cleanedSpaces.trim();\n },\n splitText: async (\n text: string,\n options?: {\n chunkSize?: number;\n chunkOverlap?: number;\n separators?: string[];\n }\n ): Promise<string[]> => {\n const chunkSize = options?.chunkSize ?? 150;\n const chunkOverlap = options?.chunkOverlap ?? 50;\n const separators = options?.separators || ['\\n\\n', '\\n'];\n\n const splitter = new RecursiveCharacterTextSplitter({\n separators,\n chunkSize,\n chunkOverlap,\n });\n\n return await splitter.splitText(text);\n },\n\n splitTexts: async (\n texts: string[],\n options?: {\n chunkSize?: number;\n chunkOverlap?: number;\n separators?: string[];\n }\n ): Promise<string[][]> => {\n // Split multiple texts\n const promises = texts.map((text) =>\n chunker.splitText(text, options).catch((error) => {\n console.error('Error splitting text:', error);\n return [text];\n })\n );\n return Promise.all(promises);\n },\n};\n\nconst createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {\n return (link: string, update?: Partial<t.ValidSource>): void => {\n const source = sourceMap.get(link);\n if (source) {\n sourceMap.set(link, {\n ...source,\n ...update,\n });\n }\n };\n};\n\nconst getHighlights = async ({\n query,\n content,\n reranker,\n topResults = 5,\n}: {\n content: string;\n query: string;\n reranker?: BaseReranker;\n topResults?: number;\n}): Promise<t.Highlight[] | undefined> => {\n if (!content) {\n console.warn('No content provided for highlights');\n return;\n }\n if (!reranker) {\n console.warn('No reranker provided for highlights');\n return;\n }\n\n try {\n const documents = await chunker.splitText(content);\n if (Array.isArray(documents)) {\n return await reranker.rerank(query, documents, topResults);\n } else {\n console.error(\n 'Expected documents to be an array, got:',\n typeof documents\n );\n return;\n }\n } catch (error) {\n console.error('Error in content processing:', error);\n return;\n }\n};\n\nconst createSerperAPI = (\n apiKey?: string\n): {\n getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;\n} => {\n const config = {\n apiKey: apiKey ?? process.env.SERPER_API_KEY,\n apiUrl: 'https://google.serper.dev/search',\n timeout: 10000,\n };\n\n if (config.apiKey == null || config.apiKey === '') {\n throw new Error('SERPER_API_KEY is required for SerperAPI');\n }\n\n const getSources = async ({\n query,\n country,\n numResults = 8,\n }: t.GetSourcesParams): Promise<t.SearchResult> => {\n if (!query.trim()) {\n return { success: false, error: 'Query cannot be empty' };\n }\n\n try {\n const payload: t.SerperSearchPayload = {\n q: query,\n num: Math.min(Math.max(1, numResults), 10),\n };\n\n if (country != null && country !== '') {\n payload['gl'] = country.toLowerCase();\n }\n\n const response = await axios.post<t.SerperResultData>(\n config.apiUrl,\n payload,\n {\n headers: {\n 'X-API-KEY': config.apiKey,\n 'Content-Type': 'application/json',\n },\n timeout: config.timeout,\n }\n );\n\n const data = response.data;\n const results: t.SearchResultData = {\n organic: data.organic,\n images: data.images ?? [],\n answerBox: data.answerBox,\n topStories: data.topStories ?? [],\n peopleAlsoAsk: data.peopleAlsoAsk,\n knowledgeGraph: data.knowledgeGraph,\n relatedSearches: data.relatedSearches,\n };\n\n return { success: true, data: results };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return { success: false, error: `API request failed: ${errorMessage}` };\n }\n };\n\n return { getSources };\n};\n\nconst createSearXNGAPI = (\n instanceUrl?: string,\n apiKey?: string\n): {\n getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;\n} => {\n const config = {\n instanceUrl: instanceUrl ?? process.env.SEARXNG_INSTANCE_URL,\n apiKey: apiKey ?? process.env.SEARXNG_API_KEY,\n defaultLocation: 'all',\n timeout: 10000,\n };\n\n if (config.instanceUrl == null || config.instanceUrl === '') {\n throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');\n }\n\n const getSources = async ({\n query,\n numResults = 8,\n }: t.GetSourcesParams): Promise<t.SearchResult> => {\n if (!query.trim()) {\n return { success: false, error: 'Query cannot be empty' };\n }\n\n try {\n // Ensure the instance URL ends with /search\n if (config.instanceUrl == null || config.instanceUrl === '') {\n return { success: false, error: 'Instance URL is not defined' };\n }\n\n let searchUrl = config.instanceUrl;\n if (!searchUrl.endsWith('/search')) {\n searchUrl = searchUrl.replace(/\\/$/, '') + '/search';\n }\n\n // Prepare parameters for SearXNG\n const params: t.SearxNGSearchPayload = {\n q: query,\n format: 'json',\n pageno: 1,\n categories: 'general',\n language: 'all',\n safesearch: 0,\n engines: 'google,bing,duckduckgo',\n };\n\n const headers: Record<string, string> = {\n 'Content-Type': 'application/json',\n };\n\n if (config.apiKey != null && config.apiKey !== '') {\n headers['X-API-Key'] = config.apiKey;\n }\n\n const response = await axios.get(searchUrl, {\n headers,\n params,\n timeout: config.timeout,\n });\n\n const data = response.data;\n\n // Transform SearXNG results to match SerperAPI format\n const organicResults = (data.results ?? [])\n .slice(0, numResults)\n .map((result: t.SearXNGResult) => ({\n title: result.title ?? '',\n link: result.url ?? '',\n snippet: result.content ?? '',\n date: result.publishedDate ?? '',\n }));\n\n // Extract image results if available\n const imageResults = (data.results ?? [])\n .filter((result: t.SearXNGResult) => result.img_src)\n .slice(0, 6)\n .map((result: t.SearXNGResult) => ({\n title: result.title ?? '',\n imageUrl: result.img_src ?? '',\n }));\n\n // Format results to match SerperAPI structure\n const results: t.SearchResultData = {\n organic: organicResults,\n images: imageResults,\n topStories: [],\n // Use undefined instead of null for optional properties\n relatedSearches: data.suggestions ?? [],\n };\n\n return { success: true, data: results };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n success: false,\n error: `SearXNG API request failed: ${errorMessage}`,\n };\n }\n };\n\n return { getSources };\n};\n\nexport const createSearchAPI = (\n config: t.SearchConfig\n): {\n getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;\n} => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n } = config;\n\n if (searchProvider.toLowerCase() === 'serper') {\n return createSerperAPI(serperApiKey);\n } else if (searchProvider.toLowerCase() === 'searxng') {\n return createSearXNGAPI(searxngInstanceUrl, searxngApiKey);\n } else {\n throw new Error(\n `Invalid search provider: ${searchProvider}. Must be 'serper' or 'searxng'`\n );\n }\n};\n\nexport const createSourceProcessor = (\n config: t.ProcessSourcesConfig = {},\n scraperInstance?: FirecrawlScraper\n): {\n processSources: (\n result: t.SearchResult,\n numElements: number,\n query: string,\n proMode?: boolean\n ) => Promise<t.SearchResultData>;\n topResults: number;\n} => {\n if (!scraperInstance) {\n throw new Error('Firecrawl scraper instance is required');\n }\n const {\n topResults = 5,\n // strategies = ['no_extraction'],\n // filterContent = true,\n reranker,\n } = config;\n\n const firecrawlScraper = scraperInstance;\n\n const webScraper = {\n scrapeMany: async ({\n query,\n links,\n }: {\n query: string;\n links: string[];\n }): Promise<Array<t.ScrapeResult>> => {\n console.log(`Scraping ${links.length} links with Firecrawl`);\n const promises: Array<Promise<t.ScrapeResult>> = [];\n try {\n for (const currentLink of links) {\n const promise: Promise<t.ScrapeResult> = firecrawlScraper\n .scrapeUrl(currentLink, {})\n .then(([url, response]) => {\n const attribution = getAttribution(url, response.data?.metadata);\n if (response.success && response.data) {\n const [content, references] =\n firecrawlScraper.extractContent(response);\n return {\n url,\n references,\n attribution,\n content: chunker.cleanText(content),\n } as t.ScrapeResult;\n }\n\n return {\n url,\n attribution,\n error: true,\n content: `Failed to scrape ${url}: ${response.error ?? 'Unknown error'}`,\n } as t.ScrapeResult;\n })\n .then(async (result) => {\n try {\n if (result.error != null) {\n console.error(\n `Error scraping ${result.url}: ${result.content}`\n );\n return {\n ...result,\n };\n }\n const highlights = await getHighlights({\n query,\n reranker,\n content: result.content,\n });\n return {\n ...result,\n highlights,\n };\n } catch (error) {\n console.error('Error processing scraped content:', error);\n return {\n ...result,\n };\n }\n })\n .catch((error) => {\n console.error(`Error scraping ${currentLink}:`, error);\n return {\n url: currentLink,\n error: true,\n content: `Failed to scrape ${currentLink}: ${error.message ?? 'Unknown error'}`,\n };\n });\n promises.push(promise);\n }\n return await Promise.all(promises);\n } catch (error) {\n console.error('Error in scrapeMany:', error);\n return [];\n }\n },\n };\n\n const fetchContents = async ({\n links,\n query,\n target,\n onContentScraped,\n }: {\n links: string[];\n query: string;\n target: number;\n onContentScraped?: (link: string, update?: Partial<t.ValidSource>) => void;\n }): Promise<void> => {\n const initialLinks = links.slice(0, target);\n // const remainingLinks = links.slice(target).reverse();\n const results = await webScraper.scrapeMany({ query, links: initialLinks });\n for (const result of results) {\n if (result.error === true) {\n continue;\n }\n const { url, content, attribution, references, highlights } = result;\n onContentScraped?.(url, {\n content,\n attribution,\n references,\n highlights,\n });\n }\n };\n\n const processSources = async (\n result: t.SearchResult,\n numElements: number,\n query: string,\n proMode: boolean = false\n ): Promise<t.SearchResultData> => {\n try {\n if (!result.data) {\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n };\n } else if (!result.data.organic) {\n return result.data;\n }\n\n if (!proMode) {\n const wikiSources = result.data.organic.filter((source) =>\n source.link.includes('wikipedia.org')\n );\n\n if (!wikiSources.length) {\n return result.data;\n }\n\n const wikiSourceMap = new Map<string, t.ValidSource>();\n wikiSourceMap.set(wikiSources[0].link, wikiSources[0]);\n const onContentScraped = createSourceUpdateCallback(wikiSourceMap);\n await fetchContents({\n query,\n target: 1,\n onContentScraped,\n links: [wikiSources[0].link],\n });\n\n for (let i = 0; i < result.data.organic.length; i++) {\n const source = result.data.organic[i];\n const updatedSource = wikiSourceMap.get(source.link);\n if (updatedSource) {\n result.data.organic[i] = {\n ...source,\n ...updatedSource,\n };\n }\n }\n\n return result.data;\n }\n\n const sourceMap = new Map<string, t.ValidSource>();\n const allLinks: string[] = [];\n\n for (const source of result.data.organic) {\n if (source.link) {\n allLinks.push(source.link);\n sourceMap.set(source.link, source);\n }\n }\n\n if (allLinks.length === 0) {\n return result.data;\n }\n\n const onContentScraped = createSourceUpdateCallback(sourceMap);\n await fetchContents({\n links: allLinks,\n query,\n onContentScraped,\n target: numElements,\n });\n\n for (let i = 0; i < result.data.organic.length; i++) {\n const source = result.data.organic[i];\n const updatedSource = sourceMap.get(source.link);\n if (updatedSource) {\n result.data.organic[i] = {\n ...source,\n ...updatedSource,\n };\n }\n }\n\n const successfulSources = result.data.organic\n .filter(\n (source) =>\n source.content != null && !source.content.startsWith('Failed')\n )\n .slice(0, numElements);\n\n if (successfulSources.length > 0) {\n result.data.organic = successfulSources;\n }\n return result.data;\n } catch (error) {\n console.error('Error in processSources:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n ...result.data,\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n\n return {\n processSources,\n topResults,\n };\n};\n"],"names":["RecursiveCharacterTextSplitter","getAttribution"],"mappings":";;;;;;AAAA;AAQA,MAAM,OAAO,GAAG;AACd,IAAA,SAAS,EAAE,CAAC,IAAY,KAAY;AAClC,QAAA,IAAI,CAAC,IAAI;AAAE,YAAA,OAAO,EAAE;;AAGpB,QAAA,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC;AAEvE;AACiE;QACjE,MAAM,gBAAgB,GAAG,cAAc,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;QAG/D,MAAM,eAAe,GAAG,gBAAgB,CAAC,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC;;QAG3E,MAAM,aAAa,GAAG,eAAe,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;AAE7D,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE;KAC5B;AACD,IAAA,SAAS,EAAE,OACT,IAAY,EACZ,OAIC,KACoB;AACrB,QAAA,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,GAAG;AAC3C,QAAA,MAAM,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,EAAE;QAChD,MAAM,UAAU,GAAG,OAAO,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC;AAExD,QAAA,MAAM,QAAQ,GAAG,IAAIA,4CAA8B,CAAC;YAClD,UAAU;YACV,SAAS;YACT,YAAY;AACb,SAAA,CAAC;AAEF,QAAA,OAAO,MAAM,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC;KACtC;AAED,IAAA,UAAU,EAAE,OACV,KAAe,EACf,OAIC,KACsB;;QAEvB,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,KAC9B,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,KAAI;AAC/C,YAAA,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC;YAC7C,OAAO,CAAC,IAAI,CAAC;SACd,CAAC,CACH;AACD,QAAA,OAAO,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;KAC7B;CACF;AAED,MAAM,0BAA0B,GAAG,CAAC,SAAqC,KAAI;AAC3E,IAAA,OAAO,CAAC,IAAY,EAAE,MAA+B,KAAU;QAC7D,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC;QAClC,IAAI,MAAM,EAAE;AACV,YAAA,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE;AAClB,gBAAA,GAAG,MAAM;AACT,gBAAA,GAAG,MAAM;AACV,aAAA,CAAC;;AAEN,KAAC;AACH,CAAC;AAED,MAAM,aAAa,GAAG,OAAO,EAC3B,KAAK,EACL,OAAO,EACP,QAAQ,EACR,UAAU,GAAG,CAAC,GAMf,KAAwC;IACvC,IAAI,CAAC,OAAO,EAAE;AACZ,QAAA,OAAO,CAAC,IAAI,CAAC,oCAAoC,CAAC;QAClD;;IAEF,IAAI,CAAC,QAAQ,EAAE;AACb,QAAA,OAAO,CAAC,IAAI,CAAC,qCAAqC,CAAC;QACnD;;AAGF,IAAA,IAAI;QACF,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;AAClD,QAAA,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE;YAC5B,OAAO,MAAM,QAAQ,CAAC,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,CAAC;;aACrD;YACL,OAAO,CAAC,KAAK,CACX,yCAAyC,EACzC,OAAO,SAAS,CACjB;YACD;;;IAEF,OAAO,KAAK,EAAE;AACd,QAAA,OAAO,CAAC,KAAK,CAAC,8BAA8B,EAAE,KAAK,CAAC;QACpD;;AAEJ,CAAC;AAED,MAAM,eAAe,GAAG,CACtB,MAAe,KAGb;AACF,IAAA,MAAM,MAAM,GAAG;AACb,QAAA,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc;AAC5C,QAAA,MAAM,EAAE,kCAAkC;AAC1C,QAAA,OAAO,EAAE,KAAK;KACf;AAED,IAAA,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,MAAM,KAAK,EAAE,EAAE;AACjD,QAAA,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC;;AAG7D,IAAA,MAAM,UAAU,GAAG,OAAO,EACxB,KAAK,EACL,OAAO,EACP,UAAU,GAAG,CAAC,GACK,KAA6B;AAChD,QAAA,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE;YACjB,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;;AAG3D,QAAA,IAAI;AACF,YAAA,MAAM,OAAO,GAA0B;AACrC,gBAAA,CAAC,EAAE,KAAK;AACR,gBAAA,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,EAAE,CAAC;aAC3C;YAED,IAAI,OAAO,IAAI,IAAI,IAAI,OAAO,KAAK,EAAE,EAAE;gBACrC,OAAO,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,WAAW,EAAE;;AAGvC,YAAA,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,MAAM,CAAC,MAAM,EACb,OAAO,EACP;AACE,gBAAA,OAAO,EAAE;oBACP,WAAW,EAAE,MAAM,CAAC,MAAM;AAC1B,oBAAA,cAAc,EAAE,kBAAkB;AACnC,iBAAA;gBACD,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,aAAA,CACF;AAED,YAAA,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI;AAC1B,YAAA,MAAM,OAAO,GAAuB;gBAClC,OAAO,EAAE,IAAI,CAAC,OAAO;AACrB,gBAAA,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,EAAE;gBACzB,SAAS,EAAE,IAAI,CAAC,SAAS;AACzB,gBAAA,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,EAAE;gBACjC,aAAa,EAAE,IAAI,CAAC,aAAa;gBACjC,cAAc,EAAE,IAAI,CAAC,cAAc;gBACnC,eAAe,EAAE,IAAI,CAAC,eAAe;aACtC;YAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;;QACvC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAuB,oBAAA,EAAA,YAAY,CAAE,CAAA,EAAE;;AAE3E,KAAC;IAED,OAAO,EAAE,UAAU,EAAE;AACvB,CAAC;AAED,MAAM,gBAAgB,GAAG,CACvB,WAAoB,EACpB,MAAe,KAGb;AACF,IAAA,MAAM,MAAM,GAAG;AACb,QAAA,WAAW,EAAE,WAAW,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB;AAC5D,QAAA,MAAM,EAAE,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe;AAC7C,QACA,OAAO,EAAE,KAAK;KACf;AAED,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;AAC3D,QAAA,MAAM,IAAI,KAAK,CAAC,kDAAkD,CAAC;;AAGrE,IAAA,MAAM,UAAU,GAAG,OAAO,EACxB,KAAK,EACL,UAAU,GAAG,CAAC,GACK,KAA6B;AAChD,QAAA,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE;YACjB,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;;AAG3D,QAAA,IAAI;;AAEF,YAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;gBAC3D,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,6BAA6B,EAAE;;AAGjE,YAAA,IAAI,SAAS,GAAG,MAAM,CAAC,WAAW;YAClC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE;gBAClC,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,SAAS;;;AAItD,YAAA,MAAM,MAAM,GAA2B;AACrC,gBAAA,CAAC,EAAE,KAAK;AACR,gBAAA,MAAM,EAAE,MAAM;AACd,gBAAA,MAAM,EAAE,CAAC;AACT,gBAAA,UAAU,EAAE,SAAS;AACrB,gBAAA,QAAQ,EAAE,KAAK;AACf,gBAAA,UAAU,EAAE,CAAC;AACb,gBAAA,OAAO,EAAE,wBAAwB;aAClC;AAED,YAAA,MAAM,OAAO,GAA2B;AACtC,gBAAA,cAAc,EAAE,kBAAkB;aACnC;AAED,YAAA,IAAI,MAAM,CAAC,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,MAAM,KAAK,EAAE,EAAE;AACjD,gBAAA,OAAO,CAAC,WAAW,CAAC,GAAG,MAAM,CAAC,MAAM;;YAGtC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,SAAS,EAAE;gBAC1C,OAAO;gBACP,MAAM;gBACN,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,aAAA,CAAC;AAEF,YAAA,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI;;YAG1B,MAAM,cAAc,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE;AACvC,iBAAA,KAAK,CAAC,CAAC,EAAE,UAAU;AACnB,iBAAA,GAAG,CAAC,CAAC,MAAuB,MAAM;AACjC,gBAAA,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE;AACzB,gBAAA,IAAI,EAAE,MAAM,CAAC,GAAG,IAAI,EAAE;AACtB,gBAAA,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;AAC7B,gBAAA,IAAI,EAAE,MAAM,CAAC,aAAa,IAAI,EAAE;AACjC,aAAA,CAAC,CAAC;;YAGL,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE;iBACrC,MAAM,CAAC,CAAC,MAAuB,KAAK,MAAM,CAAC,OAAO;AAClD,iBAAA,KAAK,CAAC,CAAC,EAAE,CAAC;AACV,iBAAA,GAAG,CAAC,CAAC,MAAuB,MAAM;AACjC,gBAAA,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,EAAE;AACzB,gBAAA,QAAQ,EAAE,MAAM,CAAC,OAAO,IAAI,EAAE;AAC/B,aAAA,CAAC,CAAC;;AAGL,YAAA,MAAM,OAAO,GAAuB;AAClC,gBAAA,OAAO,EAAE,cAAc;AACvB,gBAAA,MAAM,EAAE,YAAY;AACpB,gBAAA,UAAU,EAAE,EAAE;;AAEd,gBAAA,eAAe,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE;aACxC;YAED,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE;;QACvC,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;AACL,gBAAA,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,CAA+B,4BAAA,EAAA,YAAY,CAAE,CAAA;aACrD;;AAEL,KAAC;IAED,OAAO,EAAE,UAAU,EAAE;AACvB,CAAC;AAEY,MAAA,eAAe,GAAG,CAC7B,MAAsB,KAGpB;AACF,IAAA,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,GACd,GAAG,MAAM;AAEV,IAAA,IAAI,cAAc,CAAC,WAAW,EAAE,KAAK,QAAQ,EAAE;AAC7C,QAAA,OAAO,eAAe,CAAC,YAAY,CAAC;;AAC/B,SAAA,IAAI,cAAc,CAAC,WAAW,EAAE,KAAK,SAAS,EAAE;AACrD,QAAA,OAAO,gBAAgB,CAAC,kBAAkB,EAAE,aAAa,CAAC;;SACrD;AACL,QAAA,MAAM,IAAI,KAAK,CACb,4BAA4B,cAAc,CAAA,+BAAA,CAAiC,CAC5E;;AAEL;AAEa,MAAA,qBAAqB,GAAG,CACnC,SAAiC,EAAE,EACnC,eAAkC,KAShC;IACF,IAAI,CAAC,eAAe,EAAE;AACpB,QAAA,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC;;IAE3D,MAAM,EACJ,UAAU,GAAG,CAAC;;;IAGd,QAAQ,GACT,GAAG,MAAM;IAEV,MAAM,gBAAgB,GAAG,eAAe;AAExC,IAAA,MAAM,UAAU,GAAG;QACjB,UAAU,EAAE,OAAO,EACjB,KAAK,EACL,KAAK,GAIN,KAAoC;YACnC,OAAO,CAAC,GAAG,CAAC,CAAA,SAAA,EAAY,KAAK,CAAC,MAAM,CAAuB,qBAAA,CAAA,CAAC;YAC5D,MAAM,QAAQ,GAAmC,EAAE;AACnD,YAAA,IAAI;AACF,gBAAA,KAAK,MAAM,WAAW,IAAI,KAAK,EAAE;oBAC/B,MAAM,OAAO,GAA4B;AACtC,yBAAA,SAAS,CAAC,WAAW,EAAE,EAAE;yBACzB,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,CAAC,KAAI;AACxB,wBAAA,MAAM,WAAW,GAAGC,oBAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;wBAChE,IAAI,QAAQ,CAAC,OAAO,IAAI,QAAQ,CAAC,IAAI,EAAE;AACrC,4BAAA,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,GACzB,gBAAgB,CAAC,cAAc,CAAC,QAAQ,CAAC;4BAC3C,OAAO;gCACL,GAAG;gCACH,UAAU;gCACV,WAAW;AACX,gCAAA,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC;6BAClB;;wBAGrB,OAAO;4BACL,GAAG;4BACH,WAAW;AACX,4BAAA,KAAK,EAAE,IAAI;4BACX,OAAO,EAAE,oBAAoB,GAAG,CAAA,EAAA,EAAK,QAAQ,CAAC,KAAK,IAAI,eAAe,CAAE,CAAA;yBACvD;AACrB,qBAAC;AACA,yBAAA,IAAI,CAAC,OAAO,MAAM,KAAI;AACrB,wBAAA,IAAI;AACF,4BAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,EAAE;AACxB,gCAAA,OAAO,CAAC,KAAK,CACX,CAAA,eAAA,EAAkB,MAAM,CAAC,GAAG,CAAA,EAAA,EAAK,MAAM,CAAC,OAAO,CAAA,CAAE,CAClD;gCACD,OAAO;AACL,oCAAA,GAAG,MAAM;iCACV;;AAEH,4BAAA,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC;gCACrC,KAAK;gCACL,QAAQ;gCACR,OAAO,EAAE,MAAM,CAAC,OAAO;AACxB,6BAAA,CAAC;4BACF,OAAO;AACL,gCAAA,GAAG,MAAM;gCACT,UAAU;6BACX;;wBACD,OAAO,KAAK,EAAE;AACd,4BAAA,OAAO,CAAC,KAAK,CAAC,mCAAmC,EAAE,KAAK,CAAC;4BACzD,OAAO;AACL,gCAAA,GAAG,MAAM;6BACV;;AAEL,qBAAC;AACA,yBAAA,KAAK,CAAC,CAAC,KAAK,KAAI;wBACf,OAAO,CAAC,KAAK,CAAC,CAAA,eAAA,EAAkB,WAAW,CAAG,CAAA,CAAA,EAAE,KAAK,CAAC;wBACtD,OAAO;AACL,4BAAA,GAAG,EAAE,WAAW;AAChB,4BAAA,KAAK,EAAE,IAAI;4BACX,OAAO,EAAE,oBAAoB,WAAW,CAAA,EAAA,EAAK,KAAK,CAAC,OAAO,IAAI,eAAe,CAAE,CAAA;yBAChF;AACH,qBAAC,CAAC;AACJ,oBAAA,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC;;AAExB,gBAAA,OAAO,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;;YAClC,OAAO,KAAK,EAAE;AACd,gBAAA,OAAO,CAAC,KAAK,CAAC,sBAAsB,EAAE,KAAK,CAAC;AAC5C,gBAAA,OAAO,EAAE;;SAEZ;KACF;AAED,IAAA,MAAM,aAAa,GAAG,OAAO,EAC3B,KAAK,EACL,KAAK,EACL,MAAM,EACN,gBAAgB,GAMjB,KAAmB;QAClB,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC;;AAE3C,QAAA,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,UAAU,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;AAC3E,QAAA,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;AAC5B,YAAA,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,EAAE;gBACzB;;AAEF,YAAA,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,UAAU,EAAE,GAAG,MAAM;YACpE,gBAAgB,GAAG,GAAG,EAAE;gBACtB,OAAO;gBACP,WAAW;gBACX,UAAU;gBACV,UAAU;AACX,aAAA,CAAC;;AAEN,KAAC;AAED,IAAA,MAAM,cAAc,GAAG,OACrB,MAAsB,EACtB,WAAmB,EACnB,KAAa,EACb,OAAmB,GAAA,KAAK,KACO;AAC/B,QAAA,IAAI;AACF,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;gBAChB,OAAO;AACL,oBAAA,OAAO,EAAE,EAAE;AACX,oBAAA,UAAU,EAAE,EAAE;AACd,oBAAA,MAAM,EAAE,EAAE;AACV,oBAAA,eAAe,EAAE,EAAE;iBACpB;;AACI,iBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE;gBAC/B,OAAO,MAAM,CAAC,IAAI;;YAGpB,IAAI,CAAC,OAAO,EAAE;gBACZ,MAAM,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,MAAM,KACpD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,CACtC;AAED,gBAAA,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE;oBACvB,OAAO,MAAM,CAAC,IAAI;;AAGpB,gBAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAyB;AACtD,gBAAA,aAAa,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC;AACtD,gBAAA,MAAM,gBAAgB,GAAG,0BAA0B,CAAC,aAAa,CAAC;AAClE,gBAAA,MAAM,aAAa,CAAC;oBAClB,KAAK;AACL,oBAAA,MAAM,EAAE,CAAC;oBACT,gBAAgB;oBAChB,KAAK,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7B,iBAAA,CAAC;AAEF,gBAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;oBACnD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;oBACrC,MAAM,aAAa,GAAG,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;oBACpD,IAAI,aAAa,EAAE;AACjB,wBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;AACvB,4BAAA,GAAG,MAAM;AACT,4BAAA,GAAG,aAAa;yBACjB;;;gBAIL,OAAO,MAAM,CAAC,IAAI;;AAGpB,YAAA,MAAM,SAAS,GAAG,IAAI,GAAG,EAAyB;YAClD,MAAM,QAAQ,GAAa,EAAE;YAE7B,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE;AACxC,gBAAA,IAAI,MAAM,CAAC,IAAI,EAAE;AACf,oBAAA,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC;oBAC1B,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC;;;AAItC,YAAA,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE;gBACzB,OAAO,MAAM,CAAC,IAAI;;AAGpB,YAAA,MAAM,gBAAgB,GAAG,0BAA0B,CAAC,SAAS,CAAC;AAC9D,YAAA,MAAM,aAAa,CAAC;AAClB,gBAAA,KAAK,EAAE,QAAQ;gBACf,KAAK;gBACL,gBAAgB;AAChB,gBAAA,MAAM,EAAE,WAAW;AACpB,aAAA,CAAC;AAEF,YAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;gBACrC,MAAM,aAAa,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC;gBAChD,IAAI,aAAa,EAAE;AACjB,oBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;AACvB,wBAAA,GAAG,MAAM;AACT,wBAAA,GAAG,aAAa;qBACjB;;;AAIL,YAAA,MAAM,iBAAiB,GAAG,MAAM,CAAC,IAAI,CAAC;iBACnC,MAAM,CACL,CAAC,MAAM,KACL,MAAM,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC;AAEjE,iBAAA,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC;AAExB,YAAA,IAAI,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE;AAChC,gBAAA,MAAM,CAAC,IAAI,CAAC,OAAO,GAAG,iBAAiB;;YAEzC,OAAO,MAAM,CAAC,IAAI;;QAClB,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAE,KAAK,CAAC;YAChD,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,eAAe,EAAE,EAAE;gBACnB,GAAG,MAAM,CAAC,IAAI;AACd,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;IAED,OAAO;QACL,cAAc;QACd,UAAU;KACX;AACH;;;;;"}
|
|
@@ -10,8 +10,7 @@ var rerankers = require('./rerankers.cjs');
|
|
|
10
10
|
var _enum = require('../../common/enum.cjs');
|
|
11
11
|
|
|
12
12
|
/* eslint-disable no-console */
|
|
13
|
-
const
|
|
14
|
-
query: zod.z.string().describe(`
|
|
13
|
+
const DEFAULT_QUERY_DESCRIPTION = `
|
|
15
14
|
GUIDELINES:
|
|
16
15
|
- Start broad, then narrow: Begin with key concepts, then refine with specifics
|
|
17
16
|
- Think like sources: Use terminology experts would use in the field
|
|
@@ -29,10 +28,35 @@ TECHNIQUES (combine for power searches):
|
|
|
29
28
|
- SPECIFIC QUESTIONS: Use who/what/when/where/why/how
|
|
30
29
|
- DOMAIN TERMS: Include technical terminology for specialized topics
|
|
31
30
|
- CONCISE TERMS: Prioritize keywords over sentences
|
|
32
|
-
`.trim()
|
|
33
|
-
|
|
31
|
+
`.trim();
|
|
32
|
+
const DEFAULT_COUNTRY_DESCRIPTION = `Country code to localize search results.
|
|
33
|
+
Use standard 2-letter country codes: "us", "uk", "ca", "de", "fr", "jp", "br", etc.
|
|
34
|
+
Provide this when the search should return results specific to a particular country.
|
|
35
|
+
Examples:
|
|
36
|
+
- "us" for United States (default)
|
|
37
|
+
- "de" for Germany
|
|
38
|
+
- "in" for India
|
|
39
|
+
`.trim();
|
|
40
|
+
/**
|
|
41
|
+
* Creates a search tool with a schema that dynamically includes the country field
|
|
42
|
+
* only when the searchProvider is 'serper'.
|
|
43
|
+
*
|
|
44
|
+
* @param config - The search tool configuration
|
|
45
|
+
* @returns A DynamicStructuredTool with a schema that depends on the searchProvider
|
|
46
|
+
*/
|
|
34
47
|
const createSearchTool = (config = {}) => {
|
|
35
48
|
const { searchProvider = 'serper', serperApiKey, searxngInstanceUrl, searxngApiKey, rerankerType = 'cohere', topResults = 5, strategies = ['no_extraction'], filterContent = true, firecrawlApiKey, firecrawlApiUrl, firecrawlFormats = ['markdown', 'html'], jinaApiKey, cohereApiKey, onSearchResults: _onSearchResults, } = config;
|
|
49
|
+
const querySchema = zod.z.string().describe(DEFAULT_QUERY_DESCRIPTION);
|
|
50
|
+
const schemaObject = {
|
|
51
|
+
query: querySchema,
|
|
52
|
+
};
|
|
53
|
+
if (searchProvider === 'serper') {
|
|
54
|
+
schemaObject.country = zod.z
|
|
55
|
+
.string()
|
|
56
|
+
.optional()
|
|
57
|
+
.describe(DEFAULT_COUNTRY_DESCRIPTION);
|
|
58
|
+
}
|
|
59
|
+
const SearchToolSchema = zod.z.object(schemaObject);
|
|
36
60
|
const searchAPI = search.createSearchAPI({
|
|
37
61
|
searchProvider,
|
|
38
62
|
serperApiKey,
|
|
@@ -55,9 +79,9 @@ const createSearchTool = (config = {}) => {
|
|
|
55
79
|
const sourceProcessor = search.createSourceProcessor({
|
|
56
80
|
reranker: selectedReranker,
|
|
57
81
|
topResults}, firecrawlScraper);
|
|
58
|
-
const search$1 = async ({ query, proMode = true, maxSources = 5, onSearchResults, }) => {
|
|
82
|
+
const search$1 = async ({ query, country, proMode = true, maxSources = 5, onSearchResults, }) => {
|
|
59
83
|
try {
|
|
60
|
-
const sources = await searchAPI.getSources(query);
|
|
84
|
+
const sources = await searchAPI.getSources({ query, country });
|
|
61
85
|
onSearchResults?.(sources);
|
|
62
86
|
if (!sources.success) {
|
|
63
87
|
throw new Error(sources.error ?? 'Search failed');
|
|
@@ -76,9 +100,12 @@ const createSearchTool = (config = {}) => {
|
|
|
76
100
|
};
|
|
77
101
|
}
|
|
78
102
|
};
|
|
79
|
-
return tools.tool(async (
|
|
103
|
+
return tools.tool(async (params, runnableConfig) => {
|
|
104
|
+
const { query, country: _c } = params;
|
|
105
|
+
const country = typeof _c === 'string' && _c ? _c : undefined;
|
|
80
106
|
const searchResult = await search$1({
|
|
81
107
|
query,
|
|
108
|
+
country,
|
|
82
109
|
onSearchResults: _onSearchResults
|
|
83
110
|
? (result) => {
|
|
84
111
|
_onSearchResults(result, runnableConfig);
|
|
@@ -86,16 +113,21 @@ const createSearchTool = (config = {}) => {
|
|
|
86
113
|
: undefined,
|
|
87
114
|
});
|
|
88
115
|
const turn = runnableConfig.toolCall?.turn ?? 0;
|
|
89
|
-
const output = format.formatResultsForLLM(turn, searchResult);
|
|
90
|
-
return [
|
|
116
|
+
const { output, references } = format.formatResultsForLLM(turn, searchResult);
|
|
117
|
+
return [
|
|
118
|
+
output,
|
|
119
|
+
{ [_enum.Constants.WEB_SEARCH]: { turn, ...searchResult, references } },
|
|
120
|
+
];
|
|
91
121
|
}, {
|
|
92
122
|
name: _enum.Constants.WEB_SEARCH,
|
|
93
123
|
description: `
|
|
94
|
-
Real-time search. Results have required
|
|
124
|
+
Real-time search. Results have required citation anchors.
|
|
125
|
+
|
|
126
|
+
Note: Use ONCE per reply unless instructed otherwise.
|
|
95
127
|
|
|
96
128
|
Anchors:
|
|
97
|
-
- \\
|
|
98
|
-
- X = turn, Y = item
|
|
129
|
+
- \\ue202turnXtypeY
|
|
130
|
+
- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx
|
|
99
131
|
|
|
100
132
|
Special Markers:
|
|
101
133
|
- \\ue203...\\ue204 — highlight start/end of cited text (for Standalone or Group citations)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool.cjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as t from './types';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\nconst
|
|
1
|
+
{"version":3,"file":"tool.cjs","sources":["../../../../src/tools/search/tool.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as t from './types';\nimport { createSearchAPI, createSourceProcessor } from './search';\nimport { createFirecrawlScraper } from './firecrawl';\nimport { expandHighlights } from './highlights';\nimport { formatResultsForLLM } from './format';\nimport { createReranker } from './rerankers';\nimport { Constants } from '@/common';\n\nconst DEFAULT_QUERY_DESCRIPTION = `\nGUIDELINES:\n- Start broad, then narrow: Begin with key concepts, then refine with specifics\n- Think like sources: Use terminology experts would use in the field\n- Consider perspective: Frame queries from different viewpoints for better results\n- Quality over quantity: A precise 3-4 word query often beats lengthy sentences\n\nTECHNIQUES (combine for power searches):\n- EXACT PHRASES: Use quotes (\"climate change report\")\n- EXCLUDE TERMS: Use minus to remove unwanted results (-wikipedia)\n- SITE-SPECIFIC: Restrict to websites (site:edu research)\n- FILETYPE: Find specific documents (filetype:pdf study)\n- OR OPERATOR: Find alternatives (electric OR hybrid cars)\n- DATE RANGE: Recent information (data after:2020)\n- WILDCARDS: Use * for unknown terms (how to * bread)\n- SPECIFIC QUESTIONS: Use who/what/when/where/why/how\n- DOMAIN TERMS: Include technical terminology for specialized topics\n- CONCISE TERMS: Prioritize keywords over sentences\n`.trim();\n\nconst DEFAULT_COUNTRY_DESCRIPTION = `Country code to localize search results.\nUse standard 2-letter country codes: \"us\", \"uk\", \"ca\", \"de\", \"fr\", \"jp\", \"br\", etc.\nProvide this when the search should return results specific to a particular country.\nExamples:\n- \"us\" for United States (default)\n- \"de\" for Germany\n- \"in\" for India\n`.trim();\n\n/**\n * Creates a search tool with a schema that dynamically includes the country field\n * only when the searchProvider is 'serper'.\n *\n * @param config - The search tool configuration\n * @returns A DynamicStructuredTool with a schema that depends on the searchProvider\n */\nexport const createSearchTool = (\n config: t.SearchToolConfig = {}\n): DynamicStructuredTool<typeof SearchToolSchema> => {\n const {\n searchProvider = 'serper',\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n rerankerType = 'cohere',\n topResults = 5,\n strategies = ['no_extraction'],\n filterContent = true,\n firecrawlApiKey,\n firecrawlApiUrl,\n firecrawlFormats = ['markdown', 'html'],\n jinaApiKey,\n cohereApiKey,\n onSearchResults: _onSearchResults,\n } = config;\n\n const querySchema = z.string().describe(DEFAULT_QUERY_DESCRIPTION);\n const schemaObject: {\n query: z.ZodString;\n country?: z.ZodOptional<z.ZodString>;\n } = {\n query: querySchema,\n };\n\n if (searchProvider === 'serper') {\n schemaObject.country = z\n .string()\n .optional()\n .describe(DEFAULT_COUNTRY_DESCRIPTION);\n }\n\n const SearchToolSchema = z.object(schemaObject);\n\n const searchAPI = createSearchAPI({\n searchProvider,\n serperApiKey,\n searxngInstanceUrl,\n searxngApiKey,\n });\n\n const firecrawlScraper = createFirecrawlScraper({\n apiKey: firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY,\n apiUrl: firecrawlApiUrl,\n formats: firecrawlFormats,\n });\n\n const selectedReranker = createReranker({\n rerankerType,\n jinaApiKey,\n cohereApiKey,\n });\n\n if (!selectedReranker) {\n console.warn('No reranker selected. Using default ranking.');\n }\n\n const sourceProcessor = createSourceProcessor(\n {\n reranker: selectedReranker,\n topResults,\n strategies,\n filterContent,\n },\n firecrawlScraper\n );\n\n const search = async ({\n query,\n country,\n proMode = true,\n maxSources = 5,\n onSearchResults,\n }: {\n query: string;\n country?: string;\n maxSources?: number;\n proMode?: boolean;\n onSearchResults?: (sources: t.SearchResult) => void;\n }): Promise<t.SearchResultData> => {\n try {\n const sources = await searchAPI.getSources({ query, country });\n onSearchResults?.(sources);\n\n if (!sources.success) {\n throw new Error(sources.error ?? 'Search failed');\n }\n\n const processedSources = await sourceProcessor.processSources(\n sources,\n maxSources,\n query,\n proMode\n );\n return expandHighlights(processedSources);\n } catch (error) {\n console.error('Error in search:', error);\n return {\n organic: [],\n topStories: [],\n images: [],\n relatedSearches: [],\n error: error instanceof Error ? error.message : String(error),\n };\n }\n };\n\n return tool<typeof SearchToolSchema>(\n async (params, runnableConfig) => {\n const { query, country: _c } = params;\n const country = typeof _c === 'string' && _c ? _c : undefined;\n const searchResult = await search({\n query,\n country,\n onSearchResults: _onSearchResults\n ? (result): void => {\n _onSearchResults(result, runnableConfig);\n }\n : undefined,\n });\n const turn = runnableConfig.toolCall?.turn ?? 0;\n const { output, references } = formatResultsForLLM(turn, searchResult);\n return [\n output,\n { [Constants.WEB_SEARCH]: { turn, ...searchResult, references } },\n ];\n },\n {\n name: Constants.WEB_SEARCH,\n description: `\nReal-time search. Results have required citation anchors.\n\nNote: Use ONCE per reply unless instructed otherwise.\n\nAnchors:\n- \\\\ue202turnXtypeY\n- X = turn idx, type = 'search' | 'news' | 'image' | 'ref', Y = item idx\n\nSpecial Markers:\n- \\\\ue203...\\\\ue204 — highlight start/end of cited text (for Standalone or Group citations)\n- \\\\ue200...\\\\ue201 — group block (e.g. \\\\ue200\\\\ue202turn0search1\\\\ue202turn0news2\\\\ue201)\n\n**CITE EVERY NON-OBVIOUS FACT/QUOTE:**\nUse anchor marker(s) immediately after the statement:\n- Standalone: \"Pure functions produce same output. \\\\ue202turn0search0\"\n- Standalone (multiple): \"Today's News \\\\ue202turn0search0\\\\ue202turn0news0\"\n- Highlight: \"\\\\ue203Highlight text.\\\\ue204\\\\ue202turn0news1\"\n- Group: \"Sources. \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Group Highlight: \"\\\\ue203Highlight for group.\\\\ue204 \\\\ue200\\\\ue202turn0search0\\\\ue202turn0news1\\\\ue201\"\n- Image: \"See photo \\\\ue202turn0image0.\"\n\n**NEVER use markdown links, [1], or footnotes. CITE ONLY with anchors provided.**\n`.trim(),\n schema: SearchToolSchema,\n responseFormat: Constants.CONTENT_AND_ARTIFACT,\n }\n );\n};\n"],"names":["z","createSearchAPI","createFirecrawlScraper","createReranker","createSourceProcessor","search","expandHighlights","tool","formatResultsForLLM","Constants"],"mappings":";;;;;;;;;;;AAAA;AAWA,MAAM,yBAAyB,GAAG;;;;;;;;;;;;;;;;;;CAkBjC,CAAC,IAAI,EAAE;AAER,MAAM,2BAA2B,GAAG,CAAA;;;;;;;CAOnC,CAAC,IAAI,EAAE;AAER;;;;;;AAMG;MACU,gBAAgB,GAAG,CAC9B,MAA6B,GAAA,EAAE,KACmB;IAClD,MAAM,EACJ,cAAc,GAAG,QAAQ,EACzB,YAAY,EACZ,kBAAkB,EAClB,aAAa,EACb,YAAY,GAAG,QAAQ,EACvB,UAAU,GAAG,CAAC,EACd,UAAU,GAAG,CAAC,eAAe,CAAC,EAC9B,aAAa,GAAG,IAAI,EACpB,eAAe,EACf,eAAe,EACf,gBAAgB,GAAG,CAAC,UAAU,EAAE,MAAM,CAAC,EACvC,UAAU,EACV,YAAY,EACZ,eAAe,EAAE,gBAAgB,GAClC,GAAG,MAAM;IAEV,MAAM,WAAW,GAAGA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC;AAClE,IAAA,MAAM,YAAY,GAGd;AACF,QAAA,KAAK,EAAE,WAAW;KACnB;AAED,IAAA,IAAI,cAAc,KAAK,QAAQ,EAAE;QAC/B,YAAY,CAAC,OAAO,GAAGA;AACpB,aAAA,MAAM;AACN,aAAA,QAAQ;aACR,QAAQ,CAAC,2BAA2B,CAAC;;IAG1C,MAAM,gBAAgB,GAAGA,KAAC,CAAC,MAAM,CAAC,YAAY,CAAC;IAE/C,MAAM,SAAS,GAAGC,sBAAe,CAAC;QAChC,cAAc;QACd,YAAY;QACZ,kBAAkB;QAClB,aAAa;AACd,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAGC,gCAAsB,CAAC;AAC9C,QAAA,MAAM,EAAE,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB;AACxD,QAAA,MAAM,EAAE,eAAe;AACvB,QAAA,OAAO,EAAE,gBAAgB;AAC1B,KAAA,CAAC;IAEF,MAAM,gBAAgB,GAAGC,wBAAc,CAAC;QACtC,YAAY;QACZ,UAAU;QACV,YAAY;AACb,KAAA,CAAC;IAEF,IAAI,CAAC,gBAAgB,EAAE;AACrB,QAAA,OAAO,CAAC,IAAI,CAAC,8CAA8C,CAAC;;IAG9D,MAAM,eAAe,GAAGC,4BAAqB,CAC3C;AACE,QAAA,QAAQ,EAAE,gBAAgB;QAC1B,WAGD,EACD,gBAAgB,CACjB;IAED,MAAMC,QAAM,GAAG,OAAO,EACpB,KAAK,EACL,OAAO,EACP,OAAO,GAAG,IAAI,EACd,UAAU,GAAG,CAAC,EACd,eAAe,GAOhB,KAAiC;AAChC,QAAA,IAAI;AACF,YAAA,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,UAAU,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;AAC9D,YAAA,eAAe,GAAG,OAAO,CAAC;AAE1B,YAAA,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE;gBACpB,MAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,IAAI,eAAe,CAAC;;AAGnD,YAAA,MAAM,gBAAgB,GAAG,MAAM,eAAe,CAAC,cAAc,CAC3D,OAAO,EACP,UAAU,EACV,KAAK,EACL,OAAO,CACR;AACD,YAAA,OAAOC,2BAAgB,CAAC,gBAAgB,CAAC;;QACzC,OAAO,KAAK,EAAE;AACd,YAAA,OAAO,CAAC,KAAK,CAAC,kBAAkB,EAAE,KAAK,CAAC;YACxC,OAAO;AACL,gBAAA,OAAO,EAAE,EAAE;AACX,gBAAA,UAAU,EAAE,EAAE;AACd,gBAAA,MAAM,EAAE,EAAE;AACV,gBAAA,eAAe,EAAE,EAAE;AACnB,gBAAA,KAAK,EAAE,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;aAC9D;;AAEL,KAAC;IAED,OAAOC,UAAI,CACT,OAAO,MAAM,EAAE,cAAc,KAAI;QAC/B,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE,GAAG,MAAM;AACrC,QAAA,MAAM,OAAO,GAAG,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,GAAG,EAAE,GAAG,SAAS;AAC7D,QAAA,MAAM,YAAY,GAAG,MAAMF,QAAM,CAAC;YAChC,KAAK;YACL,OAAO;AACP,YAAA,eAAe,EAAE;AACf,kBAAE,CAAC,MAAM,KAAU;AACjB,oBAAA,gBAAgB,CAAC,MAAM,EAAE,cAAc,CAAC;;AAE1C,kBAAE,SAAS;AACd,SAAA,CAAC;QACF,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,EAAE,IAAI,IAAI,CAAC;AAC/C,QAAA,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAGG,0BAAmB,CAAC,IAAI,EAAE,YAAY,CAAC;QACtE,OAAO;YACL,MAAM;AACN,YAAA,EAAE,CAACC,eAAS,CAAC,UAAU,GAAG,EAAE,IAAI,EAAE,GAAG,YAAY,EAAE,UAAU,EAAE,EAAE;SAClE;AACH,KAAC,EACD;QACE,IAAI,EAAEA,eAAS,CAAC,UAAU;AAC1B,QAAA,WAAW,EAAE;;;;;;;;;;;;;;;;;;;;;;;AAuBlB,CAAA,CAAC,IAAI,EAAE;AACF,QAAA,MAAM,EAAE,gBAAgB;QACxB,cAAc,EAAEA,eAAS,CAAC,oBAAoB;AAC/C,KAAA,CACF;AACH;;;;"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const getDomainName = (link, metadata) => {
|
|
4
|
+
try {
|
|
5
|
+
const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');
|
|
6
|
+
const domain = new URL(url).hostname.replace(/^www\./, '');
|
|
7
|
+
if (domain) {
|
|
8
|
+
return domain;
|
|
9
|
+
}
|
|
10
|
+
}
|
|
11
|
+
catch (e) {
|
|
12
|
+
// URL parsing failed
|
|
13
|
+
console.error('Error parsing URL:', e);
|
|
14
|
+
}
|
|
15
|
+
return;
|
|
16
|
+
};
|
|
17
|
+
function getAttribution(link, metadata) {
|
|
18
|
+
if (!metadata)
|
|
19
|
+
return getDomainName(link, metadata);
|
|
20
|
+
const possibleAttributions = [
|
|
21
|
+
metadata.ogSiteName,
|
|
22
|
+
metadata['og:site_name'],
|
|
23
|
+
metadata.title?.split('|').pop()?.trim(),
|
|
24
|
+
metadata['twitter:site']?.replace(/^@/, ''),
|
|
25
|
+
];
|
|
26
|
+
const attribution = possibleAttributions.find((attr) => attr != null && typeof attr === 'string' && attr.trim() !== '');
|
|
27
|
+
if (attribution != null) {
|
|
28
|
+
return attribution;
|
|
29
|
+
}
|
|
30
|
+
return getDomainName(link, metadata);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
exports.getAttribution = getAttribution;
|
|
34
|
+
exports.getDomainName = getDomainName;
|
|
35
|
+
//# sourceMappingURL=utils.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.cjs","sources":["../../../../src/tools/search/utils.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport type * as t from './types';\n\nexport const getDomainName = (\n link: string,\n metadata?: t.ScrapeMetadata\n): string | undefined => {\n try {\n const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');\n const domain = new URL(url).hostname.replace(/^www\\./, '');\n if (domain) {\n return domain;\n }\n } catch (e) {\n // URL parsing failed\n console.error('Error parsing URL:', e);\n }\n\n return;\n};\n\nexport function getAttribution(\n link: string,\n metadata?: t.ScrapeMetadata\n): string | undefined {\n if (!metadata) return getDomainName(link, metadata);\n\n const possibleAttributions = [\n metadata.ogSiteName,\n metadata['og:site_name'],\n metadata.title?.split('|').pop()?.trim(),\n metadata['twitter:site']?.replace(/^@/, ''),\n ];\n\n const attribution = possibleAttributions.find(\n (attr) => attr != null && typeof attr === 'string' && attr.trim() !== ''\n );\n if (attribution != null) {\n return attribution;\n }\n\n return getDomainName(link, metadata);\n}\n"],"names":[],"mappings":";;MAGa,aAAa,GAAG,CAC3B,IAAY,EACZ,QAA2B,KACL;AACtB,IAAA,IAAI;AACF,QAAA,MAAM,GAAG,GAAG,QAAQ,EAAE,SAAS,IAAI,QAAQ,EAAE,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC;AAChE,QAAA,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1D,IAAI,MAAM,EAAE;AACV,YAAA,OAAO,MAAM;;;IAEf,OAAO,CAAC,EAAE;;AAEV,QAAA,OAAO,CAAC,KAAK,CAAC,oBAAoB,EAAE,CAAC,CAAC;;IAGxC;AACF;AAEgB,SAAA,cAAc,CAC5B,IAAY,EACZ,QAA2B,EAAA;AAE3B,IAAA,IAAI,CAAC,QAAQ;AAAE,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AAEnD,IAAA,MAAM,oBAAoB,GAAG;AAC3B,QAAA,QAAQ,CAAC,UAAU;QACnB,QAAQ,CAAC,cAAc,CAAC;AACxB,QAAA,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE;QACxC,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;KAC5C;IAED,MAAM,WAAW,GAAG,oBAAoB,CAAC,IAAI,CAC3C,CAAC,IAAI,KAAK,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CACzE;AACD,IAAA,IAAI,WAAW,IAAI,IAAI,EAAE;AACvB,QAAA,OAAO,WAAW;;AAGpB,IAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AACtC;;;;;"}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
|
|
3
|
+
function processContent(html, markdown) {
|
|
4
|
+
const linkMap = new Map();
|
|
5
|
+
const imageMap = new Map();
|
|
6
|
+
const videoMap = new Map();
|
|
7
|
+
const iframeMap = new Map();
|
|
8
|
+
const $ = cheerio.load(html, {
|
|
9
|
+
xmlMode: false,
|
|
10
|
+
});
|
|
11
|
+
// Extract all media references
|
|
12
|
+
$('a[href]').each((_, el) => {
|
|
13
|
+
const href = $(el).attr('href');
|
|
14
|
+
if (href != null && href) {
|
|
15
|
+
linkMap.set(href, {
|
|
16
|
+
originalUrl: href,
|
|
17
|
+
title: $(el).attr('title'),
|
|
18
|
+
text: $(el).text().trim(),
|
|
19
|
+
});
|
|
20
|
+
}
|
|
21
|
+
});
|
|
22
|
+
$('img[src]').each((_, el) => {
|
|
23
|
+
const src = $(el).attr('src');
|
|
24
|
+
if (src != null && src) {
|
|
25
|
+
imageMap.set(src, {
|
|
26
|
+
originalUrl: src,
|
|
27
|
+
title: $(el).attr('alt') ?? $(el).attr('title'),
|
|
28
|
+
});
|
|
29
|
+
}
|
|
30
|
+
});
|
|
31
|
+
// Handle videos (dedicated video elements and video platforms in iframes)
|
|
32
|
+
$('video[src], iframe[src*="youtube"], iframe[src*="vimeo"]').each((_, el) => {
|
|
33
|
+
const src = $(el).attr('src');
|
|
34
|
+
if (src != null && src) {
|
|
35
|
+
videoMap.set(src, {
|
|
36
|
+
originalUrl: src,
|
|
37
|
+
title: $(el).attr('title'),
|
|
38
|
+
});
|
|
39
|
+
}
|
|
40
|
+
});
|
|
41
|
+
// Handle all other generic iframes that aren't already captured as videos
|
|
42
|
+
$('iframe').each((_, el) => {
|
|
43
|
+
const src = $(el).attr('src');
|
|
44
|
+
if (src != null &&
|
|
45
|
+
src &&
|
|
46
|
+
!src.includes('youtube') &&
|
|
47
|
+
!src.includes('vimeo')) {
|
|
48
|
+
iframeMap.set(src, {
|
|
49
|
+
originalUrl: src,
|
|
50
|
+
title: $(el).attr('title'),
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
// Create lookup maps with indices
|
|
55
|
+
const linkIndexMap = new Map();
|
|
56
|
+
const imageIndexMap = new Map();
|
|
57
|
+
const videoIndexMap = new Map();
|
|
58
|
+
const iframeIndexMap = new Map();
|
|
59
|
+
Array.from(linkMap.keys()).forEach((url, i) => linkIndexMap.set(url, i + 1));
|
|
60
|
+
Array.from(imageMap.keys()).forEach((url, i) => imageIndexMap.set(url, i + 1));
|
|
61
|
+
Array.from(videoMap.keys()).forEach((url, i) => videoIndexMap.set(url, i + 1));
|
|
62
|
+
Array.from(iframeMap.keys()).forEach((url, i) => iframeIndexMap.set(url, i + 1));
|
|
63
|
+
// Process the markdown
|
|
64
|
+
let result = markdown;
|
|
65
|
+
// Replace each URL one by one, starting with the longest URLs first to avoid partial matches
|
|
66
|
+
const allUrls = [
|
|
67
|
+
...Array.from(imageMap.keys()).map((url) => ({
|
|
68
|
+
url,
|
|
69
|
+
type: 'image',
|
|
70
|
+
idx: imageIndexMap.get(url),
|
|
71
|
+
})),
|
|
72
|
+
...Array.from(videoMap.keys()).map((url) => ({
|
|
73
|
+
url,
|
|
74
|
+
type: 'video',
|
|
75
|
+
idx: videoIndexMap.get(url),
|
|
76
|
+
})),
|
|
77
|
+
...Array.from(iframeMap.keys()).map((url) => ({
|
|
78
|
+
url,
|
|
79
|
+
type: 'iframe',
|
|
80
|
+
idx: iframeIndexMap.get(url),
|
|
81
|
+
})),
|
|
82
|
+
...Array.from(linkMap.keys()).map((url) => ({
|
|
83
|
+
url,
|
|
84
|
+
type: 'link',
|
|
85
|
+
idx: linkIndexMap.get(url),
|
|
86
|
+
})),
|
|
87
|
+
].sort((a, b) => b.url.length - a.url.length);
|
|
88
|
+
// Create a function to escape special characters in URLs for regex
|
|
89
|
+
function escapeRegex(string) {
|
|
90
|
+
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
91
|
+
}
|
|
92
|
+
// Replace each URL in the markdown
|
|
93
|
+
for (const { url, type, idx } of allUrls) {
|
|
94
|
+
// Create a regex that captures URLs in markdown links
|
|
95
|
+
const regex = new RegExp(`\\(${escapeRegex(url)}(?:\\s+"[^"]*")?\\)`, 'g');
|
|
96
|
+
result = result.replace(regex, (match) => {
|
|
97
|
+
// Keep any title attribute that might exist
|
|
98
|
+
const titleMatch = match.match(/\s+"([^"]*)"/);
|
|
99
|
+
const titlePart = titleMatch ? ` "${titleMatch[1]}"` : '';
|
|
100
|
+
return `(${type}#${idx}${titlePart})`;
|
|
101
|
+
});
|
|
102
|
+
}
|
|
103
|
+
iframeMap.clear();
|
|
104
|
+
const links = Array.from(linkMap.values());
|
|
105
|
+
linkMap.clear();
|
|
106
|
+
const images = Array.from(imageMap.values());
|
|
107
|
+
imageMap.clear();
|
|
108
|
+
const videos = Array.from(videoMap.values());
|
|
109
|
+
videoMap.clear();
|
|
110
|
+
return {
|
|
111
|
+
markdown: result,
|
|
112
|
+
links,
|
|
113
|
+
images,
|
|
114
|
+
videos,
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
export { processContent };
|
|
119
|
+
//# sourceMappingURL=content.mjs.map
|