@librechat/agents 2.4.316 → 2.4.318
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/content.cjs +140 -0
- package/dist/cjs/tools/search/content.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +17 -37
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +79 -29
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs +64 -13
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +13 -15
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +44 -12
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +35 -0
- package/dist/cjs/tools/search/utils.cjs.map +1 -0
- package/dist/esm/tools/search/content.mjs +119 -0
- package/dist/esm/tools/search/content.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +18 -37
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +79 -29
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs +64 -13
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +12 -14
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +44 -12
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +32 -0
- package/dist/esm/tools/search/utils.mjs.map +1 -0
- package/dist/types/tools/search/content.d.ts +4 -0
- package/dist/types/tools/search/firecrawl.d.ts +6 -86
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/highlights.d.ts +1 -1
- package/dist/types/tools/search/search.d.ts +1 -1
- package/dist/types/tools/search/test.d.ts +1 -0
- package/dist/types/tools/search/tool.d.ts +12 -4
- package/dist/types/tools/search/types.d.ts +380 -46
- package/dist/types/tools/search/utils.d.ts +3 -0
- package/package.json +3 -2
- package/src/scripts/search.ts +5 -3
- package/src/tools/search/content.test.ts +173 -0
- package/src/tools/search/content.ts +147 -0
- package/src/tools/search/firecrawl.ts +27 -144
- package/src/tools/search/format.ts +89 -31
- package/src/tools/search/highlights.ts +99 -17
- package/src/tools/search/output.md +2775 -0
- package/src/tools/search/search.ts +42 -54
- package/src/tools/search/test.html +884 -0
- package/src/tools/search/test.md +643 -0
- package/src/tools/search/test.ts +159 -0
- package/src/tools/search/tool.ts +54 -15
- package/src/tools/search/types.ts +430 -52
- package/src/tools/search/utils.ts +43 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var cheerio = require('cheerio');
|
|
4
|
+
|
|
5
|
+
function _interopNamespaceDefault(e) {
|
|
6
|
+
var n = Object.create(null);
|
|
7
|
+
if (e) {
|
|
8
|
+
Object.keys(e).forEach(function (k) {
|
|
9
|
+
if (k !== 'default') {
|
|
10
|
+
var d = Object.getOwnPropertyDescriptor(e, k);
|
|
11
|
+
Object.defineProperty(n, k, d.get ? d : {
|
|
12
|
+
enumerable: true,
|
|
13
|
+
get: function () { return e[k]; }
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
n.default = e;
|
|
19
|
+
return Object.freeze(n);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
|
|
23
|
+
|
|
24
|
+
function processContent(html, markdown) {
|
|
25
|
+
const linkMap = new Map();
|
|
26
|
+
const imageMap = new Map();
|
|
27
|
+
const videoMap = new Map();
|
|
28
|
+
const iframeMap = new Map();
|
|
29
|
+
const $ = cheerio__namespace.load(html, {
|
|
30
|
+
xmlMode: false,
|
|
31
|
+
});
|
|
32
|
+
// Extract all media references
|
|
33
|
+
$('a[href]').each((_, el) => {
|
|
34
|
+
const href = $(el).attr('href');
|
|
35
|
+
if (href != null && href) {
|
|
36
|
+
linkMap.set(href, {
|
|
37
|
+
originalUrl: href,
|
|
38
|
+
title: $(el).attr('title'),
|
|
39
|
+
text: $(el).text().trim(),
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
});
|
|
43
|
+
$('img[src]').each((_, el) => {
|
|
44
|
+
const src = $(el).attr('src');
|
|
45
|
+
if (src != null && src) {
|
|
46
|
+
imageMap.set(src, {
|
|
47
|
+
originalUrl: src,
|
|
48
|
+
title: $(el).attr('alt') ?? $(el).attr('title'),
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
});
|
|
52
|
+
// Handle videos (dedicated video elements and video platforms in iframes)
|
|
53
|
+
$('video[src], iframe[src*="youtube"], iframe[src*="vimeo"]').each((_, el) => {
|
|
54
|
+
const src = $(el).attr('src');
|
|
55
|
+
if (src != null && src) {
|
|
56
|
+
videoMap.set(src, {
|
|
57
|
+
originalUrl: src,
|
|
58
|
+
title: $(el).attr('title'),
|
|
59
|
+
});
|
|
60
|
+
}
|
|
61
|
+
});
|
|
62
|
+
// Handle all other generic iframes that aren't already captured as videos
|
|
63
|
+
$('iframe').each((_, el) => {
|
|
64
|
+
const src = $(el).attr('src');
|
|
65
|
+
if (src != null &&
|
|
66
|
+
src &&
|
|
67
|
+
!src.includes('youtube') &&
|
|
68
|
+
!src.includes('vimeo')) {
|
|
69
|
+
iframeMap.set(src, {
|
|
70
|
+
originalUrl: src,
|
|
71
|
+
title: $(el).attr('title'),
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
});
|
|
75
|
+
// Create lookup maps with indices
|
|
76
|
+
const linkIndexMap = new Map();
|
|
77
|
+
const imageIndexMap = new Map();
|
|
78
|
+
const videoIndexMap = new Map();
|
|
79
|
+
const iframeIndexMap = new Map();
|
|
80
|
+
Array.from(linkMap.keys()).forEach((url, i) => linkIndexMap.set(url, i + 1));
|
|
81
|
+
Array.from(imageMap.keys()).forEach((url, i) => imageIndexMap.set(url, i + 1));
|
|
82
|
+
Array.from(videoMap.keys()).forEach((url, i) => videoIndexMap.set(url, i + 1));
|
|
83
|
+
Array.from(iframeMap.keys()).forEach((url, i) => iframeIndexMap.set(url, i + 1));
|
|
84
|
+
// Process the markdown
|
|
85
|
+
let result = markdown;
|
|
86
|
+
// Replace each URL one by one, starting with the longest URLs first to avoid partial matches
|
|
87
|
+
const allUrls = [
|
|
88
|
+
...Array.from(imageMap.keys()).map((url) => ({
|
|
89
|
+
url,
|
|
90
|
+
type: 'image',
|
|
91
|
+
idx: imageIndexMap.get(url),
|
|
92
|
+
})),
|
|
93
|
+
...Array.from(videoMap.keys()).map((url) => ({
|
|
94
|
+
url,
|
|
95
|
+
type: 'video',
|
|
96
|
+
idx: videoIndexMap.get(url),
|
|
97
|
+
})),
|
|
98
|
+
...Array.from(iframeMap.keys()).map((url) => ({
|
|
99
|
+
url,
|
|
100
|
+
type: 'iframe',
|
|
101
|
+
idx: iframeIndexMap.get(url),
|
|
102
|
+
})),
|
|
103
|
+
...Array.from(linkMap.keys()).map((url) => ({
|
|
104
|
+
url,
|
|
105
|
+
type: 'link',
|
|
106
|
+
idx: linkIndexMap.get(url),
|
|
107
|
+
})),
|
|
108
|
+
].sort((a, b) => b.url.length - a.url.length);
|
|
109
|
+
// Create a function to escape special characters in URLs for regex
|
|
110
|
+
function escapeRegex(string) {
|
|
111
|
+
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
112
|
+
}
|
|
113
|
+
// Replace each URL in the markdown
|
|
114
|
+
for (const { url, type, idx } of allUrls) {
|
|
115
|
+
// Create a regex that captures URLs in markdown links
|
|
116
|
+
const regex = new RegExp(`\\(${escapeRegex(url)}(?:\\s+"[^"]*")?\\)`, 'g');
|
|
117
|
+
result = result.replace(regex, (match) => {
|
|
118
|
+
// Keep any title attribute that might exist
|
|
119
|
+
const titleMatch = match.match(/\s+"([^"]*)"/);
|
|
120
|
+
const titlePart = titleMatch ? ` "${titleMatch[1]}"` : '';
|
|
121
|
+
return `(${type}#${idx}${titlePart})`;
|
|
122
|
+
});
|
|
123
|
+
}
|
|
124
|
+
iframeMap.clear();
|
|
125
|
+
const links = Array.from(linkMap.values());
|
|
126
|
+
linkMap.clear();
|
|
127
|
+
const images = Array.from(imageMap.values());
|
|
128
|
+
imageMap.clear();
|
|
129
|
+
const videos = Array.from(videoMap.values());
|
|
130
|
+
videoMap.clear();
|
|
131
|
+
return {
|
|
132
|
+
markdown: result,
|
|
133
|
+
links,
|
|
134
|
+
images,
|
|
135
|
+
videos,
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
exports.processContent = processContent;
|
|
140
|
+
//# sourceMappingURL=content.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content.cjs","sources":["../../../../src/tools/search/content.ts"],"sourcesContent":["import * as cheerio from 'cheerio';\nimport type { References, MediaReference } from './types';\n\nexport function processContent(\n html: string,\n markdown: string\n): {\n markdown: string;\n} & References {\n const linkMap = new Map<string, MediaReference>();\n const imageMap = new Map<string, MediaReference>();\n const videoMap = new Map<string, MediaReference>();\n const iframeMap = new Map<string, MediaReference>();\n\n const $ = cheerio.load(html, {\n xmlMode: false,\n });\n\n // Extract all media references\n $('a[href]').each((_, el) => {\n const href = $(el).attr('href');\n if (href != null && href) {\n linkMap.set(href, {\n originalUrl: href,\n title: $(el).attr('title'),\n text: $(el).text().trim(),\n });\n }\n });\n\n $('img[src]').each((_, el) => {\n const src = $(el).attr('src');\n if (src != null && src) {\n imageMap.set(src, {\n originalUrl: src,\n title: $(el).attr('alt') ?? $(el).attr('title'),\n });\n }\n });\n\n // Handle videos (dedicated video elements and video platforms in iframes)\n $('video[src], iframe[src*=\"youtube\"], iframe[src*=\"vimeo\"]').each(\n (_, el) => {\n const src = $(el).attr('src');\n if (src != null && src) {\n videoMap.set(src, {\n originalUrl: src,\n title: $(el).attr('title'),\n });\n }\n }\n );\n\n // Handle all other generic iframes that aren't already captured as videos\n $('iframe').each((_, el) => {\n const src = $(el).attr('src');\n if (\n src != null &&\n src &&\n !src.includes('youtube') &&\n !src.includes('vimeo')\n ) {\n iframeMap.set(src, {\n originalUrl: src,\n title: $(el).attr('title'),\n });\n }\n });\n\n // Create lookup maps with indices\n const linkIndexMap = new Map<string, number>();\n const imageIndexMap = new Map<string, number>();\n const videoIndexMap = new Map<string, number>();\n const iframeIndexMap = new Map<string, number>();\n\n Array.from(linkMap.keys()).forEach((url, i) => linkIndexMap.set(url, i + 1));\n Array.from(imageMap.keys()).forEach((url, i) =>\n imageIndexMap.set(url, i + 1)\n );\n Array.from(videoMap.keys()).forEach((url, i) =>\n videoIndexMap.set(url, i + 1)\n );\n Array.from(iframeMap.keys()).forEach((url, i) =>\n iframeIndexMap.set(url, i + 1)\n );\n\n // Process the markdown\n let result = markdown;\n\n // Replace each URL one by one, starting with the longest URLs first to avoid partial matches\n const allUrls = [\n ...Array.from(imageMap.keys()).map((url) => ({\n url,\n type: 'image',\n idx: imageIndexMap.get(url),\n })),\n ...Array.from(videoMap.keys()).map((url) => ({\n url,\n type: 'video',\n idx: videoIndexMap.get(url),\n })),\n ...Array.from(iframeMap.keys()).map((url) => ({\n url,\n type: 'iframe',\n idx: iframeIndexMap.get(url),\n })),\n ...Array.from(linkMap.keys()).map((url) => ({\n url,\n type: 'link',\n idx: linkIndexMap.get(url),\n })),\n ].sort((a, b) => b.url.length - a.url.length);\n\n // Create a function to escape special characters in URLs for regex\n function escapeRegex(string: string): string {\n return string.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n }\n\n // Replace each URL in the markdown\n for (const { url, type, idx } of allUrls) {\n // Create a regex that captures URLs in markdown links\n const regex = new RegExp(`\\\\(${escapeRegex(url)}(?:\\\\s+\"[^\"]*\")?\\\\)`, 'g');\n\n result = result.replace(regex, (match) => {\n // Keep any title attribute that might exist\n const titleMatch = match.match(/\\s+\"([^\"]*)\"/);\n const titlePart = titleMatch ? ` \"${titleMatch[1]}\"` : '';\n\n return `(${type}#${idx}${titlePart})`;\n });\n }\n\n iframeMap.clear();\n const links = Array.from(linkMap.values());\n linkMap.clear();\n const images = Array.from(imageMap.values());\n imageMap.clear();\n const videos = Array.from(videoMap.values());\n videoMap.clear();\n\n return {\n markdown: result,\n links,\n images,\n videos,\n };\n}\n"],"names":["cheerio"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;AAGgB,SAAA,cAAc,CAC5B,IAAY,EACZ,QAAgB,EAAA;AAIhB,IAAA,MAAM,OAAO,GAAG,IAAI,GAAG,EAA0B;AACjD,IAAA,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B;AAClD,IAAA,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B;AAClD,IAAA,MAAM,SAAS,GAAG,IAAI,GAAG,EAA0B;AAEnD,IAAA,MAAM,CAAC,GAAGA,kBAAO,CAAC,IAAI,CAAC,IAAI,EAAE;AAC3B,QAAA,OAAO,EAAE,KAAK;AACf,KAAA,CAAC;;IAGF,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;AAC/B,QAAA,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,EAAE;AACxB,YAAA,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE;AAChB,gBAAA,WAAW,EAAE,IAAI;gBACjB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;gBAC1B,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE;AAC1B,aAAA,CAAC;;AAEN,KAAC,CAAC;IAEF,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QAC3B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;AAC7B,QAAA,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,EAAE;AACtB,YAAA,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE;AAChB,gBAAA,WAAW,EAAE,GAAG;AAChB,gBAAA,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAChD,aAAA,CAAC;;AAEN,KAAC,CAAC;;IAGF,CAAC,CAAC,0DAA0D,CAAC,CAAC,IAAI,CAChE,CAAC,CAAC,EAAE,EAAE,KAAI;QACR,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;AAC7B,QAAA,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,EAAE;AACtB,YAAA,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE;AAChB,gBAAA,WAAW,EAAE,GAAG;gBAChB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAC3B,aAAA,CAAC;;AAEN,KAAC,CACF;;IAGD,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QACzB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;QAC7B,IACE,GAAG,IAAI,IAAI;YACX,GAAG;AACH,YAAA,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC;AACxB,YAAA,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EACtB;AACA,YAAA,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE;AACjB,gBAAA,WAAW,EAAE,GAAG;gBAChB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAC3B,aAAA,CAAC;;AAEN,KAAC,CAAC;;AAGF,IAAA,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB;AAC9C,IAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB;AAC/C,IAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB;AAC/C,IAAA,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB;AAEhD,IAAA,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KAAK,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;AAC5E,IAAA,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KACzC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC9B;AACD,IAAA,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KACzC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC9B;AACD,IAAA,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KAC1C,cAAc,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC/B;;IAGD,IAAI,MAAM,GAAG,QAAQ;;AAGrB,IAAA,MAAM,OAAO,GAAG;AACd,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC3C,GAAG;AACH,YAAA,IAAI,EAAE,OAAO;AACb,YAAA,GAAG,EAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;AAC5B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC3C,GAAG;AACH,YAAA,IAAI,EAAE,OAAO;AACb,YAAA,GAAG,EAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;AAC5B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC5C,GAAG;AACH,YAAA,IAAI,EAAE,QAAQ;AACd,YAAA,GAAG,EAAE,cAAc,CAAC,GAAG,CAAC,GAAG,CAAC;AAC7B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC1C,GAAG;AACH,YAAA,IAAI,EAAE,MAAM;AACZ,YAAA,GAAG,EAAE,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC;AAC3B,SAAA,CAAC,CAAC;KACJ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;;IAG7C,SAAS,WAAW,CAAC,MAAc,EAAA;QACjC,OAAO,MAAM,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC;;;IAItD,KAAK,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,OAAO,EAAE;;AAExC,QAAA,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,CAAM,GAAA,EAAA,WAAW,CAAC,GAAG,CAAC,CAAA,mBAAA,CAAqB,EAAE,GAAG,CAAC;QAE1E,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,KAAI;;YAEvC,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,cAAc,CAAC;AAC9C,YAAA,MAAM,SAAS,GAAG,UAAU,GAAG,CAAK,EAAA,EAAA,UAAU,CAAC,CAAC,CAAC,CAAG,CAAA,CAAA,GAAG,EAAE;AAEzD,YAAA,OAAO,IAAI,IAAI,CAAA,CAAA,EAAI,GAAG,CAAG,EAAA,SAAS,GAAG;AACvC,SAAC,CAAC;;IAGJ,SAAS,CAAC,KAAK,EAAE;IACjB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;IAC1C,OAAO,CAAC,KAAK,EAAE;IACf,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;IAC5C,QAAQ,CAAC,KAAK,EAAE;IAChB,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;IAC5C,QAAQ,CAAC,KAAK,EAAE;IAEhB,OAAO;AACL,QAAA,QAAQ,EAAE,MAAM;QAChB,KAAK;QACL,MAAM;QACN,MAAM;KACP;AACH;;;;"}
|
|
@@ -1,37 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
var axios = require('axios');
|
|
4
|
+
var content = require('./content.cjs');
|
|
4
5
|
|
|
5
6
|
/* eslint-disable no-console */
|
|
6
|
-
const getDomainName = (link, metadata) => {
|
|
7
|
-
try {
|
|
8
|
-
const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');
|
|
9
|
-
const domain = new URL(url).hostname.replace(/^www\./, '');
|
|
10
|
-
if (domain) {
|
|
11
|
-
return domain;
|
|
12
|
-
}
|
|
13
|
-
}
|
|
14
|
-
catch (e) {
|
|
15
|
-
// URL parsing failed
|
|
16
|
-
console.error('Error parsing URL:', e);
|
|
17
|
-
}
|
|
18
|
-
return;
|
|
19
|
-
};
|
|
20
|
-
function getAttribution(link, metadata) {
|
|
21
|
-
if (!metadata)
|
|
22
|
-
return getDomainName(link, metadata);
|
|
23
|
-
const possibleAttributions = [
|
|
24
|
-
metadata.ogSiteName,
|
|
25
|
-
metadata['og:site_name'],
|
|
26
|
-
metadata.title?.split('|').pop()?.trim(),
|
|
27
|
-
metadata['twitter:site']?.replace(/^@/, ''),
|
|
28
|
-
];
|
|
29
|
-
const attribution = possibleAttributions.find((attr) => attr != null && typeof attr === 'string' && attr.trim() !== '');
|
|
30
|
-
if (attribution != null) {
|
|
31
|
-
return attribution;
|
|
32
|
-
}
|
|
33
|
-
return getDomainName(link, metadata);
|
|
34
|
-
}
|
|
35
7
|
/**
|
|
36
8
|
* Firecrawl scraper implementation
|
|
37
9
|
* Uses the Firecrawl API to scrape web pages
|
|
@@ -106,21 +78,30 @@ class FirecrawlScraper {
|
|
|
106
78
|
*/
|
|
107
79
|
extractContent(response) {
|
|
108
80
|
if (!response.success || !response.data) {
|
|
109
|
-
return '';
|
|
81
|
+
return ['', undefined];
|
|
82
|
+
}
|
|
83
|
+
if (response.data.markdown != null && response.data.html != null) {
|
|
84
|
+
try {
|
|
85
|
+
const { markdown, ...rest } = content.processContent(response.data.html, response.data.markdown);
|
|
86
|
+
return [markdown, rest];
|
|
87
|
+
}
|
|
88
|
+
catch (error) {
|
|
89
|
+
console.error('Error processing content:', error);
|
|
90
|
+
return [response.data.markdown, undefined];
|
|
91
|
+
}
|
|
110
92
|
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
return response.data.markdown;
|
|
93
|
+
else if (response.data.markdown != null) {
|
|
94
|
+
return [response.data.markdown, undefined];
|
|
114
95
|
}
|
|
115
96
|
// Fall back to HTML content
|
|
116
97
|
if (response.data.html != null) {
|
|
117
|
-
return response.data.html;
|
|
98
|
+
return [response.data.html, undefined];
|
|
118
99
|
}
|
|
119
100
|
// Fall back to raw HTML content
|
|
120
101
|
if (response.data.rawHtml != null) {
|
|
121
|
-
return response.data.rawHtml;
|
|
102
|
+
return [response.data.rawHtml, undefined];
|
|
122
103
|
}
|
|
123
|
-
return '';
|
|
104
|
+
return ['', undefined];
|
|
124
105
|
}
|
|
125
106
|
/**
|
|
126
107
|
* Extract metadata from scrape response
|
|
@@ -145,5 +126,4 @@ const createFirecrawlScraper = (config = {}) => {
|
|
|
145
126
|
|
|
146
127
|
exports.FirecrawlScraper = FirecrawlScraper;
|
|
147
128
|
exports.createFirecrawlScraper = createFirecrawlScraper;
|
|
148
|
-
exports.getAttribution = getAttribution;
|
|
149
129
|
//# sourceMappingURL=firecrawl.cjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"firecrawl.cjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\n\nexport interface FirecrawlScrapeOptions {\n formats?: string[];\n includeTags?: string[];\n excludeTags?: string[];\n headers?: Record<string, string>;\n waitFor?: number;\n timeout?: number;\n}\n\ninterface ScrapeMetadata {\n // Core source information\n sourceURL?: string;\n url?: string;\n scrapeId?: string;\n statusCode?: number;\n // Basic metadata\n title?: string;\n description?: string;\n language?: string;\n favicon?: string;\n viewport?: string;\n robots?: string;\n 'theme-color'?: string;\n // Open Graph metadata\n 'og:url'?: string;\n 'og:title'?: string;\n 'og:description'?: string;\n 'og:type'?: string;\n 'og:image'?: string;\n 'og:image:width'?: string;\n 'og:image:height'?: string;\n 'og:site_name'?: string;\n ogUrl?: string;\n ogTitle?: string;\n ogDescription?: string;\n ogImage?: string;\n ogSiteName?: string;\n // Article metadata\n 'article:author'?: string;\n 'article:published_time'?: string;\n 'article:modified_time'?: string;\n 'article:section'?: string;\n 'article:tag'?: string;\n 'article:publisher'?: string;\n publishedTime?: string;\n modifiedTime?: string;\n // Twitter metadata\n 'twitter:site'?: string;\n 'twitter:creator'?: string;\n 'twitter:card'?: string;\n 'twitter:image'?: string;\n 'twitter:dnt'?: string;\n 'twitter:app:name:iphone'?: string;\n 'twitter:app:id:iphone'?: string;\n 'twitter:app:url:iphone'?: string;\n 'twitter:app:name:ipad'?: string;\n 'twitter:app:id:ipad'?: string;\n 'twitter:app:url:ipad'?: string;\n 'twitter:app:name:googleplay'?: string;\n 'twitter:app:id:googleplay'?: string;\n 'twitter:app:url:googleplay'?: string;\n // Facebook metadata\n 'fb:app_id'?: string;\n // App links\n 'al:ios:url'?: string;\n 'al:ios:app_name'?: string;\n 'al:ios:app_store_id'?: string;\n // Allow for additional properties that might be present\n [key: string]: string | number | boolean | null | undefined;\n}\n\nexport interface FirecrawlScrapeResponse {\n success: boolean;\n data?: {\n markdown?: string;\n html?: string;\n rawHtml?: string;\n screenshot?: string;\n links?: string[];\n metadata?: ScrapeMetadata;\n };\n error?: string;\n}\n\nexport interface FirecrawlScraperConfig {\n apiKey?: string;\n apiUrl?: string;\n formats?: string[];\n timeout?: number;\n}\nconst getDomainName = (\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined => {\n try {\n const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');\n const domain = new URL(url).hostname.replace(/^www\\./, '');\n if (domain) {\n return domain;\n }\n } catch (e) {\n // URL parsing failed\n console.error('Error parsing URL:', e);\n }\n\n return;\n};\n\nexport function getAttribution(\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined {\n if (!metadata) return getDomainName(link, metadata);\n\n const possibleAttributions = [\n metadata.ogSiteName,\n metadata['og:site_name'],\n metadata.title?.split('|').pop()?.trim(),\n metadata['twitter:site']?.replace(/^@/, ''),\n ];\n\n const attribution = possibleAttributions.find(\n (attr) => attr != null && typeof attr === 'string' && attr.trim() !== ''\n );\n if (attribution != null) {\n return attribution;\n }\n\n return getDomainName(link, metadata);\n}\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n\n constructor(config: FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 15000;\n\n if (!this.apiKey) {\n console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: FirecrawlScrapeOptions = {}\n ): Promise<[string, FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(response: FirecrawlScrapeResponse): string {\n if (!response.success || !response.data) {\n return '';\n }\n\n // Prefer markdown content if available\n if (response.data.markdown != null) {\n return response.data.markdown;\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return response.data.html;\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return response.data.rawHtml;\n }\n\n return '';\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: FirecrawlScrapeResponse): ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":[],"mappings":";;;;AAAA;AA6FA,MAAM,aAAa,GAAG,CACpB,IAAY,EACZ,QAAyB,KACH;AACtB,IAAA,IAAI;AACF,QAAA,MAAM,GAAG,GAAG,QAAQ,EAAE,SAAS,IAAI,QAAQ,EAAE,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC;AAChE,QAAA,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1D,IAAI,MAAM,EAAE;AACV,YAAA,OAAO,MAAM;;;IAEf,OAAO,CAAC,EAAE;;AAEV,QAAA,OAAO,CAAC,KAAK,CAAC,oBAAoB,EAAE,CAAC,CAAC;;IAGxC;AACF,CAAC;AAEe,SAAA,cAAc,CAC5B,IAAY,EACZ,QAAyB,EAAA;AAEzB,IAAA,IAAI,CAAC,QAAQ;AAAE,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AAEnD,IAAA,MAAM,oBAAoB,GAAG;AAC3B,QAAA,QAAQ,CAAC,UAAU;QACnB,QAAQ,CAAC,cAAc,CAAC;AACxB,QAAA,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE;QACxC,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;KAC5C;IAED,MAAM,WAAW,GAAG,oBAAoB,CAAC,IAAI,CAC3C,CAAC,IAAI,KAAK,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CACzE;AACD,IAAA,IAAI,WAAW,IAAI,IAAI,EAAE;AACvB,QAAA,OAAO,WAAW;;AAGpB,IAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AACtC;AAEA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AAEf,IAAA,WAAA,CAAY,SAAiC,EAAE,EAAA;AAC7C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAGvE,OAAO,CAAC,GAAG,CAAC,CAAA,4CAAA,EAA+C,IAAI,CAAC,MAAM,CAAE,CAAA,CAAC;;AAG3E;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAkC,EAAE,EAAA;AAEpC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CAAC,QAAiC,EAAA;QAC9C,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,EAAE;;;QAIX,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;AAClC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;;QAI/B,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAC9B,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI;;;QAI3B,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;AACjC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO;;AAG9B,QAAA,OAAO,EAAE;;AAGX;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAiC,EAAA;AAC/C,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAiC,GAAA,EAAE,KACf;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;;;"}
|
|
1
|
+
{"version":3,"file":"firecrawl.cjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 15000;\n\n if (!this.apiKey) {\n console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n console.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":["processContent"],"mappings":";;;;;AAAA;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AAEf,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAGvE,OAAO,CAAC,GAAG,CAAC,CAAA,4CAAA,EAA+C,IAAI,CAAC,MAAM,CAAE,CAAA,CAAC;;AAG3E;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAGA,sBAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;AACd,gBAAA,OAAO,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACjD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;;"}
|
|
@@ -1,32 +1,59 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
var utils = require('./utils.cjs');
|
|
4
|
+
|
|
3
5
|
function formatResultsForLLM(turn, results) {
|
|
4
6
|
let output = '';
|
|
5
7
|
const addSection = (title) => {
|
|
6
8
|
output += `\n=== ${title} ===\n`;
|
|
7
9
|
};
|
|
10
|
+
const references = [];
|
|
8
11
|
// Organic (web) results
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
if (results.organic?.length != null && results.organic.length > 0) {
|
|
13
|
+
addSection(`Web Results, Turn ${turn}`);
|
|
14
|
+
for (let i = 0; i < results.organic.length; i++) {
|
|
15
|
+
const r = results.organic[i];
|
|
13
16
|
output += [
|
|
14
|
-
|
|
15
|
-
`
|
|
17
|
+
`# Source ${i}: "${r.title ?? '(no title)'}"`,
|
|
18
|
+
`Anchor: \\ue202turn${turn}search${i}`,
|
|
16
19
|
`URL: ${r.link}`,
|
|
17
20
|
r.snippet != null ? `Summary: ${r.snippet}` : '',
|
|
18
21
|
r.date != null ? `Date: ${r.date}` : '',
|
|
19
22
|
r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
20
23
|
'',
|
|
21
|
-
'
|
|
22
|
-
|
|
23
|
-
.filter((h) => h.text.trim().length > 0)
|
|
24
|
-
.map((h) => `[Relevance: ${h.score.toFixed(2)}]\n${h.text.trim()}`),
|
|
24
|
+
'\n## Highlights\n\n',
|
|
25
|
+
'',
|
|
25
26
|
'',
|
|
26
27
|
]
|
|
27
28
|
.filter(Boolean)
|
|
28
29
|
.join('\n');
|
|
29
|
-
|
|
30
|
+
(r.highlights ?? [])
|
|
31
|
+
.filter((h) => h.text.trim().length > 0)
|
|
32
|
+
.forEach((h, hIndex) => {
|
|
33
|
+
output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\n\n`;
|
|
34
|
+
output += '```text\n' + h.text.trim() + '\n```\n\n';
|
|
35
|
+
if (h.references != null && h.references.length) {
|
|
36
|
+
output += 'Core References:\n';
|
|
37
|
+
output += h.references
|
|
38
|
+
.map((ref) => {
|
|
39
|
+
references.push({
|
|
40
|
+
link: ref.reference.originalUrl,
|
|
41
|
+
attribution: utils.getDomainName(ref.reference.originalUrl),
|
|
42
|
+
title: (((ref.reference.title ?? '') || ref.reference.text) ??
|
|
43
|
+
'').split('\n')[0],
|
|
44
|
+
});
|
|
45
|
+
return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\n\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`;
|
|
46
|
+
})
|
|
47
|
+
.join('\n');
|
|
48
|
+
output += '\n\n';
|
|
49
|
+
}
|
|
50
|
+
if (hIndex < (r.highlights?.length ?? 0) - 1) {
|
|
51
|
+
output += '---\n\n';
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
delete results.organic[i].highlights;
|
|
55
|
+
output += '\n';
|
|
56
|
+
}
|
|
30
57
|
}
|
|
31
58
|
// Ignoring these sections for now
|
|
32
59
|
// // Top stories (news)
|
|
@@ -62,54 +89,77 @@ function formatResultsForLLM(turn, results) {
|
|
|
62
89
|
if (results.knowledgeGraph != null) {
|
|
63
90
|
addSection('Knowledge Graph');
|
|
64
91
|
output += [
|
|
65
|
-
|
|
92
|
+
`**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
|
|
93
|
+
results.knowledgeGraph.type != null
|
|
94
|
+
? `**Type:** ${results.knowledgeGraph.type}`
|
|
95
|
+
: '',
|
|
66
96
|
results.knowledgeGraph.description != null
|
|
67
|
-
?
|
|
97
|
+
? `**Description:** ${results.knowledgeGraph.description}`
|
|
68
98
|
: '',
|
|
69
|
-
results.knowledgeGraph.
|
|
70
|
-
?
|
|
99
|
+
results.knowledgeGraph.descriptionSource != null
|
|
100
|
+
? `**Description Source:** ${results.knowledgeGraph.descriptionSource}`
|
|
101
|
+
: '',
|
|
102
|
+
results.knowledgeGraph.descriptionLink != null
|
|
103
|
+
? `**Description Link:** ${results.knowledgeGraph.descriptionLink}`
|
|
71
104
|
: '',
|
|
72
105
|
results.knowledgeGraph.imageUrl != null
|
|
73
|
-
?
|
|
106
|
+
? `**Image URL:** ${results.knowledgeGraph.imageUrl}`
|
|
107
|
+
: '',
|
|
108
|
+
results.knowledgeGraph.website != null
|
|
109
|
+
? `**Website:** ${results.knowledgeGraph.website}`
|
|
74
110
|
: '',
|
|
75
111
|
results.knowledgeGraph.attributes != null
|
|
76
|
-
?
|
|
112
|
+
? `**Attributes:**\n\`\`\`json\n${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}\n\`\`\``
|
|
77
113
|
: '',
|
|
78
114
|
'',
|
|
79
115
|
]
|
|
80
116
|
.filter(Boolean)
|
|
81
|
-
.join('\n');
|
|
117
|
+
.join('\n\n');
|
|
82
118
|
}
|
|
83
119
|
// Answer Box
|
|
84
120
|
if (results.answerBox != null) {
|
|
85
121
|
addSection('Answer Box');
|
|
86
122
|
output += [
|
|
87
123
|
results.answerBox.title != null
|
|
88
|
-
?
|
|
89
|
-
: '',
|
|
90
|
-
results.answerBox.answer != null
|
|
91
|
-
? `Answer: ${results.answerBox.answer}`
|
|
124
|
+
? `**Title:** ${results.answerBox.title}`
|
|
92
125
|
: '',
|
|
93
126
|
results.answerBox.snippet != null
|
|
94
|
-
?
|
|
127
|
+
? `**Snippet:** ${results.answerBox.snippet}`
|
|
128
|
+
: '',
|
|
129
|
+
results.answerBox.snippetHighlighted != null
|
|
130
|
+
? `**Snippet Highlighted:** ${results.answerBox.snippetHighlighted
|
|
131
|
+
.map((s) => `\`${s}\``)
|
|
132
|
+
.join(' ')}`
|
|
133
|
+
: '',
|
|
134
|
+
results.answerBox.link != null
|
|
135
|
+
? `**Link:** ${results.answerBox.link}`
|
|
95
136
|
: '',
|
|
96
|
-
results.answerBox.date != null ? `Date: ${results.answerBox.date}` : '',
|
|
97
137
|
'',
|
|
98
138
|
]
|
|
99
139
|
.filter(Boolean)
|
|
100
|
-
.join('\n');
|
|
140
|
+
.join('\n\n');
|
|
101
141
|
}
|
|
102
142
|
// People also ask
|
|
103
143
|
const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
|
|
104
144
|
if (peopleAlsoAsk.length) {
|
|
105
145
|
addSection('People Also Ask');
|
|
106
|
-
peopleAlsoAsk.forEach((p,
|
|
107
|
-
output += [
|
|
146
|
+
peopleAlsoAsk.forEach((p, i) => {
|
|
147
|
+
output += [
|
|
148
|
+
`### Question ${i + 1}:`,
|
|
149
|
+
`"${p.question}"`,
|
|
150
|
+
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}}` : ''}`,
|
|
151
|
+
`${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
|
|
152
|
+
`${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
|
|
153
|
+
'',
|
|
154
|
+
]
|
|
108
155
|
.filter(Boolean)
|
|
109
|
-
.join('\n');
|
|
156
|
+
.join('\n\n');
|
|
110
157
|
});
|
|
111
158
|
}
|
|
112
|
-
return
|
|
159
|
+
return {
|
|
160
|
+
output: output.trim(),
|
|
161
|
+
references,
|
|
162
|
+
};
|
|
113
163
|
}
|
|
114
164
|
|
|
115
165
|
exports.formatResultsForLLM = formatResultsForLLM;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"format.cjs","sources":["../../../../src/tools/search/format.ts"],"sourcesContent":["import type * as t from './types';\n\nexport function formatResultsForLLM(\n turn: number,\n results: t.SearchResultData\n): string {\n let output = '';\n\n const addSection = (title: string): void => {\n output += `\\n=== ${title} ===\\n`;\n };\n\n // Organic (web) results\n const organic = results.organic ?? [];\n if (organic.length) {\n addSection('Web Results, Turn ' + turn);\n organic.forEach((r, i) => {\n output += [\n `Source ${i}: ${r.title ?? '(no title)'}`,\n `Citation Anchor: \\\\ue202turn${turn}search${i}`,\n `URL: ${r.link}`,\n r.snippet != null ? `Summary: ${r.snippet}` : '',\n r.date != null ? `Date: ${r.date}` : '',\n r.attribution != null ? `Source: ${r.attribution}` : '',\n '',\n '--- Content Highlights ---',\n ...(r.highlights ?? [])\n .filter((h) => h.text.trim().length > 0)\n .map((h) => `[Relevance: ${h.score.toFixed(2)}]\\n${h.text.trim()}`),\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n });\n }\n\n // Ignoring these sections for now\n // // Top stories (news)\n // const topStores = results.topStories ?? [];\n // if (topStores.length) {\n // addSection('News Results');\n // topStores.forEach((r, i) => {\n // output += [\n // `Anchor: \\ue202turn0news${i}`,\n // `Title: ${r.title ?? '(no title)'}`,\n // `URL: ${r.link}`,\n // r.snippet != null ? `Snippet: ${r.snippet}` : '',\n // r.date != null ? `Date: ${r.date}` : '',\n // r.attribution != null ? `Source: ${r.attribution}` : '',\n // ''\n // ].filter(Boolean).join('\\n');\n // });\n // }\n\n // // Images\n // const images = results.images ?? [];\n // if (images.length) {\n // addSection('Image Results');\n // images.forEach((img, i) => {\n // output += [\n // `Anchor: \\ue202turn0image${i}`,\n // `Title: ${img.title ?? '(no title)'}`,\n // `Image URL: ${img.imageUrl}`,\n // ''\n // ].join('\\n');\n // });\n // }\n\n // Knowledge Graph\n if (results.knowledgeGraph != null) {\n addSection('Knowledge Graph');\n output += [\n `Title: ${results.knowledgeGraph.title ?? '(no title)'}`,\n results.knowledgeGraph.description != null\n ? `Description: ${results.knowledgeGraph.description}`\n : '',\n results.knowledgeGraph.type != null\n ? `Type: ${results.knowledgeGraph.type}`\n : '',\n results.knowledgeGraph.imageUrl != null\n ? `Image URL: ${results.knowledgeGraph.imageUrl}`\n : '',\n results.knowledgeGraph.attributes != null\n ? `Attributes: ${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}`\n : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n }\n\n // Answer Box\n if (results.answerBox != null) {\n addSection('Answer Box');\n output += [\n results.answerBox.title != null\n ? `Title: ${results.answerBox.title}`\n : '',\n results.answerBox.answer != null\n ? `Answer: ${results.answerBox.answer}`\n : '',\n results.answerBox.snippet != null\n ? `Snippet: ${results.answerBox.snippet}`\n : '',\n results.answerBox.date != null ? `Date: ${results.answerBox.date}` : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n }\n\n // People also ask\n const peopleAlsoAsk = results.peopleAlsoAsk ?? [];\n if (peopleAlsoAsk.length) {\n addSection('People Also Ask');\n peopleAlsoAsk.forEach((p, _i) => {\n output += [`Q: ${p.question}`, `A: ${p.answer}`, '']\n .filter(Boolean)\n .join('\\n');\n });\n }\n\n return output.trim();\n}\n"],"names":[],"mappings":";;AAEgB,SAAA,mBAAmB,CACjC,IAAY,EACZ,OAA2B,EAAA;IAE3B,IAAI,MAAM,GAAG,EAAE;AAEf,IAAA,MAAM,UAAU,GAAG,CAAC,KAAa,KAAU;AACzC,QAAA,MAAM,IAAI,CAAA,MAAA,EAAS,KAAK,CAAA,MAAA,CAAQ;AAClC,KAAC;;AAGD,IAAA,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE;AACrC,IAAA,IAAI,OAAO,CAAC,MAAM,EAAE;AAClB,QAAA,UAAU,CAAC,oBAAoB,GAAG,IAAI,CAAC;QACvC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,KAAI;AACvB,YAAA,MAAM,IAAI;AACR,gBAAA,CAAA,OAAA,EAAU,CAAC,CAAK,EAAA,EAAA,CAAC,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;gBACzC,CAA+B,4BAAA,EAAA,IAAI,CAAS,MAAA,EAAA,CAAC,CAAE,CAAA;gBAC/C,CAAQ,KAAA,EAAA,CAAC,CAAC,IAAI,CAAE,CAAA;AAChB,gBAAA,CAAC,CAAC,OAAO,IAAI,IAAI,GAAG,CAAY,SAAA,EAAA,CAAC,CAAC,OAAO,CAAA,CAAE,GAAG,EAAE;AAChD,gBAAA,CAAC,CAAC,IAAI,IAAI,IAAI,GAAG,CAAS,MAAA,EAAA,CAAC,CAAC,IAAI,CAAA,CAAE,GAAG,EAAE;AACvC,gBAAA,CAAC,CAAC,WAAW,IAAI,IAAI,GAAG,CAAW,QAAA,EAAA,CAAC,CAAC,WAAW,CAAA,CAAE,GAAG,EAAE;gBACvD,EAAE;gBACF,4BAA4B;AAC5B,gBAAA,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE;AACnB,qBAAA,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;qBACtC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA,YAAA,EAAe,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA,GAAA,EAAM,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA,CAAE,CAAC;gBACrE,EAAE;AACH;iBACE,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,IAAI,CAAC;AACf,SAAC,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoCJ,IAAA,IAAI,OAAO,CAAC,cAAc,IAAI,IAAI,EAAE;QAClC,UAAU,CAAC,iBAAiB,CAAC;AAC7B,QAAA,MAAM,IAAI;AACR,YAAA,CAAA,OAAA,EAAU,OAAO,CAAC,cAAc,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;AACxD,YAAA,OAAO,CAAC,cAAc,CAAC,WAAW,IAAI;AACpC,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,cAAc,CAAC,WAAW,CAAE;AACtD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI;AAC7B,kBAAE,CAAS,MAAA,EAAA,OAAO,CAAC,cAAc,CAAC,IAAI,CAAE;AACxC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,IAAI;AACjC,kBAAE,CAAc,WAAA,EAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAE;AACjD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,UAAU,IAAI;AACnC,kBAAE,CAAe,YAAA,EAAA,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,cAAc,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAE;AAC7E,kBAAE,EAAE;YACN,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,IAAI,CAAC;;;AAIf,IAAA,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,EAAE;QAC7B,UAAU,CAAC,YAAY,CAAC;AACxB,QAAA,MAAM,IAAI;AACR,YAAA,OAAO,CAAC,SAAS,CAAC,KAAK,IAAI;AACzB,kBAAE,CAAU,OAAA,EAAA,OAAO,CAAC,SAAS,CAAC,KAAK,CAAE;AACrC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,MAAM,IAAI;AAC1B,kBAAE,CAAW,QAAA,EAAA,OAAO,CAAC,SAAS,CAAC,MAAM,CAAE;AACvC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,OAAO,IAAI;AAC3B,kBAAE,CAAY,SAAA,EAAA,OAAO,CAAC,SAAS,CAAC,OAAO,CAAE;AACzC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,IAAI,IAAI,IAAI,GAAG,SAAS,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,GAAG,EAAE;YACvE,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,IAAI,CAAC;;;AAIf,IAAA,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,EAAE;AACjD,IAAA,IAAI,aAAa,CAAC,MAAM,EAAE;QACxB,UAAU,CAAC,iBAAiB,CAAC;QAC7B,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;AAC9B,YAAA,MAAM,IAAI,CAAC,CAAM,GAAA,EAAA,CAAC,CAAC,QAAQ,CAAA,CAAE,EAAE,CAAA,GAAA,EAAM,CAAC,CAAC,MAAM,CAAE,CAAA,EAAE,EAAE;iBAChD,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,IAAI,CAAC;AACf,SAAC,CAAC;;AAGJ,IAAA,OAAO,MAAM,CAAC,IAAI,EAAE;AACtB;;;;"}
|
|
1
|
+
{"version":3,"file":"format.cjs","sources":["../../../../src/tools/search/format.ts"],"sourcesContent":["import type * as t from './types';\nimport { getDomainName } from './utils';\n\nexport function formatResultsForLLM(\n turn: number,\n results: t.SearchResultData\n): { output: string; references: t.ResultReference[] } {\n let output = '';\n\n const addSection = (title: string): void => {\n output += `\\n=== ${title} ===\\n`;\n };\n\n const references: t.ResultReference[] = [];\n // Organic (web) results\n if (results.organic?.length != null && results.organic.length > 0) {\n addSection(`Web Results, Turn ${turn}`);\n for (let i = 0; i < results.organic.length; i++) {\n const r = results.organic[i];\n output += [\n `# Source ${i}: \"${r.title ?? '(no title)'}\"`,\n `Anchor: \\\\ue202turn${turn}search${i}`,\n `URL: ${r.link}`,\n r.snippet != null ? `Summary: ${r.snippet}` : '',\n r.date != null ? `Date: ${r.date}` : '',\n r.attribution != null ? `Source: ${r.attribution}` : '',\n '',\n '\\n## Highlights\\n\\n',\n '',\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n\n (r.highlights ?? [])\n .filter((h) => h.text.trim().length > 0)\n .forEach((h, hIndex) => {\n output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\\n\\n`;\n output += '```text\\n' + h.text.trim() + '\\n```\\n\\n';\n\n if (h.references != null && h.references.length) {\n output += 'Core References:\\n';\n output += h.references\n .map((ref) => {\n references.push({\n link: ref.reference.originalUrl,\n attribution: getDomainName(ref.reference.originalUrl),\n title: (\n ((ref.reference.title ?? '') || ref.reference.text) ??\n ''\n ).split('\\n')[0],\n });\n return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\\n\\t- Anchor: \\\\ue202turn${turn}ref${references.length - 1}`;\n })\n .join('\\n');\n output += '\\n\\n';\n }\n\n if (hIndex < (r.highlights?.length ?? 0) - 1) {\n output += '---\\n\\n';\n }\n });\n\n delete results.organic[i].highlights;\n output += '\\n';\n }\n }\n\n // Ignoring these sections for now\n // // Top stories (news)\n // const topStores = results.topStories ?? [];\n // if (topStores.length) {\n // addSection('News Results');\n // topStores.forEach((r, i) => {\n // output += [\n // `Anchor: \\ue202turn0news${i}`,\n // `Title: ${r.title ?? '(no title)'}`,\n // `URL: ${r.link}`,\n // r.snippet != null ? `Snippet: ${r.snippet}` : '',\n // r.date != null ? `Date: ${r.date}` : '',\n // r.attribution != null ? `Source: ${r.attribution}` : '',\n // ''\n // ].filter(Boolean).join('\\n');\n // });\n // }\n\n // // Images\n // const images = results.images ?? [];\n // if (images.length) {\n // addSection('Image Results');\n // images.forEach((img, i) => {\n // output += [\n // `Anchor: \\ue202turn0image${i}`,\n // `Title: ${img.title ?? '(no title)'}`,\n // `Image URL: ${img.imageUrl}`,\n // ''\n // ].join('\\n');\n // });\n // }\n\n // Knowledge Graph\n if (results.knowledgeGraph != null) {\n addSection('Knowledge Graph');\n output += [\n `**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,\n results.knowledgeGraph.type != null\n ? `**Type:** ${results.knowledgeGraph.type}`\n : '',\n results.knowledgeGraph.description != null\n ? `**Description:** ${results.knowledgeGraph.description}`\n : '',\n results.knowledgeGraph.descriptionSource != null\n ? `**Description Source:** ${results.knowledgeGraph.descriptionSource}`\n : '',\n results.knowledgeGraph.descriptionLink != null\n ? `**Description Link:** ${results.knowledgeGraph.descriptionLink}`\n : '',\n results.knowledgeGraph.imageUrl != null\n ? `**Image URL:** ${results.knowledgeGraph.imageUrl}`\n : '',\n results.knowledgeGraph.website != null\n ? `**Website:** ${results.knowledgeGraph.website}`\n : '',\n results.knowledgeGraph.attributes != null\n ? `**Attributes:**\\n\\`\\`\\`json\\n${JSON.stringify(\n results.knowledgeGraph.attributes,\n null,\n 2\n )}\\n\\`\\`\\``\n : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n\\n');\n }\n\n // Answer Box\n if (results.answerBox != null) {\n addSection('Answer Box');\n output += [\n results.answerBox.title != null\n ? `**Title:** ${results.answerBox.title}`\n : '',\n results.answerBox.snippet != null\n ? `**Snippet:** ${results.answerBox.snippet}`\n : '',\n results.answerBox.snippetHighlighted != null\n ? `**Snippet Highlighted:** ${results.answerBox.snippetHighlighted\n .map((s) => `\\`${s}\\``)\n .join(' ')}`\n : '',\n results.answerBox.link != null\n ? `**Link:** ${results.answerBox.link}`\n : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n\\n');\n }\n\n // People also ask\n const peopleAlsoAsk = results.peopleAlsoAsk ?? [];\n if (peopleAlsoAsk.length) {\n addSection('People Also Ask');\n peopleAlsoAsk.forEach((p, i) => {\n output += [\n `### Question ${i + 1}:`,\n `\"${p.question}\"`,\n `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}}` : ''}`,\n `${p.title != null && p.title ? `Title: ${p.title}` : ''}`,\n `${p.link != null && p.link ? `Link: ${p.link}` : ''}`,\n '',\n ]\n .filter(Boolean)\n .join('\\n\\n');\n });\n }\n return {\n output: output.trim(),\n references,\n };\n}\n"],"names":["getDomainName"],"mappings":";;;;AAGgB,SAAA,mBAAmB,CACjC,IAAY,EACZ,OAA2B,EAAA;IAE3B,IAAI,MAAM,GAAG,EAAE;AAEf,IAAA,MAAM,UAAU,GAAG,CAAC,KAAa,KAAU;AACzC,QAAA,MAAM,IAAI,CAAA,MAAA,EAAS,KAAK,CAAA,MAAA,CAAQ;AAClC,KAAC;IAED,MAAM,UAAU,GAAwB,EAAE;;AAE1C,IAAA,IAAI,OAAO,CAAC,OAAO,EAAE,MAAM,IAAI,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,QAAA,UAAU,CAAC,CAAA,kBAAA,EAAqB,IAAI,CAAA,CAAE,CAAC;AACvC,QAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC/C,MAAM,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;AAC5B,YAAA,MAAM,IAAI;AACR,gBAAA,CAAA,SAAA,EAAY,CAAC,CAAM,GAAA,EAAA,CAAC,CAAC,KAAK,IAAI,YAAY,CAAG,CAAA,CAAA;gBAC7C,CAAsB,mBAAA,EAAA,IAAI,CAAS,MAAA,EAAA,CAAC,CAAE,CAAA;gBACtC,CAAQ,KAAA,EAAA,CAAC,CAAC,IAAI,CAAE,CAAA;AAChB,gBAAA,CAAC,CAAC,OAAO,IAAI,IAAI,GAAG,CAAY,SAAA,EAAA,CAAC,CAAC,OAAO,CAAA,CAAE,GAAG,EAAE;AAChD,gBAAA,CAAC,CAAC,IAAI,IAAI,IAAI,GAAG,CAAS,MAAA,EAAA,CAAC,CAAC,IAAI,CAAA,CAAE,GAAG,EAAE;AACvC,gBAAA,CAAC,CAAC,WAAW,IAAI,IAAI,GAAG,CAAW,QAAA,EAAA,CAAC,CAAC,WAAW,CAAA,CAAE,GAAG,EAAE;gBACvD,EAAE;gBACF,qBAAqB;gBACrB,EAAE;gBACF,EAAE;AACH;iBACE,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,IAAI,CAAC;AAEb,YAAA,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE;AAChB,iBAAA,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;AACtC,iBAAA,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,KAAI;AACrB,gBAAA,MAAM,IAAI,CAAA,cAAA,EAAiB,MAAM,GAAG,CAAC,CAAgB,aAAA,EAAA,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO;gBAC9E,MAAM,IAAI,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,GAAG,WAAW;AAEnD,gBAAA,IAAI,CAAC,CAAC,UAAU,IAAI,IAAI,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE;oBAC/C,MAAM,IAAI,oBAAoB;oBAC9B,MAAM,IAAI,CAAC,CAAC;AACT,yBAAA,GAAG,CAAC,CAAC,GAAG,KAAI;wBACX,UAAU,CAAC,IAAI,CAAC;AACd,4BAAA,IAAI,EAAE,GAAG,CAAC,SAAS,CAAC,WAAW;4BAC/B,WAAW,EAAEA,mBAAa,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC;AACrD,4BAAA,KAAK,EAAE,CACL,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,IAAI,EAAE,KAAK,GAAG,CAAC,SAAS,CAAC,IAAI;gCAClD,EAAE,EACF,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,yBAAA,CAAC;wBACF,OAAO,CAAA,EAAA,EAAK,GAAG,CAAC,IAAI,CAAA,CAAA,EAAI,GAAG,CAAC,aAAa,GAAG,CAAC,CAAK,EAAA,EAAA,GAAG,CAAC,SAAS,CAAC,WAAW,CAAA,yBAAA,EAA4B,IAAI,CAAA,GAAA,EAAM,UAAU,CAAC,MAAM,GAAG,CAAC,CAAA,CAAE;AAC1I,qBAAC;yBACA,IAAI,CAAC,IAAI,CAAC;oBACb,MAAM,IAAI,MAAM;;AAGlB,gBAAA,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;oBAC5C,MAAM,IAAI,SAAS;;AAEvB,aAAC,CAAC;YAEJ,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU;YACpC,MAAM,IAAI,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAqClB,IAAA,IAAI,OAAO,CAAC,cAAc,IAAI,IAAI,EAAE;QAClC,UAAU,CAAC,iBAAiB,CAAC;AAC7B,QAAA,MAAM,IAAI;AACR,YAAA,CAAA,WAAA,EAAc,OAAO,CAAC,cAAc,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;AAC5D,YAAA,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI;AAC7B,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,cAAc,CAAC,IAAI,CAAE;AAC5C,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,WAAW,IAAI;AACpC,kBAAE,CAAoB,iBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,WAAW,CAAE;AAC1D,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,IAAI;AAC1C,kBAAE,CAA2B,wBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,CAAE;AACvE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,eAAe,IAAI;AACxC,kBAAE,CAAyB,sBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,eAAe,CAAE;AACnE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,IAAI;AACjC,kBAAE,CAAkB,eAAA,EAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAE;AACrD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,OAAO,IAAI;AAChC,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,cAAc,CAAC,OAAO,CAAE;AAClD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,UAAU,IAAI;AACnC,kBAAE,CAAgC,6BAAA,EAAA,IAAI,CAAC,SAAS,CAC9C,OAAO,CAAC,cAAc,CAAC,UAAU,EACjC,IAAI,EACJ,CAAC,CACF,CAAU,QAAA;AACX,kBAAE,EAAE;YACN,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,MAAM,CAAC;;;AAIjB,IAAA,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,EAAE;QAC7B,UAAU,CAAC,YAAY,CAAC;AACxB,QAAA,MAAM,IAAI;AACR,YAAA,OAAO,CAAC,SAAS,CAAC,KAAK,IAAI;AACzB,kBAAE,CAAc,WAAA,EAAA,OAAO,CAAC,SAAS,CAAC,KAAK,CAAE;AACzC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,OAAO,IAAI;AAC3B,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,SAAS,CAAC,OAAO,CAAE;AAC7C,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,kBAAkB,IAAI;AACtC,kBAAE,CAA4B,yBAAA,EAAA,OAAO,CAAC,SAAS,CAAC;qBAC7C,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA,EAAA,EAAK,CAAC,CAAA,EAAA,CAAI;qBACrB,IAAI,CAAC,GAAG,CAAC,CAAE;AACd,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,IAAI,IAAI;AACxB,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,SAAS,CAAC,IAAI,CAAE;AACvC,kBAAE,EAAE;YACN,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,MAAM,CAAC;;;AAIjB,IAAA,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,EAAE;AACjD,IAAA,IAAI,aAAa,CAAC,MAAM,EAAE;QACxB,UAAU,CAAC,iBAAiB,CAAC;QAC7B,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,KAAI;AAC7B,YAAA,MAAM,IAAI;gBACR,CAAgB,aAAA,EAAA,CAAC,GAAG,CAAC,CAAG,CAAA,CAAA;gBACxB,CAAI,CAAA,EAAA,CAAC,CAAC,QAAQ,CAAG,CAAA,CAAA;gBACjB,CAAG,EAAA,CAAC,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,OAAO,GAAG,CAAA,SAAA,EAAY,CAAC,CAAC,OAAO,GAAG,GAAG,EAAE,CAAE,CAAA;gBACnE,CAAG,EAAA,CAAC,CAAC,KAAK,IAAI,IAAI,IAAI,CAAC,CAAC,KAAK,GAAG,CAAA,OAAA,EAAU,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAE,CAAA;gBAC1D,CAAG,EAAA,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,CAAC,IAAI,GAAG,CAAA,MAAA,EAAS,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,CAAE,CAAA;gBACtD,EAAE;AACH;iBACE,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,MAAM,CAAC;AACjB,SAAC,CAAC;;IAEJ,OAAO;AACL,QAAA,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE;QACrB,UAAU;KACX;AACH;;;;"}
|
|
@@ -103,6 +103,57 @@ function findBestBoundary(text, direction = 'backward') {
|
|
|
103
103
|
// No match found, use character boundary
|
|
104
104
|
return direction === 'backward' ? text.length : 0;
|
|
105
105
|
}
|
|
106
|
+
/**
|
|
107
|
+
* Tracks references used in a highlight without changing their numbers
|
|
108
|
+
*/
|
|
109
|
+
function trackReferencesInHighlight(text, sourceResult // Source containing the original references
|
|
110
|
+
) {
|
|
111
|
+
// Track used references
|
|
112
|
+
const references = [];
|
|
113
|
+
if (!text || text.length === 0 || !text.includes('#')) {
|
|
114
|
+
return { references }; // Early return
|
|
115
|
+
}
|
|
116
|
+
// Quick check for reference markers
|
|
117
|
+
if (!text.includes('link#') &&
|
|
118
|
+
!text.includes('image#') &&
|
|
119
|
+
!text.includes('video#')) {
|
|
120
|
+
return { references };
|
|
121
|
+
}
|
|
122
|
+
// Get references from the source if available
|
|
123
|
+
const sourceRefs = sourceResult.references || {
|
|
124
|
+
links: [],
|
|
125
|
+
images: [],
|
|
126
|
+
videos: [],
|
|
127
|
+
};
|
|
128
|
+
// Find references but don't modify text
|
|
129
|
+
const refRegex = /\((link|image|video)#(\d+)(?:\s+"([^"]*)")?\)/g;
|
|
130
|
+
let match;
|
|
131
|
+
while ((match = refRegex.exec(text)) !== null) {
|
|
132
|
+
const [, type, indexStr] = match;
|
|
133
|
+
const originalIndex = parseInt(indexStr, 10) - 1; // Convert to 0-based
|
|
134
|
+
// Get the source array for this type
|
|
135
|
+
const refType = type;
|
|
136
|
+
const sourceArray = sourceRefs[`${refType}s`];
|
|
137
|
+
// Skip if invalid reference
|
|
138
|
+
if (!sourceArray ||
|
|
139
|
+
originalIndex < 0 ||
|
|
140
|
+
originalIndex >= sourceArray.length) {
|
|
141
|
+
continue; // Skip invalid references
|
|
142
|
+
}
|
|
143
|
+
// Get original reference
|
|
144
|
+
const reference = sourceArray[originalIndex];
|
|
145
|
+
// Track if not already tracked
|
|
146
|
+
const alreadyTracked = references.some((ref) => ref.type === refType && ref.originalIndex === originalIndex);
|
|
147
|
+
if (!alreadyTracked) {
|
|
148
|
+
references.push({
|
|
149
|
+
type: refType,
|
|
150
|
+
originalIndex,
|
|
151
|
+
reference,
|
|
152
|
+
});
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
return { references };
|
|
156
|
+
}
|
|
106
157
|
/**
|
|
107
158
|
* Expand highlights in search results using smart boundary detection.
|
|
108
159
|
*
|
|
@@ -112,19 +163,16 @@ function findBestBoundary(text, direction = 'backward') {
|
|
|
112
163
|
* @param searchResults - Search results object
|
|
113
164
|
* @param mainExpandBy - Primary expansion size on each side (default: 300)
|
|
114
165
|
* @param separatorExpandBy - Additional range to look for separators (default: 150)
|
|
115
|
-
* @returns Copy of search results with expanded highlights
|
|
166
|
+
* @returns Copy of search results with expanded highlights and tracked references
|
|
116
167
|
*/
|
|
117
168
|
function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy = 150) {
|
|
118
|
-
//
|
|
169
|
+
// Avoid deep copy - only copy what we modify
|
|
119
170
|
const resultCopy = { ...searchResults };
|
|
120
|
-
|
|
121
|
-
if (resultCopy.organic) {
|
|
171
|
+
if (resultCopy.organic)
|
|
122
172
|
resultCopy.organic = [...resultCopy.organic];
|
|
123
|
-
|
|
124
|
-
if (resultCopy.topStories) {
|
|
173
|
+
if (resultCopy.topStories)
|
|
125
174
|
resultCopy.topStories = [...resultCopy.topStories];
|
|
126
|
-
|
|
127
|
-
// 5. Process the results efficiently
|
|
175
|
+
// Process the results efficiently
|
|
128
176
|
const processResultTypes = ['organic', 'topStories'];
|
|
129
177
|
for (const resultType of processResultTypes) {
|
|
130
178
|
if (!resultCopy[resultType])
|
|
@@ -143,17 +191,18 @@ function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy =
|
|
|
143
191
|
const highlights = [];
|
|
144
192
|
// Process each highlight
|
|
145
193
|
for (const highlight of result.highlights) {
|
|
146
|
-
const
|
|
147
|
-
let startPos = content.indexOf(
|
|
148
|
-
let highlightLen =
|
|
194
|
+
const { references } = trackReferencesInHighlight(highlight.text, result);
|
|
195
|
+
let startPos = content.indexOf(highlight.text);
|
|
196
|
+
let highlightLen = highlight.text.length;
|
|
149
197
|
if (startPos === -1) {
|
|
150
198
|
// Try with stripped whitespace
|
|
151
|
-
const strippedHighlight =
|
|
199
|
+
const strippedHighlight = highlight.text.trim();
|
|
152
200
|
startPos = content.indexOf(strippedHighlight);
|
|
153
201
|
if (startPos === -1) {
|
|
154
202
|
highlights.push({
|
|
155
203
|
text: highlight.text,
|
|
156
204
|
score: highlight.score,
|
|
205
|
+
references,
|
|
157
206
|
});
|
|
158
207
|
continue;
|
|
159
208
|
}
|
|
@@ -180,10 +229,12 @@ function expandHighlights(searchResults, mainExpandBy = 300, separatorExpandBy =
|
|
|
180
229
|
highlights.push({
|
|
181
230
|
text: expandedHighlightText,
|
|
182
231
|
score: highlight.score,
|
|
232
|
+
references,
|
|
183
233
|
});
|
|
184
234
|
}
|
|
185
|
-
delete resultCopy.content;
|
|
186
235
|
resultCopy.highlights = highlights;
|
|
236
|
+
delete resultCopy.content;
|
|
237
|
+
delete resultCopy.references;
|
|
187
238
|
return resultCopy;
|
|
188
239
|
});
|
|
189
240
|
}
|