@librechat/agents 2.4.319 → 2.4.321
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/format.cjs +111 -80
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +83 -37
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +83 -57
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +3 -1
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +111 -80
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +83 -37
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +82 -56
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +3 -1
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/types/tools/search/search.d.ts +1 -1
- package/dist/types/tools/search/types.d.ts +22 -1
- package/package.json +1 -1
- package/src/scripts/search.ts +4 -1
- package/src/tools/search/format.ts +149 -86
- package/src/tools/search/search.ts +120 -47
- package/src/tools/search/tool.ts +137 -89
- package/src/tools/search/types.ts +30 -1
- package/src/tools/search/utils.ts +5 -1
|
@@ -2,10 +2,86 @@
|
|
|
2
2
|
|
|
3
3
|
var utils = require('./utils.cjs');
|
|
4
4
|
|
|
5
|
+
/**
 * Builds the lines that open a source's highlights section.
 *
 * @returns {string[]} The `## Highlights` heading (prefixed with a newline so
 *   a blank line precedes it once joined) followed by an empty line.
 */
function addHighlightSection() {
    return ['\n## Highlights', ''];
}

/**
 * Formats a single source (organic web result or top story) as text lines.
 *
 * Emits a title line, an anchor line, the URL, the optional summary / date /
 * attribution lines, and then one section per highlight whose text is not
 * blank. Every reference attached to a rendered highlight is appended to
 * `references`; only references of type `link` are listed in the output, and
 * their anchor number is the reference's position in `references`.
 *
 * @param {object} source - The source to format; reads `title`, `link`,
 *   `snippet`, `date`, `attribution` and `highlights`.
 * @param {number} index - Position of the source within its result list.
 * @param {number} turn - Turn number embedded in the anchors.
 * @param {string} sourceType - Source kind (called with 'search' or 'news');
 *   capitalized for the title and used verbatim in the anchor.
 * @param {Array<object>} references - Accumulator that is mutated in place.
 * @returns {string} The formatted lines joined with newlines.
 */
function formatSource(source, index, turn, sourceType, references) {
    /** Array of all lines to include in the output */
    const outputLines = [];

    const label = sourceType.charAt(0).toUpperCase() + sourceType.slice(1);
    const title = source.title ? `"${source.title}"` : '(no title)';
    outputLines.push(`# ${label} ${index}: ${title}`);
    outputLines.push(`\nAnchor: \\ue202turn${turn}${sourceType}${index}`);
    outputLines.push(`URL: ${source.link}`);

    // Optional fields are only listed when present.
    if (source.snippet != null) {
        outputLines.push(`Summary: ${source.snippet}`);
    }
    if (source.date != null) {
        outputLines.push(`Date: ${source.date}`);
    }
    if (source.attribution != null) {
        outputLines.push(`Source: ${source.attribution}`);
    }

    // Filter once and base every later decision on the filtered list, so the
    // heading and the separators agree with what is actually rendered.
    const highlights = (source.highlights ?? []).filter((h) => h.text.trim().length > 0);

    if (highlights.length > 0) {
        outputLines.push(...addHighlightSection());
    } else {
        // No renderable highlight: emit a blank line instead of an empty section.
        outputLines.push('');
    }

    highlights.forEach((h, hIndex) => {
        outputLines.push(`### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`);
        outputLines.push('');
        outputLines.push('```text');
        outputLines.push(h.text.trim());
        outputLines.push('```');
        outputLines.push('');

        const refLines = [];
        for (const ref of h.references ?? []) {
            // Every reference is recorded, whatever its type, so anchor
            // numbers stay aligned with positions in `references`.
            references.push({
                type: ref.type,
                link: ref.reference.originalUrl,
                attribution: utils.getDomainName(ref.reference.originalUrl),
                title: ((ref.reference.title || ref.reference.text) ?? '').split('\n')[0],
            });
            if (ref.type !== 'link') {
                continue;
            }
            if (refLines.length === 0) {
                refLines.push('Core References:');
            }
            refLines.push(`- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`);
            refLines.push(`\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`);
        }
        if (refLines.length > 0) {
            outputLines.push(...refLines);
            outputLines.push('');
        }

        // Separator between rendered highlights only; hIndex indexes the
        // filtered list, so it must be compared with the filtered length.
        if (hIndex < highlights.length - 1) {
            outputLines.push('---');
            outputLines.push('');
        }
    });

    outputLines.push('');
    return outputLines.join('\n');
}
|
|
5
78
|
function formatResultsForLLM(turn, results) {
|
|
6
|
-
|
|
79
|
+
/** Array to collect all output lines */
|
|
80
|
+
const outputLines = [];
|
|
7
81
|
const addSection = (title) => {
|
|
8
|
-
|
|
82
|
+
outputLines.push('');
|
|
83
|
+
outputLines.push(`=== ${title} ===`);
|
|
84
|
+
outputLines.push('');
|
|
9
85
|
};
|
|
10
86
|
const references = [];
|
|
11
87
|
// Organic (web) results
|
|
@@ -13,82 +89,38 @@ function formatResultsForLLM(turn, results) {
|
|
|
13
89
|
addSection(`Web Results, Turn ${turn}`);
|
|
14
90
|
for (let i = 0; i < results.organic.length; i++) {
|
|
15
91
|
const r = results.organic[i];
|
|
16
|
-
|
|
17
|
-
`# Source ${i}: "${r.title ?? '(no title)'}"`,
|
|
18
|
-
`Anchor: \\ue202turn${turn}search${i}`,
|
|
19
|
-
`URL: ${r.link}`,
|
|
20
|
-
r.snippet != null ? `Summary: ${r.snippet}` : '',
|
|
21
|
-
r.date != null ? `Date: ${r.date}` : '',
|
|
22
|
-
r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
23
|
-
'',
|
|
24
|
-
'\n## Highlights\n\n',
|
|
25
|
-
'',
|
|
26
|
-
'',
|
|
27
|
-
]
|
|
28
|
-
.filter(Boolean)
|
|
29
|
-
.join('\n');
|
|
30
|
-
(r.highlights ?? [])
|
|
31
|
-
.filter((h) => h.text.trim().length > 0)
|
|
32
|
-
.forEach((h, hIndex) => {
|
|
33
|
-
output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\n\n`;
|
|
34
|
-
output += '```text\n' + h.text.trim() + '\n```\n\n';
|
|
35
|
-
if (h.references != null && h.references.length) {
|
|
36
|
-
output += 'Core References:\n';
|
|
37
|
-
output += h.references
|
|
38
|
-
.map((ref) => {
|
|
39
|
-
references.push({
|
|
40
|
-
link: ref.reference.originalUrl,
|
|
41
|
-
attribution: utils.getDomainName(ref.reference.originalUrl),
|
|
42
|
-
title: (((ref.reference.title ?? '') || ref.reference.text) ??
|
|
43
|
-
'').split('\n')[0],
|
|
44
|
-
});
|
|
45
|
-
return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\n\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`;
|
|
46
|
-
})
|
|
47
|
-
.join('\n');
|
|
48
|
-
output += '\n\n';
|
|
49
|
-
}
|
|
50
|
-
if (hIndex < (r.highlights?.length ?? 0) - 1) {
|
|
51
|
-
output += '---\n\n';
|
|
52
|
-
}
|
|
53
|
-
});
|
|
92
|
+
outputLines.push(formatSource(r, i, turn, 'search', references));
|
|
54
93
|
delete results.organic[i].highlights;
|
|
55
|
-
output += '\n';
|
|
56
94
|
}
|
|
57
95
|
}
|
|
58
|
-
//
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
// r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
71
|
-
// ''
|
|
72
|
-
// ].filter(Boolean).join('\n');
|
|
73
|
-
// });
|
|
74
|
-
// }
|
|
96
|
+
// Top stories (news)
|
|
97
|
+
const topStories = results.topStories ?? [];
|
|
98
|
+
if (topStories.length) {
|
|
99
|
+
addSection('News Results');
|
|
100
|
+
for (let i = 0; i < topStories.length; i++) {
|
|
101
|
+
const r = topStories[i];
|
|
102
|
+
outputLines.push(formatSource(r, i, turn, 'news', references));
|
|
103
|
+
if (results.topStories?.[i]?.highlights) {
|
|
104
|
+
delete results.topStories[i].highlights;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
75
108
|
// // Images
|
|
76
109
|
// const images = results.images ?? [];
|
|
77
110
|
// if (images.length) {
|
|
78
111
|
// addSection('Image Results');
|
|
79
|
-
// images.
|
|
80
|
-
//
|
|
81
|
-
//
|
|
82
|
-
//
|
|
83
|
-
//
|
|
84
|
-
//
|
|
85
|
-
//
|
|
86
|
-
// });
|
|
112
|
+
// const imageLines = images.map((img, i) => [
|
|
113
|
+
// `Anchor: \ue202turn0image${i}`,
|
|
114
|
+
// `Title: ${img.title ?? '(no title)'}`,
|
|
115
|
+
// `Image URL: ${img.imageUrl}`,
|
|
116
|
+
// ''
|
|
117
|
+
// ].join('\n'));
|
|
118
|
+
// outputLines.push(imageLines.join('\n'));
|
|
87
119
|
// }
|
|
88
120
|
// Knowledge Graph
|
|
89
121
|
if (results.knowledgeGraph != null) {
|
|
90
122
|
addSection('Knowledge Graph');
|
|
91
|
-
|
|
123
|
+
const kgLines = [
|
|
92
124
|
`**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
|
|
93
125
|
results.knowledgeGraph.type != null
|
|
94
126
|
? `**Type:** ${results.knowledgeGraph.type}`
|
|
@@ -112,14 +144,13 @@ function formatResultsForLLM(turn, results) {
|
|
|
112
144
|
? `**Attributes:**\n\`\`\`json\n${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}\n\`\`\``
|
|
113
145
|
: '',
|
|
114
146
|
'',
|
|
115
|
-
]
|
|
116
|
-
|
|
117
|
-
.join('\n\n');
|
|
147
|
+
].filter(Boolean);
|
|
148
|
+
outputLines.push(kgLines.join('\n\n'));
|
|
118
149
|
}
|
|
119
150
|
// Answer Box
|
|
120
151
|
if (results.answerBox != null) {
|
|
121
152
|
addSection('Answer Box');
|
|
122
|
-
|
|
153
|
+
const abLines = [
|
|
123
154
|
results.answerBox.title != null
|
|
124
155
|
? `**Title:** ${results.answerBox.title}`
|
|
125
156
|
: '',
|
|
@@ -135,29 +166,29 @@ function formatResultsForLLM(turn, results) {
|
|
|
135
166
|
? `**Link:** ${results.answerBox.link}`
|
|
136
167
|
: '',
|
|
137
168
|
'',
|
|
138
|
-
]
|
|
139
|
-
|
|
140
|
-
.join('\n\n');
|
|
169
|
+
].filter(Boolean);
|
|
170
|
+
outputLines.push(abLines.join('\n\n'));
|
|
141
171
|
}
|
|
142
172
|
// People also ask
|
|
143
173
|
const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
|
|
144
174
|
if (peopleAlsoAsk.length) {
|
|
145
175
|
addSection('People Also Ask');
|
|
176
|
+
const paaLines = [];
|
|
146
177
|
peopleAlsoAsk.forEach((p, i) => {
|
|
147
|
-
|
|
178
|
+
const questionLines = [
|
|
148
179
|
`### Question ${i + 1}:`,
|
|
149
180
|
`"${p.question}"`,
|
|
150
|
-
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}
|
|
181
|
+
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,
|
|
151
182
|
`${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
|
|
152
183
|
`${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
|
|
153
184
|
'',
|
|
154
|
-
]
|
|
155
|
-
|
|
156
|
-
.join('\n\n');
|
|
185
|
+
].filter(Boolean);
|
|
186
|
+
paaLines.push(questionLines.join('\n\n'));
|
|
157
187
|
});
|
|
188
|
+
outputLines.push(paaLines.join(''));
|
|
158
189
|
}
|
|
159
190
|
return {
|
|
160
|
-
output:
|
|
191
|
+
output: outputLines.join('\n').trim(),
|
|
161
192
|
references,
|
|
162
193
|
};
|
|
163
194
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"format.cjs","sources":["../../../../src/tools/search/format.ts"],"sourcesContent":["import type * as t from './types';\nimport { getDomainName } from './utils';\n\nexport function formatResultsForLLM(\n turn: number,\n results: t.SearchResultData\n): { output: string; references: t.ResultReference[] } {\n let output = '';\n\n const addSection = (title: string): void => {\n output += `\\n=== ${title} ===\\n`;\n };\n\n const references: t.ResultReference[] = [];\n // Organic (web) results\n if (results.organic?.length != null && results.organic.length > 0) {\n addSection(`Web Results, Turn ${turn}`);\n for (let i = 0; i < results.organic.length; i++) {\n const r = results.organic[i];\n output += [\n `# Source ${i}: \"${r.title ?? '(no title)'}\"`,\n `Anchor: \\\\ue202turn${turn}search${i}`,\n `URL: ${r.link}`,\n r.snippet != null ? `Summary: ${r.snippet}` : '',\n r.date != null ? `Date: ${r.date}` : '',\n r.attribution != null ? `Source: ${r.attribution}` : '',\n '',\n '\\n## Highlights\\n\\n',\n '',\n '',\n ]\n .filter(Boolean)\n .join('\\n');\n\n (r.highlights ?? [])\n .filter((h) => h.text.trim().length > 0)\n .forEach((h, hIndex) => {\n output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\\n\\n`;\n output += '```text\\n' + h.text.trim() + '\\n```\\n\\n';\n\n if (h.references != null && h.references.length) {\n output += 'Core References:\\n';\n output += h.references\n .map((ref) => {\n references.push({\n link: ref.reference.originalUrl,\n attribution: getDomainName(ref.reference.originalUrl),\n title: (\n ((ref.reference.title ?? '') || ref.reference.text) ??\n ''\n ).split('\\n')[0],\n });\n return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\\n\\t- Anchor: \\\\ue202turn${turn}ref${references.length - 1}`;\n })\n .join('\\n');\n output += '\\n\\n';\n }\n\n if (hIndex < (r.highlights?.length ?? 
0) - 1) {\n output += '---\\n\\n';\n }\n });\n\n delete results.organic[i].highlights;\n output += '\\n';\n }\n }\n\n // Ignoring these sections for now\n // // Top stories (news)\n // const topStores = results.topStories ?? [];\n // if (topStores.length) {\n // addSection('News Results');\n // topStores.forEach((r, i) => {\n // output += [\n // `Anchor: \\ue202turn0news${i}`,\n // `Title: ${r.title ?? '(no title)'}`,\n // `URL: ${r.link}`,\n // r.snippet != null ? `Snippet: ${r.snippet}` : '',\n // r.date != null ? `Date: ${r.date}` : '',\n // r.attribution != null ? `Source: ${r.attribution}` : '',\n // ''\n // ].filter(Boolean).join('\\n');\n // });\n // }\n\n // // Images\n // const images = results.images ?? [];\n // if (images.length) {\n // addSection('Image Results');\n // images.forEach((img, i) => {\n // output += [\n // `Anchor: \\ue202turn0image${i}`,\n // `Title: ${img.title ?? '(no title)'}`,\n // `Image URL: ${img.imageUrl}`,\n // ''\n // ].join('\\n');\n // });\n // }\n\n // Knowledge Graph\n if (results.knowledgeGraph != null) {\n addSection('Knowledge Graph');\n output += [\n `**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,\n results.knowledgeGraph.type != null\n ? `**Type:** ${results.knowledgeGraph.type}`\n : '',\n results.knowledgeGraph.description != null\n ? `**Description:** ${results.knowledgeGraph.description}`\n : '',\n results.knowledgeGraph.descriptionSource != null\n ? `**Description Source:** ${results.knowledgeGraph.descriptionSource}`\n : '',\n results.knowledgeGraph.descriptionLink != null\n ? `**Description Link:** ${results.knowledgeGraph.descriptionLink}`\n : '',\n results.knowledgeGraph.imageUrl != null\n ? `**Image URL:** ${results.knowledgeGraph.imageUrl}`\n : '',\n results.knowledgeGraph.website != null\n ? `**Website:** ${results.knowledgeGraph.website}`\n : '',\n results.knowledgeGraph.attributes != null\n ? 
`**Attributes:**\\n\\`\\`\\`json\\n${JSON.stringify(\n results.knowledgeGraph.attributes,\n null,\n 2\n )}\\n\\`\\`\\``\n : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n\\n');\n }\n\n // Answer Box\n if (results.answerBox != null) {\n addSection('Answer Box');\n output += [\n results.answerBox.title != null\n ? `**Title:** ${results.answerBox.title}`\n : '',\n results.answerBox.snippet != null\n ? `**Snippet:** ${results.answerBox.snippet}`\n : '',\n results.answerBox.snippetHighlighted != null\n ? `**Snippet Highlighted:** ${results.answerBox.snippetHighlighted\n .map((s) => `\\`${s}\\``)\n .join(' ')}`\n : '',\n results.answerBox.link != null\n ? `**Link:** ${results.answerBox.link}`\n : '',\n '',\n ]\n .filter(Boolean)\n .join('\\n\\n');\n }\n\n // People also ask\n const peopleAlsoAsk = results.peopleAlsoAsk ?? [];\n if (peopleAlsoAsk.length) {\n addSection('People Also Ask');\n peopleAlsoAsk.forEach((p, i) => {\n output += [\n `### Question ${i + 1}:`,\n `\"${p.question}\"`,\n `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}}` : ''}`,\n `${p.title != null && p.title ? `Title: ${p.title}` : ''}`,\n `${p.link != null && p.link ? 
`Link: ${p.link}` : ''}`,\n '',\n ]\n .filter(Boolean)\n .join('\\n\\n');\n });\n }\n return {\n output: output.trim(),\n references,\n };\n}\n"],"names":["getDomainName"],"mappings":";;;;AAGgB,SAAA,mBAAmB,CACjC,IAAY,EACZ,OAA2B,EAAA;IAE3B,IAAI,MAAM,GAAG,EAAE;AAEf,IAAA,MAAM,UAAU,GAAG,CAAC,KAAa,KAAU;AACzC,QAAA,MAAM,IAAI,CAAA,MAAA,EAAS,KAAK,CAAA,MAAA,CAAQ;AAClC,KAAC;IAED,MAAM,UAAU,GAAwB,EAAE;;AAE1C,IAAA,IAAI,OAAO,CAAC,OAAO,EAAE,MAAM,IAAI,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,QAAA,UAAU,CAAC,CAAA,kBAAA,EAAqB,IAAI,CAAA,CAAE,CAAC;AACvC,QAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC/C,MAAM,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;AAC5B,YAAA,MAAM,IAAI;AACR,gBAAA,CAAA,SAAA,EAAY,CAAC,CAAM,GAAA,EAAA,CAAC,CAAC,KAAK,IAAI,YAAY,CAAG,CAAA,CAAA;gBAC7C,CAAsB,mBAAA,EAAA,IAAI,CAAS,MAAA,EAAA,CAAC,CAAE,CAAA;gBACtC,CAAQ,KAAA,EAAA,CAAC,CAAC,IAAI,CAAE,CAAA;AAChB,gBAAA,CAAC,CAAC,OAAO,IAAI,IAAI,GAAG,CAAY,SAAA,EAAA,CAAC,CAAC,OAAO,CAAA,CAAE,GAAG,EAAE;AAChD,gBAAA,CAAC,CAAC,IAAI,IAAI,IAAI,GAAG,CAAS,MAAA,EAAA,CAAC,CAAC,IAAI,CAAA,CAAE,GAAG,EAAE;AACvC,gBAAA,CAAC,CAAC,WAAW,IAAI,IAAI,GAAG,CAAW,QAAA,EAAA,CAAC,CAAC,WAAW,CAAA,CAAE,GAAG,EAAE;gBACvD,EAAE;gBACF,qBAAqB;gBACrB,EAAE;gBACF,EAAE;AACH;iBACE,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,IAAI,CAAC;AAEb,YAAA,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE;AAChB,iBAAA,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;AACtC,iBAAA,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,KAAI;AACrB,gBAAA,MAAM,IAAI,CAAA,cAAA,EAAiB,MAAM,GAAG,CAAC,CAAgB,aAAA,EAAA,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO;gBAC9E,MAAM,IAAI,WAAW,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,GAAG,WAAW;AAEnD,gBAAA,IAAI,CAAC,CAAC,UAAU,IAAI,IAAI,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE;oBAC/C,MAAM,IAAI,oBAAoB;oBAC9B,MAAM,IAAI,CAAC,CAAC;AACT,yBAAA,GAAG,CAAC,CAAC,GAAG,KAAI;wBACX,UAAU,CAAC,IAAI,CAAC;AACd,4BAAA,IAAI,EAAE,GAAG,CAAC,SAAS,CAAC,WAAW;4BAC/B,WAAW,EAAEA,mBAAa,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC;AACrD,4BAAA,KAAK,EAAE,CACL,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,IAAI,
EAAE,KAAK,GAAG,CAAC,SAAS,CAAC,IAAI;gCAClD,EAAE,EACF,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,yBAAA,CAAC;wBACF,OAAO,CAAA,EAAA,EAAK,GAAG,CAAC,IAAI,CAAA,CAAA,EAAI,GAAG,CAAC,aAAa,GAAG,CAAC,CAAK,EAAA,EAAA,GAAG,CAAC,SAAS,CAAC,WAAW,CAAA,yBAAA,EAA4B,IAAI,CAAA,GAAA,EAAM,UAAU,CAAC,MAAM,GAAG,CAAC,CAAA,CAAE;AAC1I,qBAAC;yBACA,IAAI,CAAC,IAAI,CAAC;oBACb,MAAM,IAAI,MAAM;;AAGlB,gBAAA,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;oBAC5C,MAAM,IAAI,SAAS;;AAEvB,aAAC,CAAC;YAEJ,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU;YACpC,MAAM,IAAI,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAqClB,IAAA,IAAI,OAAO,CAAC,cAAc,IAAI,IAAI,EAAE;QAClC,UAAU,CAAC,iBAAiB,CAAC;AAC7B,QAAA,MAAM,IAAI;AACR,YAAA,CAAA,WAAA,EAAc,OAAO,CAAC,cAAc,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;AAC5D,YAAA,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI;AAC7B,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,cAAc,CAAC,IAAI,CAAE;AAC5C,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,WAAW,IAAI;AACpC,kBAAE,CAAoB,iBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,WAAW,CAAE;AAC1D,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,IAAI;AAC1C,kBAAE,CAA2B,wBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,CAAE;AACvE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,eAAe,IAAI;AACxC,kBAAE,CAAyB,sBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,eAAe,CAAE;AACnE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,IAAI;AACjC,kBAAE,CAAkB,eAAA,EAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAE;AACrD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,OAAO,IAAI;AAChC,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,cAAc,CAAC,OAAO,CAAE;AAClD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,UAAU,IAAI;AACnC,kBAAE,CAAgC,6BAAA,EAAA,IAAI,CAAC,SAAS,CAC9C,OAAO,CAAC,cAAc,CAAC,UAAU,EACjC,IAAI,EACJ,CAAC,CACF,CAAU,QAAA;AACX,kBAAE,EAAE;YACN,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,MAAM,CAAC;;;AAIjB,IAAA,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,EAAE;QAC7B,UAAU,CAAC,YAAY,CAAC;AACxB,QAAA,MAAM,IAAI;AACR,YAAA,OAAO,CAAC,SAAS,CAAC,KAAK,IAAI;AACzB,kBAAE,CAAc,WAAA,EAAA,OAAO,CAAC,SAAS,CAAC,KAAK,CAAE;AACzC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,OAAO,IAAI;AAC3B,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,SAAS,CAAC,OAAO,CAAE;AAC7C,kBAAE,EAAE;AACN
,YAAA,OAAO,CAAC,SAAS,CAAC,kBAAkB,IAAI;AACtC,kBAAE,CAA4B,yBAAA,EAAA,OAAO,CAAC,SAAS,CAAC;qBAC7C,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA,EAAA,EAAK,CAAC,CAAA,EAAA,CAAI;qBACrB,IAAI,CAAC,GAAG,CAAC,CAAE;AACd,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,IAAI,IAAI;AACxB,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,SAAS,CAAC,IAAI,CAAE;AACvC,kBAAE,EAAE;YACN,EAAE;AACH;aACE,MAAM,CAAC,OAAO;aACd,IAAI,CAAC,MAAM,CAAC;;;AAIjB,IAAA,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,EAAE;AACjD,IAAA,IAAI,aAAa,CAAC,MAAM,EAAE;QACxB,UAAU,CAAC,iBAAiB,CAAC;QAC7B,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,KAAI;AAC7B,YAAA,MAAM,IAAI;gBACR,CAAgB,aAAA,EAAA,CAAC,GAAG,CAAC,CAAG,CAAA,CAAA;gBACxB,CAAI,CAAA,EAAA,CAAC,CAAC,QAAQ,CAAG,CAAA,CAAA;gBACjB,CAAG,EAAA,CAAC,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,OAAO,GAAG,CAAA,SAAA,EAAY,CAAC,CAAC,OAAO,GAAG,GAAG,EAAE,CAAE,CAAA;gBACnE,CAAG,EAAA,CAAC,CAAC,KAAK,IAAI,IAAI,IAAI,CAAC,CAAC,KAAK,GAAG,CAAA,OAAA,EAAU,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAE,CAAA;gBAC1D,CAAG,EAAA,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,CAAC,IAAI,GAAG,CAAA,MAAA,EAAS,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,CAAE,CAAA;gBACtD,EAAE;AACH;iBACE,MAAM,CAAC,OAAO;iBACd,IAAI,CAAC,MAAM,CAAC;AACjB,SAAC,CAAC;;IAEJ,OAAO;AACL,QAAA,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE;QACrB,UAAU;KACX;AACH;;;;"}
|
|
1
|
+
{"version":3,"file":"format.cjs","sources":["../../../../src/tools/search/format.ts"],"sourcesContent":["import type * as t from './types';\nimport { getDomainName } from './utils';\n\nfunction addHighlightSection(): string[] {\n return ['\\n## Highlights', ''];\n}\n\n// Helper function to format a source (organic or top story)\nfunction formatSource(\n source: t.ValidSource,\n index: number,\n turn: number,\n sourceType: 'search' | 'news',\n references: t.ResultReference[]\n): string {\n /** Array of all lines to include in the output */\n const outputLines: string[] = [];\n\n // Add the title\n outputLines.push(\n `# ${sourceType.charAt(0).toUpperCase() + sourceType.slice(1)} ${index}: ${source.title != null && source.title ? `\"${source.title}\"` : '(no title)'}`\n );\n outputLines.push(`\\nAnchor: \\\\ue202turn${turn}${sourceType}${index}`);\n outputLines.push(`URL: ${source.link}`);\n\n // Add optional fields\n if ('snippet' in source && source.snippet != null) {\n outputLines.push(`Summary: ${source.snippet}`);\n }\n\n if (source.date != null) {\n outputLines.push(`Date: ${source.date}`);\n }\n\n if (source.attribution != null) {\n outputLines.push(`Source: ${source.attribution}`);\n }\n\n // Add highlight section or empty line\n if ((source.highlights?.length ?? 0) > 0) {\n outputLines.push(...addHighlightSection());\n } else {\n outputLines.push('');\n }\n\n // Process highlights if they exist\n (source.highlights ?? 
[])\n .filter((h) => h.text.trim().length > 0)\n .forEach((h, hIndex) => {\n outputLines.push(\n `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`\n );\n outputLines.push('');\n outputLines.push('```text');\n outputLines.push(h.text.trim());\n outputLines.push('```');\n outputLines.push('');\n\n if (h.references != null && h.references.length) {\n let hasHeader = false;\n const refLines: string[] = [];\n\n for (let j = 0; j < h.references.length; j++) {\n const ref = h.references[j];\n references.push({\n type: ref.type,\n link: ref.reference.originalUrl,\n attribution: getDomainName(ref.reference.originalUrl),\n title: (\n ((ref.reference.title ?? '') || ref.reference.text) ??\n ''\n ).split('\\n')[0],\n });\n\n if (ref.type !== 'link') {\n continue;\n }\n\n if (!hasHeader) {\n refLines.push('Core References:');\n hasHeader = true;\n }\n\n refLines.push(\n `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`\n );\n refLines.push(\n `\\t- Anchor: \\\\ue202turn${turn}ref${references.length - 1}`\n );\n }\n\n if (hasHeader) {\n outputLines.push(...refLines);\n outputLines.push('');\n }\n }\n\n if (hIndex < (source.highlights?.length ?? 
0) - 1) {\n outputLines.push('---');\n outputLines.push('');\n }\n });\n\n outputLines.push('');\n return outputLines.join('\\n');\n}\n\nexport function formatResultsForLLM(\n turn: number,\n results: t.SearchResultData\n): { output: string; references: t.ResultReference[] } {\n /** Array to collect all output lines */\n const outputLines: string[] = [];\n\n const addSection = (title: string): void => {\n outputLines.push('');\n outputLines.push(`=== ${title} ===`);\n outputLines.push('');\n };\n\n const references: t.ResultReference[] = [];\n\n // Organic (web) results\n if (results.organic?.length != null && results.organic.length > 0) {\n addSection(`Web Results, Turn ${turn}`);\n for (let i = 0; i < results.organic.length; i++) {\n const r = results.organic[i];\n outputLines.push(formatSource(r, i, turn, 'search', references));\n delete results.organic[i].highlights;\n }\n }\n\n // Top stories (news)\n const topStories = results.topStories ?? [];\n if (topStories.length) {\n addSection('News Results');\n for (let i = 0; i < topStories.length; i++) {\n const r = topStories[i];\n outputLines.push(formatSource(r, i, turn, 'news', references));\n if (results.topStories?.[i]?.highlights) {\n delete results.topStories[i].highlights;\n }\n }\n }\n\n // // Images\n // const images = results.images ?? [];\n // if (images.length) {\n // addSection('Image Results');\n // const imageLines = images.map((img, i) => [\n // `Anchor: \\ue202turn0image${i}`,\n // `Title: ${img.title ?? '(no title)'}`,\n // `Image URL: ${img.imageUrl}`,\n // ''\n // ].join('\\n'));\n // outputLines.push(imageLines.join('\\n'));\n // }\n\n // Knowledge Graph\n if (results.knowledgeGraph != null) {\n addSection('Knowledge Graph');\n const kgLines = [\n `**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,\n results.knowledgeGraph.type != null\n ? `**Type:** ${results.knowledgeGraph.type}`\n : '',\n results.knowledgeGraph.description != null\n ? 
`**Description:** ${results.knowledgeGraph.description}`\n : '',\n results.knowledgeGraph.descriptionSource != null\n ? `**Description Source:** ${results.knowledgeGraph.descriptionSource}`\n : '',\n results.knowledgeGraph.descriptionLink != null\n ? `**Description Link:** ${results.knowledgeGraph.descriptionLink}`\n : '',\n results.knowledgeGraph.imageUrl != null\n ? `**Image URL:** ${results.knowledgeGraph.imageUrl}`\n : '',\n results.knowledgeGraph.website != null\n ? `**Website:** ${results.knowledgeGraph.website}`\n : '',\n results.knowledgeGraph.attributes != null\n ? `**Attributes:**\\n\\`\\`\\`json\\n${JSON.stringify(\n results.knowledgeGraph.attributes,\n null,\n 2\n )}\\n\\`\\`\\``\n : '',\n '',\n ].filter(Boolean);\n\n outputLines.push(kgLines.join('\\n\\n'));\n }\n\n // Answer Box\n if (results.answerBox != null) {\n addSection('Answer Box');\n const abLines = [\n results.answerBox.title != null\n ? `**Title:** ${results.answerBox.title}`\n : '',\n results.answerBox.snippet != null\n ? `**Snippet:** ${results.answerBox.snippet}`\n : '',\n results.answerBox.snippetHighlighted != null\n ? `**Snippet Highlighted:** ${results.answerBox.snippetHighlighted\n .map((s) => `\\`${s}\\``)\n .join(' ')}`\n : '',\n results.answerBox.link != null\n ? `**Link:** ${results.answerBox.link}`\n : '',\n '',\n ].filter(Boolean);\n\n outputLines.push(abLines.join('\\n\\n'));\n }\n\n // People also ask\n const peopleAlsoAsk = results.peopleAlsoAsk ?? [];\n if (peopleAlsoAsk.length) {\n addSection('People Also Ask');\n\n const paaLines: string[] = [];\n peopleAlsoAsk.forEach((p, i) => {\n const questionLines = [\n `### Question ${i + 1}:`,\n `\"${p.question}\"`,\n `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,\n `${p.title != null && p.title ? `Title: ${p.title}` : ''}`,\n `${p.link != null && p.link ? 
`Link: ${p.link}` : ''}`,\n '',\n ].filter(Boolean);\n\n paaLines.push(questionLines.join('\\n\\n'));\n });\n\n outputLines.push(paaLines.join(''));\n }\n\n return {\n output: outputLines.join('\\n').trim(),\n references,\n };\n}\n"],"names":["getDomainName"],"mappings":";;;;AAGA,SAAS,mBAAmB,GAAA;AAC1B,IAAA,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC;AAChC;AAEA;AACA,SAAS,YAAY,CACnB,MAAqB,EACrB,KAAa,EACb,IAAY,EACZ,UAA6B,EAC7B,UAA+B,EAAA;;IAG/B,MAAM,WAAW,GAAa,EAAE;;IAGhC,WAAW,CAAC,IAAI,CACd,CAAA,EAAA,EAAK,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA,CAAA,EAAI,KAAK,CAAA,EAAA,EAAK,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,KAAK,GAAG,IAAI,MAAM,CAAC,KAAK,CAAG,CAAA,CAAA,GAAG,YAAY,CAAE,CAAA,CACvJ;IACD,WAAW,CAAC,IAAI,CAAC,CAAwB,qBAAA,EAAA,IAAI,CAAG,EAAA,UAAU,CAAG,EAAA,KAAK,CAAE,CAAA,CAAC;IACrE,WAAW,CAAC,IAAI,CAAC,CAAA,KAAA,EAAQ,MAAM,CAAC,IAAI,CAAE,CAAA,CAAC;;IAGvC,IAAI,SAAS,IAAI,MAAM,IAAI,MAAM,CAAC,OAAO,IAAI,IAAI,EAAE;QACjD,WAAW,CAAC,IAAI,CAAC,CAAA,SAAA,EAAY,MAAM,CAAC,OAAO,CAAE,CAAA,CAAC;;AAGhD,IAAA,IAAI,MAAM,CAAC,IAAI,IAAI,IAAI,EAAE;QACvB,WAAW,CAAC,IAAI,CAAC,CAAA,MAAA,EAAS,MAAM,CAAC,IAAI,CAAE,CAAA,CAAC;;AAG1C,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,EAAE;QAC9B,WAAW,CAAC,IAAI,CAAC,CAAA,QAAA,EAAW,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;;AAInD,IAAA,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;AACxC,QAAA,WAAW,CAAC,IAAI,CAAC,GAAG,mBAAmB,EAAE,CAAC;;SACrC;AACL,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;;;AAItB,IAAA,CAAC,MAAM,CAAC,UAAU,IAAI,EAAE;AACrB,SAAA,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;AACtC,SAAA,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,KAAI;AACrB,QAAA,WAAW,CAAC,IAAI,CACd,iBAAiB,MAAM,GAAG,CAAC,CAAgB,aAAA,EAAA,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAA,CAAA,CAAG,CACjE;AACD,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACpB,QAAA,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC;QAC3B,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;AAC/B,QAAA,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;AACvB,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AAEpB,QAAA,IAAI,CAAC,CAAC,UAAU,IAAI,IAAI,IA
AI,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE;YAC/C,IAAI,SAAS,GAAG,KAAK;YACrB,MAAM,QAAQ,GAAa,EAAE;AAE7B,YAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5C,MAAM,GAAG,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC3B,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,GAAG,CAAC,IAAI;AACd,oBAAA,IAAI,EAAE,GAAG,CAAC,SAAS,CAAC,WAAW;oBAC/B,WAAW,EAAEA,mBAAa,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC;AACrD,oBAAA,KAAK,EAAE,CACL,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,IAAI,EAAE,KAAK,GAAG,CAAC,SAAS,CAAC,IAAI;wBAClD,EAAE,EACF,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,iBAAA,CAAC;AAEF,gBAAA,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE;oBACvB;;gBAGF,IAAI,CAAC,SAAS,EAAE;AACd,oBAAA,QAAQ,CAAC,IAAI,CAAC,kBAAkB,CAAC;oBACjC,SAAS,GAAG,IAAI;;gBAGlB,QAAQ,CAAC,IAAI,CACX,CAAA,EAAA,EAAK,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,aAAa,GAAG,CAAC,KAAK,GAAG,CAAC,SAAS,CAAC,WAAW,CAAE,CAAA,CACvE;AACD,gBAAA,QAAQ,CAAC,IAAI,CACX,CAAA,uBAAA,EAA0B,IAAI,CAAA,GAAA,EAAM,UAAU,CAAC,MAAM,GAAG,CAAC,CAAA,CAAE,CAC5D;;YAGH,IAAI,SAAS,EAAE;AACb,gBAAA,WAAW,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC;AAC7B,gBAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;;;AAIxB,QAAA,IAAI,MAAM,GAAG,CAAC,MAAM,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,IAAI,CAAC,EAAE;AACjD,YAAA,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;AACvB,YAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;;AAExB,KAAC,CAAC;AAEJ,IAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACpB,IAAA,OAAO,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;AAC/B;AAEgB,SAAA,mBAAmB,CACjC,IAAY,EACZ,OAA2B,EAAA;;IAG3B,MAAM,WAAW,GAAa,EAAE;AAEhC,IAAA,MAAM,UAAU,GAAG,CAAC,KAAa,KAAU;AACzC,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACpB,QAAA,WAAW,CAAC,IAAI,CAAC,OAAO,KAAK,CAAA,IAAA,CAAM,CAAC;AACpC,QAAA,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;AACtB,KAAC;IAED,MAAM,UAAU,GAAwB,EAAE;;AAG1C,IAAA,IAAI,OAAO,CAAC,OAAO,EAAE,MAAM,IAAI,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE;AACjE,QAAA,UAAU,CAAC,CAAA,kBAAA,EAAqB,IAAI,CAAA,CAAE,CAAC;AACvC,QAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC/C,MAAM,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;AAC5B,YAAA,WAAW,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,E
AAE,QAAQ,EAAE,UAAU,CAAC,CAAC;YAChE,OAAO,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU;;;;AAKxC,IAAA,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,EAAE;AAC3C,IAAA,IAAI,UAAU,CAAC,MAAM,EAAE;QACrB,UAAU,CAAC,cAAc,CAAC;AAC1B,QAAA,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;AAC1C,YAAA,MAAM,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC;AACvB,YAAA,WAAW,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;YAC9D,IAAI,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,EAAE,UAAU,EAAE;gBACvC,OAAO,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU;;;;;;;;;;;;;;;;;AAmB7C,IAAA,IAAI,OAAO,CAAC,cAAc,IAAI,IAAI,EAAE;QAClC,UAAU,CAAC,iBAAiB,CAAC;AAC7B,QAAA,MAAM,OAAO,GAAG;AACd,YAAA,CAAA,WAAA,EAAc,OAAO,CAAC,cAAc,CAAC,KAAK,IAAI,YAAY,CAAE,CAAA;AAC5D,YAAA,OAAO,CAAC,cAAc,CAAC,IAAI,IAAI;AAC7B,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,cAAc,CAAC,IAAI,CAAE;AAC5C,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,WAAW,IAAI;AACpC,kBAAE,CAAoB,iBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,WAAW,CAAE;AAC1D,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,IAAI;AAC1C,kBAAE,CAA2B,wBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,iBAAiB,CAAE;AACvE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,eAAe,IAAI;AACxC,kBAAE,CAAyB,sBAAA,EAAA,OAAO,CAAC,cAAc,CAAC,eAAe,CAAE;AACnE,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,IAAI;AACjC,kBAAE,CAAkB,eAAA,EAAA,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAE;AACrD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,OAAO,IAAI;AAChC,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,cAAc,CAAC,OAAO,CAAE;AAClD,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,cAAc,CAAC,UAAU,IAAI;AACnC,kBAAE,CAAgC,6BAAA,EAAA,IAAI,CAAC,SAAS,CAC9C,OAAO,CAAC,cAAc,CAAC,UAAU,EACjC,IAAI,EACJ,CAAC,CACF,CAAU,QAAA;AACX,kBAAE,EAAE;YACN,EAAE;AACH,SAAA,CAAC,MAAM,CAAC,OAAO,CAAC;QAEjB,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;;;AAIxC,IAAA,IAAI,OAAO,CAAC,SAAS,IAAI,IAAI,EAAE;QAC7B,UAAU,CAAC,YAAY,CAAC;AACxB,QAAA,MAAM,OAAO,GAAG;AACd,YAAA,OAAO,CAAC,SAAS,CAAC,KAAK,IAAI;AACzB,kBAAE,CAAc,WAAA,EAAA,OAAO,CAAC,SAAS,CAAC,KAAK,CAAE;AACzC,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,OAAO,IAAI;AAC3B,kBAAE,CAAgB,aAAA,EAAA,OAAO,CAAC,SAAS,CAAC,OAAO,CAAE
;AAC7C,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,kBAAkB,IAAI;AACtC,kBAAE,CAA4B,yBAAA,EAAA,OAAO,CAAC,SAAS,CAAC;qBAC7C,GAAG,CAAC,CAAC,CAAC,KAAK,CAAA,EAAA,EAAK,CAAC,CAAA,EAAA,CAAI;qBACrB,IAAI,CAAC,GAAG,CAAC,CAAE;AACd,kBAAE,EAAE;AACN,YAAA,OAAO,CAAC,SAAS,CAAC,IAAI,IAAI;AACxB,kBAAE,CAAa,UAAA,EAAA,OAAO,CAAC,SAAS,CAAC,IAAI,CAAE;AACvC,kBAAE,EAAE;YACN,EAAE;AACH,SAAA,CAAC,MAAM,CAAC,OAAO,CAAC;QAEjB,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;;;AAIxC,IAAA,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,EAAE;AACjD,IAAA,IAAI,aAAa,CAAC,MAAM,EAAE;QACxB,UAAU,CAAC,iBAAiB,CAAC;QAE7B,MAAM,QAAQ,GAAa,EAAE;QAC7B,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,KAAI;AAC7B,YAAA,MAAM,aAAa,GAAG;gBACpB,CAAgB,aAAA,EAAA,CAAC,GAAG,CAAC,CAAG,CAAA,CAAA;gBACxB,CAAI,CAAA,EAAA,CAAC,CAAC,QAAQ,CAAG,CAAA,CAAA;gBACjB,CAAG,EAAA,CAAC,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,OAAO,GAAG,CAAA,SAAA,EAAY,CAAC,CAAC,OAAO,EAAE,GAAG,EAAE,CAAE,CAAA;gBAClE,CAAG,EAAA,CAAC,CAAC,KAAK,IAAI,IAAI,IAAI,CAAC,CAAC,KAAK,GAAG,CAAA,OAAA,EAAU,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAE,CAAA;gBAC1D,CAAG,EAAA,CAAC,CAAC,IAAI,IAAI,IAAI,IAAI,CAAC,CAAC,IAAI,GAAG,CAAA,MAAA,EAAS,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,CAAE,CAAA;gBACtD,EAAE;AACH,aAAA,CAAC,MAAM,CAAC,OAAO,CAAC;YAEjB,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC3C,SAAC,CAAC;QAEF,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;;IAGrC,OAAO;QACL,MAAM,EAAE,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;QACrC,UAAU;KACX;AACH;;;;"}
|
|
@@ -40,7 +40,7 @@ const chunker = {
|
|
|
40
40
|
return Promise.all(promises);
|
|
41
41
|
},
|
|
42
42
|
};
|
|
43
|
-
|
|
43
|
+
function createSourceUpdateCallback(sourceMap) {
|
|
44
44
|
return (link, update) => {
|
|
45
45
|
const source = sourceMap.get(link);
|
|
46
46
|
if (source) {
|
|
@@ -50,7 +50,7 @@ const createSourceUpdateCallback = (sourceMap) => {
|
|
|
50
50
|
});
|
|
51
51
|
}
|
|
52
52
|
};
|
|
53
|
-
}
|
|
53
|
+
}
|
|
54
54
|
const getHighlights = async ({ query, content, reranker, topResults = 5, }) => {
|
|
55
55
|
if (!content) {
|
|
56
56
|
console.warn('No content provided for highlights');
|
|
@@ -225,11 +225,12 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
225
225
|
reranker, } = config;
|
|
226
226
|
const firecrawlScraper = scraperInstance;
|
|
227
227
|
const webScraper = {
|
|
228
|
-
scrapeMany: async ({ query, links, }) => {
|
|
228
|
+
scrapeMany: async ({ query, links, onGetHighlights, }) => {
|
|
229
229
|
console.log(`Scraping ${links.length} links with Firecrawl`);
|
|
230
230
|
const promises = [];
|
|
231
231
|
try {
|
|
232
|
-
for (
|
|
232
|
+
for (let i = 0; i < links.length; i++) {
|
|
233
|
+
const currentLink = links[i];
|
|
233
234
|
const promise = firecrawlScraper
|
|
234
235
|
.scrapeUrl(currentLink, {})
|
|
235
236
|
.then(([url, response]) => {
|
|
@@ -247,7 +248,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
247
248
|
url,
|
|
248
249
|
attribution,
|
|
249
250
|
error: true,
|
|
250
|
-
content:
|
|
251
|
+
content: '',
|
|
251
252
|
};
|
|
252
253
|
})
|
|
253
254
|
.then(async (result) => {
|
|
@@ -263,6 +264,9 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
263
264
|
reranker,
|
|
264
265
|
content: result.content,
|
|
265
266
|
});
|
|
267
|
+
if (onGetHighlights) {
|
|
268
|
+
onGetHighlights(result.url);
|
|
269
|
+
}
|
|
266
270
|
return {
|
|
267
271
|
...result,
|
|
268
272
|
highlights,
|
|
@@ -280,7 +284,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
280
284
|
return {
|
|
281
285
|
url: currentLink,
|
|
282
286
|
error: true,
|
|
283
|
-
content:
|
|
287
|
+
content: '',
|
|
284
288
|
};
|
|
285
289
|
});
|
|
286
290
|
promises.push(promise);
|
|
@@ -293,10 +297,14 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
293
297
|
}
|
|
294
298
|
},
|
|
295
299
|
};
|
|
296
|
-
const fetchContents = async ({ links, query, target, onContentScraped, }) => {
|
|
300
|
+
const fetchContents = async ({ links, query, target, onGetHighlights, onContentScraped, }) => {
|
|
297
301
|
const initialLinks = links.slice(0, target);
|
|
298
302
|
// const remainingLinks = links.slice(target).reverse();
|
|
299
|
-
const results = await webScraper.scrapeMany({
|
|
303
|
+
const results = await webScraper.scrapeMany({
|
|
304
|
+
query,
|
|
305
|
+
links: initialLinks,
|
|
306
|
+
onGetHighlights,
|
|
307
|
+
});
|
|
300
308
|
for (const result of results) {
|
|
301
309
|
if (result.error === true) {
|
|
302
310
|
continue;
|
|
@@ -310,7 +318,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
310
318
|
});
|
|
311
319
|
}
|
|
312
320
|
};
|
|
313
|
-
const processSources = async (result, numElements, query, proMode =
|
|
321
|
+
const processSources = async ({ result, numElements, query, proMode = true, onGetHighlights, }) => {
|
|
314
322
|
try {
|
|
315
323
|
if (!result.data) {
|
|
316
324
|
return {
|
|
@@ -334,6 +342,7 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
334
342
|
await fetchContents({
|
|
335
343
|
query,
|
|
336
344
|
target: 1,
|
|
345
|
+
onGetHighlights,
|
|
337
346
|
onContentScraped,
|
|
338
347
|
links: [wikiSources[0].link],
|
|
339
348
|
});
|
|
@@ -350,38 +359,44 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
350
359
|
return result.data;
|
|
351
360
|
}
|
|
352
361
|
const sourceMap = new Map();
|
|
353
|
-
const
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
if (allLinks.length === 0) {
|
|
362
|
+
const organicLinksSet = new Set();
|
|
363
|
+
// Collect organic links
|
|
364
|
+
const organicLinks = collectLinks(result.data.organic, sourceMap, organicLinksSet);
|
|
365
|
+
// Collect top story links, excluding any that are already in organic links
|
|
366
|
+
const topStories = result.data.topStories ?? [];
|
|
367
|
+
const topStoryLinks = collectLinks(topStories, sourceMap, organicLinksSet);
|
|
368
|
+
if (organicLinks.length === 0 && topStoryLinks.length === 0) {
|
|
361
369
|
return result.data;
|
|
362
370
|
}
|
|
363
371
|
const onContentScraped = createSourceUpdateCallback(sourceMap);
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
result.data.organic[i] = {
|
|
375
|
-
...source,
|
|
376
|
-
...updatedSource,
|
|
377
|
-
};
|
|
378
|
-
}
|
|
372
|
+
const promises = [];
|
|
373
|
+
// Process organic links
|
|
374
|
+
if (organicLinks.length > 0) {
|
|
375
|
+
promises.push(fetchContents({
|
|
376
|
+
query,
|
|
377
|
+
onGetHighlights,
|
|
378
|
+
onContentScraped,
|
|
379
|
+
links: organicLinks,
|
|
380
|
+
target: numElements,
|
|
381
|
+
}));
|
|
379
382
|
}
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
.
|
|
383
|
-
|
|
384
|
-
|
|
383
|
+
// Process top story links
|
|
384
|
+
if (topStoryLinks.length > 0) {
|
|
385
|
+
promises.push(fetchContents({
|
|
386
|
+
query,
|
|
387
|
+
onGetHighlights,
|
|
388
|
+
onContentScraped,
|
|
389
|
+
links: topStoryLinks,
|
|
390
|
+
target: numElements,
|
|
391
|
+
}));
|
|
392
|
+
}
|
|
393
|
+
await Promise.all(promises);
|
|
394
|
+
// Update sources with scraped content
|
|
395
|
+
if (result.data.organic.length > 0) {
|
|
396
|
+
updateSourcesWithContent(result.data.organic, sourceMap);
|
|
397
|
+
}
|
|
398
|
+
if (topStories.length > 0) {
|
|
399
|
+
updateSourcesWithContent(topStories, sourceMap);
|
|
385
400
|
}
|
|
386
401
|
return result.data;
|
|
387
402
|
}
|
|
@@ -402,6 +417,37 @@ const createSourceProcessor = (config = {}, scraperInstance) => {
|
|
|
402
417
|
topResults,
|
|
403
418
|
};
|
|
404
419
|
};
|
|
420
|
+
/** Helper function to collect links and update sourceMap */
|
|
421
|
+
function collectLinks(sources, sourceMap, existingLinksSet) {
|
|
422
|
+
const links = [];
|
|
423
|
+
for (const source of sources) {
|
|
424
|
+
if (source.link) {
|
|
425
|
+
// For topStories, only add if not already in organic links
|
|
426
|
+
if (existingLinksSet && existingLinksSet.has(source.link)) {
|
|
427
|
+
continue;
|
|
428
|
+
}
|
|
429
|
+
links.push(source.link);
|
|
430
|
+
if (existingLinksSet) {
|
|
431
|
+
existingLinksSet.add(source.link);
|
|
432
|
+
}
|
|
433
|
+
sourceMap.set(source.link, source);
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
return links;
|
|
437
|
+
}
|
|
438
|
+
/** Helper function to update sources with scraped content */
|
|
439
|
+
function updateSourcesWithContent(sources, sourceMap) {
|
|
440
|
+
for (let i = 0; i < sources.length; i++) {
|
|
441
|
+
const source = sources[i];
|
|
442
|
+
const updatedSource = sourceMap.get(source.link);
|
|
443
|
+
if (updatedSource) {
|
|
444
|
+
sources[i] = {
|
|
445
|
+
...source,
|
|
446
|
+
...updatedSource,
|
|
447
|
+
};
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
}
|
|
405
451
|
|
|
406
452
|
exports.createSearchAPI = createSearchAPI;
|
|
407
453
|
exports.createSourceProcessor = createSourceProcessor;
|