@librechat/agents 2.4.319 → 2.4.321
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/format.cjs +111 -80
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +83 -37
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +83 -57
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +3 -1
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +111 -80
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +83 -37
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +82 -56
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +3 -1
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/types/tools/search/search.d.ts +1 -1
- package/dist/types/tools/search/types.d.ts +22 -1
- package/package.json +1 -1
- package/src/scripts/search.ts +4 -1
- package/src/tools/search/format.ts +149 -86
- package/src/tools/search/search.ts +120 -47
- package/src/tools/search/tool.ts +137 -89
- package/src/tools/search/types.ts +30 -1
- package/src/tools/search/utils.ts +5 -1
|
@@ -1,107 +1,165 @@
|
|
|
1
1
|
import type * as t from './types';
|
|
2
2
|
import { getDomainName } from './utils';
|
|
3
3
|
|
|
4
|
+
/** Returns the lines that open the "Highlights" sub-section of a formatted source. */
function addHighlightSection(): string[] {
  return ['\n## Highlights', ''];
}

/**
 * Formats one source (an organic web result or a top story) as a text block.
 *
 * The block contains, in order: a title line, an anchor line of the form
 * `\ue202turn{turn}{sourceType}{index}` (emitted as literal text), the URL,
 * the optional snippet/date/attribution lines, and one sub-section per
 * highlight whose text is not blank.
 *
 * Side effect: every reference attached to a rendered highlight is appended to
 * `references`, regardless of its type; only references of type `'link'` are
 * listed in the output, each with an anchor pointing at its position in
 * `references`.
 *
 * @param source - The source to format.
 * @param index - Position of the source within its result list; used in the title and anchor.
 * @param turn - Turn number embedded in every anchor.
 * @param sourceType - `'search'` or `'news'`; capitalised for the title and used verbatim in the anchor.
 * @param references - Accumulator that collected references are pushed onto (mutated).
 * @returns The formatted block, with lines joined by `\n`.
 */
function formatSource(
  source: t.ValidSource,
  index: number,
  turn: number,
  sourceType: 'search' | 'news',
  references: t.ResultReference[]
): string {
  /** Array of all lines to include in the output */
  const outputLines: string[] = [];

  // Title, anchor and URL are always present.
  outputLines.push(
    `# ${sourceType.charAt(0).toUpperCase() + sourceType.slice(1)} ${index}: ${source.title != null && source.title ? `"${source.title}"` : '(no title)'}`
  );
  outputLines.push(`\nAnchor: \\ue202turn${turn}${sourceType}${index}`);
  outputLines.push(`URL: ${source.link}`);

  // Optional fields
  if ('snippet' in source && source.snippet != null) {
    outputLines.push(`Summary: ${source.snippet}`);
  }

  if (source.date != null) {
    outputLines.push(`Date: ${source.date}`);
  }

  if (source.attribution != null) {
    outputLines.push(`Source: ${source.attribution}`);
  }

  // Only highlights with non-blank text are rendered. The heading and the
  // separator logic below must both be driven by this filtered list: using the
  // unfiltered length would print a heading with nothing under it, or a
  // trailing `---` after the last rendered highlight.
  const highlights = (source.highlights ?? []).filter(
    (h) => h.text.trim().length > 0
  );

  if (highlights.length > 0) {
    outputLines.push(...addHighlightSection());
  } else {
    outputLines.push('');
  }

  highlights.forEach((h, hIndex) => {
    outputLines.push(
      `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`
    );
    outputLines.push('');
    outputLines.push('```text');
    outputLines.push(h.text.trim());
    outputLines.push('```');
    outputLines.push('');

    if (h.references != null && h.references.length) {
      let hasHeader = false;
      const refLines: string[] = [];

      for (let j = 0; j < h.references.length; j++) {
        const ref = h.references[j];
        // Every reference is recorded, so anchors index into the shared list.
        references.push({
          type: ref.type,
          link: ref.reference.originalUrl,
          attribution: getDomainName(ref.reference.originalUrl),
          // First line of the title, falling back to the reference text.
          title: (
            ((ref.reference.title ?? '') || ref.reference.text) ??
            ''
          ).split('\n')[0],
        });

        // Only link references are listed in the output.
        if (ref.type !== 'link') {
          continue;
        }

        if (!hasHeader) {
          refLines.push('Core References:');
          hasHeader = true;
        }

        refLines.push(
          `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`
        );
        refLines.push(
          `\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`
        );
      }

      if (hasHeader) {
        outputLines.push(...refLines);
        outputLines.push('');
      }
    }

    // Separator between rendered highlights, never after the last one.
    if (hIndex < highlights.length - 1) {
      outputLines.push('---');
      outputLines.push('');
    }
  });

  outputLines.push('');
  return outputLines.join('\n');
}
|
|
107
|
+
|
|
4
108
|
export function formatResultsForLLM(
|
|
5
109
|
turn: number,
|
|
6
110
|
results: t.SearchResultData
|
|
7
111
|
): { output: string; references: t.ResultReference[] } {
|
|
8
|
-
|
|
112
|
+
/** Array to collect all output lines */
|
|
113
|
+
const outputLines: string[] = [];
|
|
9
114
|
|
|
10
115
|
const addSection = (title: string): void => {
|
|
11
|
-
|
|
116
|
+
outputLines.push('');
|
|
117
|
+
outputLines.push(`=== ${title} ===`);
|
|
118
|
+
outputLines.push('');
|
|
12
119
|
};
|
|
13
120
|
|
|
14
121
|
const references: t.ResultReference[] = [];
|
|
122
|
+
|
|
15
123
|
// Organic (web) results
|
|
16
124
|
if (results.organic?.length != null && results.organic.length > 0) {
|
|
17
125
|
addSection(`Web Results, Turn ${turn}`);
|
|
18
126
|
for (let i = 0; i < results.organic.length; i++) {
|
|
19
127
|
const r = results.organic[i];
|
|
20
|
-
|
|
21
|
-
`# Source ${i}: "${r.title ?? '(no title)'}"`,
|
|
22
|
-
`Anchor: \\ue202turn${turn}search${i}`,
|
|
23
|
-
`URL: ${r.link}`,
|
|
24
|
-
r.snippet != null ? `Summary: ${r.snippet}` : '',
|
|
25
|
-
r.date != null ? `Date: ${r.date}` : '',
|
|
26
|
-
r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
27
|
-
'',
|
|
28
|
-
'\n## Highlights\n\n',
|
|
29
|
-
'',
|
|
30
|
-
'',
|
|
31
|
-
]
|
|
32
|
-
.filter(Boolean)
|
|
33
|
-
.join('\n');
|
|
34
|
-
|
|
35
|
-
(r.highlights ?? [])
|
|
36
|
-
.filter((h) => h.text.trim().length > 0)
|
|
37
|
-
.forEach((h, hIndex) => {
|
|
38
|
-
output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\n\n`;
|
|
39
|
-
output += '```text\n' + h.text.trim() + '\n```\n\n';
|
|
40
|
-
|
|
41
|
-
if (h.references != null && h.references.length) {
|
|
42
|
-
output += 'Core References:\n';
|
|
43
|
-
output += h.references
|
|
44
|
-
.map((ref) => {
|
|
45
|
-
references.push({
|
|
46
|
-
link: ref.reference.originalUrl,
|
|
47
|
-
attribution: getDomainName(ref.reference.originalUrl),
|
|
48
|
-
title: (
|
|
49
|
-
((ref.reference.title ?? '') || ref.reference.text) ??
|
|
50
|
-
''
|
|
51
|
-
).split('\n')[0],
|
|
52
|
-
});
|
|
53
|
-
return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\n\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`;
|
|
54
|
-
})
|
|
55
|
-
.join('\n');
|
|
56
|
-
output += '\n\n';
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
if (hIndex < (r.highlights?.length ?? 0) - 1) {
|
|
60
|
-
output += '---\n\n';
|
|
61
|
-
}
|
|
62
|
-
});
|
|
63
|
-
|
|
128
|
+
outputLines.push(formatSource(r, i, turn, 'search', references));
|
|
64
129
|
delete results.organic[i].highlights;
|
|
65
|
-
output += '\n';
|
|
66
130
|
}
|
|
67
131
|
}
|
|
68
132
|
|
|
69
|
-
//
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
// r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
82
|
-
// ''
|
|
83
|
-
// ].filter(Boolean).join('\n');
|
|
84
|
-
// });
|
|
85
|
-
// }
|
|
133
|
+
// Top stories (news)
|
|
134
|
+
const topStories = results.topStories ?? [];
|
|
135
|
+
if (topStories.length) {
|
|
136
|
+
addSection('News Results');
|
|
137
|
+
for (let i = 0; i < topStories.length; i++) {
|
|
138
|
+
const r = topStories[i];
|
|
139
|
+
outputLines.push(formatSource(r, i, turn, 'news', references));
|
|
140
|
+
if (results.topStories?.[i]?.highlights) {
|
|
141
|
+
delete results.topStories[i].highlights;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
86
145
|
|
|
87
146
|
// // Images
|
|
88
147
|
// const images = results.images ?? [];
|
|
89
148
|
// if (images.length) {
|
|
90
149
|
// addSection('Image Results');
|
|
91
|
-
// images.
|
|
92
|
-
//
|
|
93
|
-
//
|
|
94
|
-
//
|
|
95
|
-
//
|
|
96
|
-
//
|
|
97
|
-
//
|
|
98
|
-
// });
|
|
150
|
+
// const imageLines = images.map((img, i) => [
|
|
151
|
+
// `Anchor: \ue202turn0image${i}`,
|
|
152
|
+
// `Title: ${img.title ?? '(no title)'}`,
|
|
153
|
+
// `Image URL: ${img.imageUrl}`,
|
|
154
|
+
// ''
|
|
155
|
+
// ].join('\n'));
|
|
156
|
+
// outputLines.push(imageLines.join('\n'));
|
|
99
157
|
// }
|
|
100
158
|
|
|
101
159
|
// Knowledge Graph
|
|
102
160
|
if (results.knowledgeGraph != null) {
|
|
103
161
|
addSection('Knowledge Graph');
|
|
104
|
-
|
|
162
|
+
const kgLines = [
|
|
105
163
|
`**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
|
|
106
164
|
results.knowledgeGraph.type != null
|
|
107
165
|
? `**Type:** ${results.knowledgeGraph.type}`
|
|
@@ -129,15 +187,15 @@ export function formatResultsForLLM(
|
|
|
129
187
|
)}\n\`\`\``
|
|
130
188
|
: '',
|
|
131
189
|
'',
|
|
132
|
-
]
|
|
133
|
-
|
|
134
|
-
|
|
190
|
+
].filter(Boolean);
|
|
191
|
+
|
|
192
|
+
outputLines.push(kgLines.join('\n\n'));
|
|
135
193
|
}
|
|
136
194
|
|
|
137
195
|
// Answer Box
|
|
138
196
|
if (results.answerBox != null) {
|
|
139
197
|
addSection('Answer Box');
|
|
140
|
-
|
|
198
|
+
const abLines = [
|
|
141
199
|
results.answerBox.title != null
|
|
142
200
|
? `**Title:** ${results.answerBox.title}`
|
|
143
201
|
: '',
|
|
@@ -153,30 +211,35 @@ export function formatResultsForLLM(
|
|
|
153
211
|
? `**Link:** ${results.answerBox.link}`
|
|
154
212
|
: '',
|
|
155
213
|
'',
|
|
156
|
-
]
|
|
157
|
-
|
|
158
|
-
|
|
214
|
+
].filter(Boolean);
|
|
215
|
+
|
|
216
|
+
outputLines.push(abLines.join('\n\n'));
|
|
159
217
|
}
|
|
160
218
|
|
|
161
219
|
// People also ask
|
|
162
220
|
const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
|
|
163
221
|
if (peopleAlsoAsk.length) {
|
|
164
222
|
addSection('People Also Ask');
|
|
223
|
+
|
|
224
|
+
const paaLines: string[] = [];
|
|
165
225
|
peopleAlsoAsk.forEach((p, i) => {
|
|
166
|
-
|
|
226
|
+
const questionLines = [
|
|
167
227
|
`### Question ${i + 1}:`,
|
|
168
228
|
`"${p.question}"`,
|
|
169
|
-
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}
|
|
229
|
+
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,
|
|
170
230
|
`${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
|
|
171
231
|
`${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
|
|
172
232
|
'',
|
|
173
|
-
]
|
|
174
|
-
|
|
175
|
-
|
|
233
|
+
].filter(Boolean);
|
|
234
|
+
|
|
235
|
+
paaLines.push(questionLines.join('\n\n'));
|
|
176
236
|
});
|
|
237
|
+
|
|
238
|
+
outputLines.push(paaLines.join(''));
|
|
177
239
|
}
|
|
240
|
+
|
|
178
241
|
return {
|
|
179
|
-
output:
|
|
242
|
+
output: outputLines.join('\n').trim(),
|
|
180
243
|
references,
|
|
181
244
|
};
|
|
182
245
|
}
|
|
@@ -65,7 +65,7 @@ const chunker = {
|
|
|
65
65
|
},
|
|
66
66
|
};
|
|
67
67
|
|
|
68
|
-
|
|
68
|
+
function createSourceUpdateCallback(sourceMap: Map<string, t.ValidSource>) {
|
|
69
69
|
return (link: string, update?: Partial<t.ValidSource>): void => {
|
|
70
70
|
const source = sourceMap.get(link);
|
|
71
71
|
if (source) {
|
|
@@ -75,7 +75,7 @@ const createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {
|
|
|
75
75
|
});
|
|
76
76
|
}
|
|
77
77
|
};
|
|
78
|
-
}
|
|
78
|
+
}
|
|
79
79
|
|
|
80
80
|
const getHighlights = async ({
|
|
81
81
|
query,
|
|
@@ -315,10 +315,7 @@ export const createSourceProcessor = (
|
|
|
315
315
|
scraperInstance?: FirecrawlScraper
|
|
316
316
|
): {
|
|
317
317
|
processSources: (
|
|
318
|
-
|
|
319
|
-
numElements: number,
|
|
320
|
-
query: string,
|
|
321
|
-
proMode?: boolean
|
|
318
|
+
fields: t.ProcessSourcesFields
|
|
322
319
|
) => Promise<t.SearchResultData>;
|
|
323
320
|
topResults: number;
|
|
324
321
|
} => {
|
|
@@ -338,14 +335,17 @@ export const createSourceProcessor = (
|
|
|
338
335
|
scrapeMany: async ({
|
|
339
336
|
query,
|
|
340
337
|
links,
|
|
338
|
+
onGetHighlights,
|
|
341
339
|
}: {
|
|
342
340
|
query: string;
|
|
343
341
|
links: string[];
|
|
342
|
+
onGetHighlights: t.SearchToolConfig['onGetHighlights'];
|
|
344
343
|
}): Promise<Array<t.ScrapeResult>> => {
|
|
345
344
|
console.log(`Scraping ${links.length} links with Firecrawl`);
|
|
346
345
|
const promises: Array<Promise<t.ScrapeResult>> = [];
|
|
347
346
|
try {
|
|
348
|
-
for (
|
|
347
|
+
for (let i = 0; i < links.length; i++) {
|
|
348
|
+
const currentLink = links[i];
|
|
349
349
|
const promise: Promise<t.ScrapeResult> = firecrawlScraper
|
|
350
350
|
.scrapeUrl(currentLink, {})
|
|
351
351
|
.then(([url, response]) => {
|
|
@@ -365,7 +365,7 @@ export const createSourceProcessor = (
|
|
|
365
365
|
url,
|
|
366
366
|
attribution,
|
|
367
367
|
error: true,
|
|
368
|
-
content:
|
|
368
|
+
content: '',
|
|
369
369
|
} as t.ScrapeResult;
|
|
370
370
|
})
|
|
371
371
|
.then(async (result) => {
|
|
@@ -383,6 +383,9 @@ export const createSourceProcessor = (
|
|
|
383
383
|
reranker,
|
|
384
384
|
content: result.content,
|
|
385
385
|
});
|
|
386
|
+
if (onGetHighlights) {
|
|
387
|
+
onGetHighlights(result.url);
|
|
388
|
+
}
|
|
386
389
|
return {
|
|
387
390
|
...result,
|
|
388
391
|
highlights,
|
|
@@ -399,7 +402,7 @@ export const createSourceProcessor = (
|
|
|
399
402
|
return {
|
|
400
403
|
url: currentLink,
|
|
401
404
|
error: true,
|
|
402
|
-
content:
|
|
405
|
+
content: '',
|
|
403
406
|
};
|
|
404
407
|
});
|
|
405
408
|
promises.push(promise);
|
|
@@ -416,16 +419,22 @@ export const createSourceProcessor = (
|
|
|
416
419
|
links,
|
|
417
420
|
query,
|
|
418
421
|
target,
|
|
422
|
+
onGetHighlights,
|
|
419
423
|
onContentScraped,
|
|
420
424
|
}: {
|
|
421
425
|
links: string[];
|
|
422
426
|
query: string;
|
|
423
427
|
target: number;
|
|
428
|
+
onGetHighlights: t.SearchToolConfig['onGetHighlights'];
|
|
424
429
|
onContentScraped?: (link: string, update?: Partial<t.ValidSource>) => void;
|
|
425
430
|
}): Promise<void> => {
|
|
426
431
|
const initialLinks = links.slice(0, target);
|
|
427
432
|
// const remainingLinks = links.slice(target).reverse();
|
|
428
|
-
const results = await webScraper.scrapeMany({
|
|
433
|
+
const results = await webScraper.scrapeMany({
|
|
434
|
+
query,
|
|
435
|
+
links: initialLinks,
|
|
436
|
+
onGetHighlights,
|
|
437
|
+
});
|
|
429
438
|
for (const result of results) {
|
|
430
439
|
if (result.error === true) {
|
|
431
440
|
continue;
|
|
@@ -440,12 +449,13 @@ export const createSourceProcessor = (
|
|
|
440
449
|
}
|
|
441
450
|
};
|
|
442
451
|
|
|
443
|
-
const processSources = async (
|
|
444
|
-
result
|
|
445
|
-
numElements
|
|
446
|
-
query
|
|
447
|
-
proMode
|
|
448
|
-
|
|
452
|
+
const processSources = async ({
|
|
453
|
+
result,
|
|
454
|
+
numElements,
|
|
455
|
+
query,
|
|
456
|
+
proMode = true,
|
|
457
|
+
onGetHighlights,
|
|
458
|
+
}: t.ProcessSourcesFields): Promise<t.SearchResultData> => {
|
|
449
459
|
try {
|
|
450
460
|
if (!result.data) {
|
|
451
461
|
return {
|
|
@@ -473,6 +483,7 @@ export const createSourceProcessor = (
|
|
|
473
483
|
await fetchContents({
|
|
474
484
|
query,
|
|
475
485
|
target: 1,
|
|
486
|
+
onGetHighlights,
|
|
476
487
|
onContentScraped,
|
|
477
488
|
links: [wikiSources[0].link],
|
|
478
489
|
});
|
|
@@ -492,48 +503,67 @@ export const createSourceProcessor = (
|
|
|
492
503
|
}
|
|
493
504
|
|
|
494
505
|
const sourceMap = new Map<string, t.ValidSource>();
|
|
495
|
-
const
|
|
506
|
+
const organicLinksSet = new Set<string>();
|
|
496
507
|
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
508
|
+
// Collect organic links
|
|
509
|
+
const organicLinks = collectLinks(
|
|
510
|
+
result.data.organic,
|
|
511
|
+
sourceMap,
|
|
512
|
+
organicLinksSet
|
|
513
|
+
);
|
|
503
514
|
|
|
504
|
-
|
|
515
|
+
// Collect top story links, excluding any that are already in organic links
|
|
516
|
+
const topStories = result.data.topStories ?? [];
|
|
517
|
+
const topStoryLinks = collectLinks(
|
|
518
|
+
topStories,
|
|
519
|
+
sourceMap,
|
|
520
|
+
organicLinksSet
|
|
521
|
+
);
|
|
522
|
+
|
|
523
|
+
if (organicLinks.length === 0 && topStoryLinks.length === 0) {
|
|
505
524
|
return result.data;
|
|
506
525
|
}
|
|
507
526
|
|
|
508
527
|
const onContentScraped = createSourceUpdateCallback(sourceMap);
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
528
|
+
const promises: Promise<void>[] = [];
|
|
529
|
+
|
|
530
|
+
// Process organic links
|
|
531
|
+
if (organicLinks.length > 0) {
|
|
532
|
+
promises.push(
|
|
533
|
+
fetchContents({
|
|
534
|
+
query,
|
|
535
|
+
onGetHighlights,
|
|
536
|
+
onContentScraped,
|
|
537
|
+
links: organicLinks,
|
|
538
|
+
target: numElements,
|
|
539
|
+
})
|
|
540
|
+
);
|
|
541
|
+
}
|
|
515
542
|
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
543
|
+
// Process top story links
|
|
544
|
+
if (topStoryLinks.length > 0) {
|
|
545
|
+
promises.push(
|
|
546
|
+
fetchContents({
|
|
547
|
+
query,
|
|
548
|
+
onGetHighlights,
|
|
549
|
+
onContentScraped,
|
|
550
|
+
links: topStoryLinks,
|
|
551
|
+
target: numElements,
|
|
552
|
+
})
|
|
553
|
+
);
|
|
525
554
|
}
|
|
526
555
|
|
|
527
|
-
|
|
528
|
-
.filter(
|
|
529
|
-
(source) =>
|
|
530
|
-
source.content != null && !source.content.startsWith('Failed')
|
|
531
|
-
)
|
|
532
|
-
.slice(0, numElements);
|
|
556
|
+
await Promise.all(promises);
|
|
533
557
|
|
|
534
|
-
|
|
535
|
-
|
|
558
|
+
// Update sources with scraped content
|
|
559
|
+
if (result.data.organic.length > 0) {
|
|
560
|
+
updateSourcesWithContent(result.data.organic, sourceMap);
|
|
536
561
|
}
|
|
562
|
+
|
|
563
|
+
if (topStories.length > 0) {
|
|
564
|
+
updateSourcesWithContent(topStories, sourceMap);
|
|
565
|
+
}
|
|
566
|
+
|
|
537
567
|
return result.data;
|
|
538
568
|
} catch (error) {
|
|
539
569
|
console.error('Error in processSources:', error);
|
|
@@ -553,3 +583,46 @@ export const createSourceProcessor = (
|
|
|
553
583
|
topResults,
|
|
554
584
|
};
|
|
555
585
|
};
|
|
586
|
+
|
|
587
|
+
/**
 * Gathers the non-empty links of `sources` and registers each kept source in
 * `sourceMap` under its link.
 *
 * When `existingLinksSet` is supplied, any link already in the set is skipped
 * and every kept link is added to it, so the same set can be shared across
 * successive calls to avoid collecting a link twice.
 *
 * @param sources - Results to read links from.
 * @param sourceMap - Map from link to source; mutated with every kept source.
 * @param existingLinksSet - Optional set of links already seen; mutated with every kept link.
 * @returns The kept links, in input order.
 */
function collectLinks(
  sources: Array<t.OrganicResult | t.TopStoryResult>,
  sourceMap: Map<string, t.ValidSource>,
  existingLinksSet?: Set<string>
): string[] {
  const collected: string[] = [];

  sources.forEach((entry) => {
    const { link } = entry;
    // Sources without a usable link are ignored.
    if (!link) {
      return;
    }
    // Already seen in an earlier collection (or earlier in this one).
    if (existingLinksSet?.has(link) === true) {
      return;
    }

    collected.push(link);
    existingLinksSet?.add(link);
    sourceMap.set(link, entry as t.ValidSource);
  });

  return collected;
}
|
|
612
|
+
|
|
613
|
+
/**
 * Replaces, in place, each entry of `sources` whose link has an entry in
 * `sourceMap` with a new object combining both; fields from the map entry
 * override those of the original. Entries without a map match are left as-is.
 *
 * @param sources - Array of sources to update (mutated).
 * @param sourceMap - Map from link to the source data to merge in.
 */
function updateSourcesWithContent<T extends t.ValidSource>(
  sources: T[],
  sourceMap: Map<string, t.ValidSource>
): void {
  for (const [position, original] of sources.entries()) {
    const scraped = sourceMap.get(original.link);
    if (!scraped) {
      continue;
    }
    sources[position] = { ...original, ...scraped } as T;
  }
}
|