@librechat/agents 2.4.320 → 2.4.321
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/format.cjs +111 -80
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +67 -31
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +111 -80
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +67 -31
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/types/tools/search/types.d.ts +2 -0
- package/package.json +1 -1
- package/src/scripts/search.ts +4 -1
- package/src/tools/search/format.ts +149 -86
- package/src/tools/search/search.ts +95 -34
- package/src/tools/search/types.ts +2 -0
|
@@ -1,107 +1,165 @@
|
|
|
1
1
|
import type * as t from './types';
|
|
2
2
|
import { getDomainName } from './utils';
|
|
3
3
|
|
|
4
|
+
/** Returns the two lines that open a source's "Highlights" section. */
function addHighlightSection(): string[] {
  return ['\n## Highlights', ''];
}

/**
 * Formats a single source (organic web result or top story) as a text block.
 *
 * The block contains a title line, an anchor line, the URL, the optional
 * summary/date/attribution lines and, when the source carries highlights with
 * non-blank text, one sub-section per highlight.
 *
 * @param source - The source to format.
 * @param index - Position of the source in its result list; used in the title and anchor.
 * @param turn - Turn number embedded in every anchor.
 * @param sourceType - `'search'` or `'news'`; capitalised for the title and used verbatim in the anchor.
 * @param references - Shared accumulator. Every highlight reference is appended to it
 *   (this array is mutated), and reference anchors point at positions in it.
 * @returns The formatted lines joined with `\n`.
 */
function formatSource(
  source: t.ValidSource,
  index: number,
  turn: number,
  sourceType: 'search' | 'news',
  references: t.ResultReference[]
): string {
  /** Array of all lines to include in the output */
  const outputLines: string[] = [];

  // Title, anchor and URL are always present
  const label = sourceType.charAt(0).toUpperCase() + sourceType.slice(1);
  const title =
    source.title != null && source.title !== ''
      ? `"${source.title}"`
      : '(no title)';
  outputLines.push(`# ${label} ${index}: ${title}`);
  outputLines.push(`\nAnchor: \\ue202turn${turn}${sourceType}${index}`);
  outputLines.push(`URL: ${source.link}`);

  // Optional fields
  if ('snippet' in source && source.snippet != null) {
    outputLines.push(`Summary: ${source.snippet}`);
  }

  if (source.date != null) {
    outputLines.push(`Date: ${source.date}`);
  }

  if (source.attribution != null) {
    outputLines.push(`Source: ${source.attribution}`);
  }

  // Only highlights with non-blank text are rendered. The filtered list (not the
  // raw one) drives both the section header and the separators, so blank
  // highlights can neither produce an empty section nor a trailing `---`.
  const highlights = (source.highlights ?? []).filter(
    (h) => h.text.trim().length > 0
  );

  if (highlights.length > 0) {
    outputLines.push(...addHighlightSection());
  } else {
    outputLines.push('');
  }

  highlights.forEach((h, hIndex) => {
    outputLines.push(
      `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`
    );
    outputLines.push('');
    outputLines.push('```text');
    outputLines.push(h.text.trim());
    outputLines.push('```');
    outputLines.push('');

    if (h.references != null && h.references.length > 0) {
      const refLines: string[] = [];

      for (const ref of h.references) {
        // Every reference is recorded, whatever its type
        references.push({
          type: ref.type,
          link: ref.reference.originalUrl,
          attribution: getDomainName(ref.reference.originalUrl),
          // First line of the title, falling back to the reference text
          title: (
            ((ref.reference.title ?? '') || ref.reference.text) ??
            ''
          ).split('\n')[0],
        });

        // ...but only `link` references are listed in the output
        if (ref.type !== 'link') {
          continue;
        }

        if (refLines.length === 0) {
          refLines.push('Core References:');
        }

        refLines.push(
          `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`
        );
        // The anchor number is the position of the entry just pushed above
        refLines.push(
          `\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`
        );
      }

      if (refLines.length > 0) {
        outputLines.push(...refLines);
        outputLines.push('');
      }
    }

    // Separator between consecutive rendered highlights, never after the last one
    if (hIndex < highlights.length - 1) {
      outputLines.push('---');
      outputLines.push('');
    }
  });

  outputLines.push('');
  return outputLines.join('\n');
}
|
|
107
|
+
|
|
4
108
|
export function formatResultsForLLM(
|
|
5
109
|
turn: number,
|
|
6
110
|
results: t.SearchResultData
|
|
7
111
|
): { output: string; references: t.ResultReference[] } {
|
|
8
|
-
|
|
112
|
+
/** Array to collect all output lines */
|
|
113
|
+
const outputLines: string[] = [];
|
|
9
114
|
|
|
10
115
|
const addSection = (title: string): void => {
|
|
11
|
-
|
|
116
|
+
outputLines.push('');
|
|
117
|
+
outputLines.push(`=== ${title} ===`);
|
|
118
|
+
outputLines.push('');
|
|
12
119
|
};
|
|
13
120
|
|
|
14
121
|
const references: t.ResultReference[] = [];
|
|
122
|
+
|
|
15
123
|
// Organic (web) results
|
|
16
124
|
if (results.organic?.length != null && results.organic.length > 0) {
|
|
17
125
|
addSection(`Web Results, Turn ${turn}`);
|
|
18
126
|
for (let i = 0; i < results.organic.length; i++) {
|
|
19
127
|
const r = results.organic[i];
|
|
20
|
-
|
|
21
|
-
`# Source ${i}: "${r.title ?? '(no title)'}"`,
|
|
22
|
-
`Anchor: \\ue202turn${turn}search${i}`,
|
|
23
|
-
`URL: ${r.link}`,
|
|
24
|
-
r.snippet != null ? `Summary: ${r.snippet}` : '',
|
|
25
|
-
r.date != null ? `Date: ${r.date}` : '',
|
|
26
|
-
r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
27
|
-
'',
|
|
28
|
-
'\n## Highlights\n\n',
|
|
29
|
-
'',
|
|
30
|
-
'',
|
|
31
|
-
]
|
|
32
|
-
.filter(Boolean)
|
|
33
|
-
.join('\n');
|
|
34
|
-
|
|
35
|
-
(r.highlights ?? [])
|
|
36
|
-
.filter((h) => h.text.trim().length > 0)
|
|
37
|
-
.forEach((h, hIndex) => {
|
|
38
|
-
output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\n\n`;
|
|
39
|
-
output += '```text\n' + h.text.trim() + '\n```\n\n';
|
|
40
|
-
|
|
41
|
-
if (h.references != null && h.references.length) {
|
|
42
|
-
output += 'Core References:\n';
|
|
43
|
-
output += h.references
|
|
44
|
-
.map((ref) => {
|
|
45
|
-
references.push({
|
|
46
|
-
link: ref.reference.originalUrl,
|
|
47
|
-
attribution: getDomainName(ref.reference.originalUrl),
|
|
48
|
-
title: (
|
|
49
|
-
((ref.reference.title ?? '') || ref.reference.text) ??
|
|
50
|
-
''
|
|
51
|
-
).split('\n')[0],
|
|
52
|
-
});
|
|
53
|
-
return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\n\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`;
|
|
54
|
-
})
|
|
55
|
-
.join('\n');
|
|
56
|
-
output += '\n\n';
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
if (hIndex < (r.highlights?.length ?? 0) - 1) {
|
|
60
|
-
output += '---\n\n';
|
|
61
|
-
}
|
|
62
|
-
});
|
|
63
|
-
|
|
128
|
+
outputLines.push(formatSource(r, i, turn, 'search', references));
|
|
64
129
|
delete results.organic[i].highlights;
|
|
65
|
-
output += '\n';
|
|
66
130
|
}
|
|
67
131
|
}
|
|
68
132
|
|
|
69
|
-
//
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
// r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
82
|
-
// ''
|
|
83
|
-
// ].filter(Boolean).join('\n');
|
|
84
|
-
// });
|
|
85
|
-
// }
|
|
133
|
+
// Top stories (news)
|
|
134
|
+
const topStories = results.topStories ?? [];
|
|
135
|
+
if (topStories.length) {
|
|
136
|
+
addSection('News Results');
|
|
137
|
+
for (let i = 0; i < topStories.length; i++) {
|
|
138
|
+
const r = topStories[i];
|
|
139
|
+
outputLines.push(formatSource(r, i, turn, 'news', references));
|
|
140
|
+
if (results.topStories?.[i]?.highlights) {
|
|
141
|
+
delete results.topStories[i].highlights;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
86
145
|
|
|
87
146
|
// // Images
|
|
88
147
|
// const images = results.images ?? [];
|
|
89
148
|
// if (images.length) {
|
|
90
149
|
// addSection('Image Results');
|
|
91
|
-
// images.
|
|
92
|
-
//
|
|
93
|
-
//
|
|
94
|
-
//
|
|
95
|
-
//
|
|
96
|
-
//
|
|
97
|
-
//
|
|
98
|
-
// });
|
|
150
|
+
// const imageLines = images.map((img, i) => [
|
|
151
|
+
// `Anchor: \ue202turn0image${i}`,
|
|
152
|
+
// `Title: ${img.title ?? '(no title)'}`,
|
|
153
|
+
// `Image URL: ${img.imageUrl}`,
|
|
154
|
+
// ''
|
|
155
|
+
// ].join('\n'));
|
|
156
|
+
// outputLines.push(imageLines.join('\n'));
|
|
99
157
|
// }
|
|
100
158
|
|
|
101
159
|
// Knowledge Graph
|
|
102
160
|
if (results.knowledgeGraph != null) {
|
|
103
161
|
addSection('Knowledge Graph');
|
|
104
|
-
|
|
162
|
+
const kgLines = [
|
|
105
163
|
`**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
|
|
106
164
|
results.knowledgeGraph.type != null
|
|
107
165
|
? `**Type:** ${results.knowledgeGraph.type}`
|
|
@@ -129,15 +187,15 @@ export function formatResultsForLLM(
|
|
|
129
187
|
)}\n\`\`\``
|
|
130
188
|
: '',
|
|
131
189
|
'',
|
|
132
|
-
]
|
|
133
|
-
|
|
134
|
-
|
|
190
|
+
].filter(Boolean);
|
|
191
|
+
|
|
192
|
+
outputLines.push(kgLines.join('\n\n'));
|
|
135
193
|
}
|
|
136
194
|
|
|
137
195
|
// Answer Box
|
|
138
196
|
if (results.answerBox != null) {
|
|
139
197
|
addSection('Answer Box');
|
|
140
|
-
|
|
198
|
+
const abLines = [
|
|
141
199
|
results.answerBox.title != null
|
|
142
200
|
? `**Title:** ${results.answerBox.title}`
|
|
143
201
|
: '',
|
|
@@ -153,30 +211,35 @@ export function formatResultsForLLM(
|
|
|
153
211
|
? `**Link:** ${results.answerBox.link}`
|
|
154
212
|
: '',
|
|
155
213
|
'',
|
|
156
|
-
]
|
|
157
|
-
|
|
158
|
-
|
|
214
|
+
].filter(Boolean);
|
|
215
|
+
|
|
216
|
+
outputLines.push(abLines.join('\n\n'));
|
|
159
217
|
}
|
|
160
218
|
|
|
161
219
|
// People also ask
|
|
162
220
|
const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
|
|
163
221
|
if (peopleAlsoAsk.length) {
|
|
164
222
|
addSection('People Also Ask');
|
|
223
|
+
|
|
224
|
+
const paaLines: string[] = [];
|
|
165
225
|
peopleAlsoAsk.forEach((p, i) => {
|
|
166
|
-
|
|
226
|
+
const questionLines = [
|
|
167
227
|
`### Question ${i + 1}:`,
|
|
168
228
|
`"${p.question}"`,
|
|
169
|
-
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}
|
|
229
|
+
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,
|
|
170
230
|
`${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
|
|
171
231
|
`${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
|
|
172
232
|
'',
|
|
173
|
-
]
|
|
174
|
-
|
|
175
|
-
|
|
233
|
+
].filter(Boolean);
|
|
234
|
+
|
|
235
|
+
paaLines.push(questionLines.join('\n\n'));
|
|
176
236
|
});
|
|
237
|
+
|
|
238
|
+
outputLines.push(paaLines.join(''));
|
|
177
239
|
}
|
|
240
|
+
|
|
178
241
|
return {
|
|
179
|
-
output:
|
|
242
|
+
output: outputLines.join('\n').trim(),
|
|
180
243
|
references,
|
|
181
244
|
};
|
|
182
245
|
}
|
|
@@ -365,7 +365,7 @@ export const createSourceProcessor = (
|
|
|
365
365
|
url,
|
|
366
366
|
attribution,
|
|
367
367
|
error: true,
|
|
368
|
-
content:
|
|
368
|
+
content: '',
|
|
369
369
|
} as t.ScrapeResult;
|
|
370
370
|
})
|
|
371
371
|
.then(async (result) => {
|
|
@@ -402,7 +402,7 @@ export const createSourceProcessor = (
|
|
|
402
402
|
return {
|
|
403
403
|
url: currentLink,
|
|
404
404
|
error: true,
|
|
405
|
-
content:
|
|
405
|
+
content: '',
|
|
406
406
|
};
|
|
407
407
|
});
|
|
408
408
|
promises.push(promise);
|
|
@@ -503,49 +503,67 @@ export const createSourceProcessor = (
|
|
|
503
503
|
}
|
|
504
504
|
|
|
505
505
|
const sourceMap = new Map<string, t.ValidSource>();
|
|
506
|
-
const
|
|
506
|
+
const organicLinksSet = new Set<string>();
|
|
507
507
|
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
508
|
+
// Collect organic links
|
|
509
|
+
const organicLinks = collectLinks(
|
|
510
|
+
result.data.organic,
|
|
511
|
+
sourceMap,
|
|
512
|
+
organicLinksSet
|
|
513
|
+
);
|
|
514
514
|
|
|
515
|
-
|
|
515
|
+
// Collect top story links, excluding any that are already in organic links
|
|
516
|
+
const topStories = result.data.topStories ?? [];
|
|
517
|
+
const topStoryLinks = collectLinks(
|
|
518
|
+
topStories,
|
|
519
|
+
sourceMap,
|
|
520
|
+
organicLinksSet
|
|
521
|
+
);
|
|
522
|
+
|
|
523
|
+
if (organicLinks.length === 0 && topStoryLinks.length === 0) {
|
|
516
524
|
return result.data;
|
|
517
525
|
}
|
|
518
526
|
|
|
519
527
|
const onContentScraped = createSourceUpdateCallback(sourceMap);
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
528
|
+
const promises: Promise<void>[] = [];
|
|
529
|
+
|
|
530
|
+
// Process organic links
|
|
531
|
+
if (organicLinks.length > 0) {
|
|
532
|
+
promises.push(
|
|
533
|
+
fetchContents({
|
|
534
|
+
query,
|
|
535
|
+
onGetHighlights,
|
|
536
|
+
onContentScraped,
|
|
537
|
+
links: organicLinks,
|
|
538
|
+
target: numElements,
|
|
539
|
+
})
|
|
540
|
+
);
|
|
541
|
+
}
|
|
527
542
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
543
|
+
// Process top story links
|
|
544
|
+
if (topStoryLinks.length > 0) {
|
|
545
|
+
promises.push(
|
|
546
|
+
fetchContents({
|
|
547
|
+
query,
|
|
548
|
+
onGetHighlights,
|
|
549
|
+
onContentScraped,
|
|
550
|
+
links: topStoryLinks,
|
|
551
|
+
target: numElements,
|
|
552
|
+
})
|
|
553
|
+
);
|
|
537
554
|
}
|
|
538
555
|
|
|
539
|
-
|
|
540
|
-
.filter(
|
|
541
|
-
(source) =>
|
|
542
|
-
source.content != null && !source.content.startsWith('Failed')
|
|
543
|
-
)
|
|
544
|
-
.slice(0, numElements);
|
|
556
|
+
await Promise.all(promises);
|
|
545
557
|
|
|
546
|
-
|
|
547
|
-
|
|
558
|
+
// Update sources with scraped content
|
|
559
|
+
if (result.data.organic.length > 0) {
|
|
560
|
+
updateSourcesWithContent(result.data.organic, sourceMap);
|
|
548
561
|
}
|
|
562
|
+
|
|
563
|
+
if (topStories.length > 0) {
|
|
564
|
+
updateSourcesWithContent(topStories, sourceMap);
|
|
565
|
+
}
|
|
566
|
+
|
|
549
567
|
return result.data;
|
|
550
568
|
} catch (error) {
|
|
551
569
|
console.error('Error in processSources:', error);
|
|
@@ -565,3 +583,46 @@ export const createSourceProcessor = (
|
|
|
565
583
|
topResults,
|
|
566
584
|
};
|
|
567
585
|
};
|
|
586
|
+
|
|
587
|
+
/**
 * Collects the links of `sources` and registers each collected source in `sourceMap`.
 *
 * Sources without a link are ignored. A link is collected at most once: links
 * already present in `existingLinksSet` are skipped, and every collected link is
 * added to it, so passing the same set to successive calls (e.g. organic
 * results first, then top stories) prevents a URL from being collected twice.
 * When no set is supplied, a call-local one is used so that repeated links
 * within `sources` are still collected only once.
 *
 * @param sources - Results to read links from.
 * @param sourceMap - Mutated: maps each collected link to the first source that carried it.
 * @param existingLinksSet - Optional shared set of already-collected links; mutated.
 * @returns The collected links, in the order they were encountered.
 */
function collectLinks(
  sources: Array<t.OrganicResult | t.TopStoryResult>,
  sourceMap: Map<string, t.ValidSource>,
  existingLinksSet?: Set<string>
): string[] {
  const seenLinks = existingLinksSet ?? new Set<string>();
  const links: string[] = [];

  for (const source of sources) {
    const { link } = source;
    if (!link || seenLinks.has(link)) {
      continue;
    }

    seenLinks.add(link);
    links.push(link);
    sourceMap.set(link, source as t.ValidSource);
  }

  return links;
}
|
|
612
|
+
|
|
613
|
+
/**
 * Overlays the records held in `sourceMap` onto `sources`, in place.
 *
 * For each entry whose link has a record in the map, the array slot is replaced
 * by a new object combining the entry with that record; on conflicting keys the
 * record from the map wins. Entries without a record are left untouched.
 *
 * @param sources - Array to update; its slots are reassigned.
 * @param sourceMap - Lookup from link to the record to overlay.
 */
function updateSourcesWithContent<T extends t.ValidSource>(
  sources: T[],
  sourceMap: Map<string, t.ValidSource>
): void {
  for (const [position, current] of sources.entries()) {
    const replacement = sourceMap.get(current.link);
    if (!replacement) {
      continue;
    }
    sources[position] = { ...current, ...replacement } as T;
  }
}
|
|
@@ -16,6 +16,7 @@ export type ProcessedSource = {
|
|
|
16
16
|
attribution?: string;
|
|
17
17
|
references?: References;
|
|
18
18
|
highlights?: Highlight[];
|
|
19
|
+
processed?: boolean;
|
|
19
20
|
};
|
|
20
21
|
|
|
21
22
|
export type ProcessedOrganic = OrganicResult & ProcessedSource;
|
|
@@ -24,6 +25,7 @@ export type ValidSource = ProcessedOrganic | ProcessedTopStory;
|
|
|
24
25
|
|
|
25
26
|
/** A reference entry returned alongside the formatted search output. */
export type ResultReference = {
  /** URL of the referenced resource. */
  link: string;
  /** Kind of resource the reference points to. */
  type: 'link' | 'image' | 'video';
  /** Optional display title. */
  title?: string;
  /** Optional attribution label. */
  attribution?: string;
};
|