@librechat/agents 2.4.320 → 2.4.321

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,107 +1,165 @@
1
1
  import type * as t from './types';
2
2
  import { getDomainName } from './utils';
3
3
 
4
/**
 * Builds the lines that open the highlight listing of a formatted source.
 *
 * @returns Two entries: the `## Highlights` heading (carrying a leading
 * newline, so a blank line precedes it once the caller joins its lines with
 * `\n`) followed by an empty line.
 */
function addHighlightSection(): string[] {
  return ['\n## Highlights', ''];
}
7
+
8
/**
 * Formats a single source (organic result or top story) as a text block.
 *
 * The block contains, in order: a title line, an anchor line, the URL, the
 * optional summary/date/attribution lines, and one sub-section per highlight
 * whose text is not blank. Every reference attached to a rendered highlight is
 * appended to `references`; only references of type `'link'` are also listed
 * under "Core References:", each with an anchor that points at its index in
 * `references`.
 *
 * @param source - The source to render.
 * @param index - Position of the source within its result list; used in the title and anchor.
 * @param turn - Turn number embedded in every anchor.
 * @param sourceType - `'search'` or `'news'`; used capitalized in the title and verbatim in the anchor.
 * @param references - Accumulator that is mutated: one entry is pushed per reference of each rendered highlight.
 * @returns The rendered lines joined with `\n`.
 */
function formatSource(
  source: t.ValidSource,
  index: number,
  turn: number,
  sourceType: 'search' | 'news',
  references: t.ResultReference[]
): string {
  /** Array of all lines to include in the output */
  const outputLines: string[] = [];

  // Title line, e.g. `# Search 0: "Some title"`.
  const label = sourceType.charAt(0).toUpperCase() + sourceType.slice(1);
  const title =
    source.title != null && source.title !== ''
      ? `"${source.title}"`
      : '(no title)';
  outputLines.push(`# ${label} ${index}: ${title}`);
  outputLines.push(`\nAnchor: \\ue202turn${turn}${sourceType}${index}`);
  outputLines.push(`URL: ${source.link}`);

  // Optional fields
  if ('snippet' in source && source.snippet != null) {
    outputLines.push(`Summary: ${source.snippet}`);
  }

  if (source.date != null) {
    outputLines.push(`Date: ${source.date}`);
  }

  if (source.attribution != null) {
    outputLines.push(`Source: ${source.attribution}`);
  }

  // Filter once so that the heading and separator decisions below are based on
  // the highlights that are actually rendered, not on blank ones that are
  // dropped. Otherwise blank highlights yield an empty "Highlights" section or
  // a dangling `---` after the last rendered highlight.
  const highlights = (source.highlights ?? []).filter(
    (h) => h.text.trim().length > 0
  );

  // Highlight heading, or a single empty line when nothing will be rendered
  if (highlights.length > 0) {
    outputLines.push(...addHighlightSection());
  } else {
    outputLines.push('');
  }

  highlights.forEach((h, hIndex) => {
    outputLines.push(
      `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`
    );
    outputLines.push('');
    outputLines.push('```text');
    outputLines.push(h.text.trim());
    outputLines.push('```');
    outputLines.push('');

    if (h.references != null && h.references.length > 0) {
      let hasHeader = false;
      const refLines: string[] = [];

      for (const ref of h.references) {
        // Every reference is recorded, whatever its type, so anchor indices
        // stay aligned with the `references` array.
        references.push({
          type: ref.type,
          link: ref.reference.originalUrl,
          attribution: getDomainName(ref.reference.originalUrl),
          title: (
            ((ref.reference.title ?? '') || ref.reference.text) ??
            ''
          ).split('\n')[0],
        });

        // Only link references are listed in the output text.
        if (ref.type !== 'link') {
          continue;
        }

        if (!hasHeader) {
          refLines.push('Core References:');
          hasHeader = true;
        }

        refLines.push(
          `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`
        );
        refLines.push(
          `\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`
        );
      }

      if (hasHeader) {
        outputLines.push(...refLines);
        outputLines.push('');
      }
    }

    // Separator between rendered highlights, never after the last one.
    if (hIndex < highlights.length - 1) {
      outputLines.push('---');
      outputLines.push('');
    }
  });

  outputLines.push('');
  return outputLines.join('\n');
}
107
+
4
108
  export function formatResultsForLLM(
5
109
  turn: number,
6
110
  results: t.SearchResultData
7
111
  ): { output: string; references: t.ResultReference[] } {
8
- let output = '';
112
+ /** Array to collect all output lines */
113
+ const outputLines: string[] = [];
9
114
 
10
115
  const addSection = (title: string): void => {
11
- output += `\n=== ${title} ===\n`;
116
+ outputLines.push('');
117
+ outputLines.push(`=== ${title} ===`);
118
+ outputLines.push('');
12
119
  };
13
120
 
14
121
  const references: t.ResultReference[] = [];
122
+
15
123
  // Organic (web) results
16
124
  if (results.organic?.length != null && results.organic.length > 0) {
17
125
  addSection(`Web Results, Turn ${turn}`);
18
126
  for (let i = 0; i < results.organic.length; i++) {
19
127
  const r = results.organic[i];
20
- output += [
21
- `# Source ${i}: "${r.title ?? '(no title)'}"`,
22
- `Anchor: \\ue202turn${turn}search${i}`,
23
- `URL: ${r.link}`,
24
- r.snippet != null ? `Summary: ${r.snippet}` : '',
25
- r.date != null ? `Date: ${r.date}` : '',
26
- r.attribution != null ? `Source: ${r.attribution}` : '',
27
- '',
28
- '\n## Highlights\n\n',
29
- '',
30
- '',
31
- ]
32
- .filter(Boolean)
33
- .join('\n');
34
-
35
- (r.highlights ?? [])
36
- .filter((h) => h.text.trim().length > 0)
37
- .forEach((h, hIndex) => {
38
- output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\n\n`;
39
- output += '```text\n' + h.text.trim() + '\n```\n\n';
40
-
41
- if (h.references != null && h.references.length) {
42
- output += 'Core References:\n';
43
- output += h.references
44
- .map((ref) => {
45
- references.push({
46
- link: ref.reference.originalUrl,
47
- attribution: getDomainName(ref.reference.originalUrl),
48
- title: (
49
- ((ref.reference.title ?? '') || ref.reference.text) ??
50
- ''
51
- ).split('\n')[0],
52
- });
53
- return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\n\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`;
54
- })
55
- .join('\n');
56
- output += '\n\n';
57
- }
58
-
59
- if (hIndex < (r.highlights?.length ?? 0) - 1) {
60
- output += '---\n\n';
61
- }
62
- });
63
-
128
+ outputLines.push(formatSource(r, i, turn, 'search', references));
64
129
  delete results.organic[i].highlights;
65
- output += '\n';
66
130
  }
67
131
  }
68
132
 
69
- // Ignoring these sections for now
70
- // // Top stories (news)
71
- // const topStores = results.topStories ?? [];
72
- // if (topStores.length) {
73
- // addSection('News Results');
74
- // topStores.forEach((r, i) => {
75
- // output += [
76
- // `Anchor: \ue202turn0news${i}`,
77
- // `Title: ${r.title ?? '(no title)'}`,
78
- // `URL: ${r.link}`,
79
- // r.snippet != null ? `Snippet: ${r.snippet}` : '',
80
- // r.date != null ? `Date: ${r.date}` : '',
81
- // r.attribution != null ? `Source: ${r.attribution}` : '',
82
- // ''
83
- // ].filter(Boolean).join('\n');
84
- // });
85
- // }
133
+ // Top stories (news)
134
+ const topStories = results.topStories ?? [];
135
+ if (topStories.length) {
136
+ addSection('News Results');
137
+ for (let i = 0; i < topStories.length; i++) {
138
+ const r = topStories[i];
139
+ outputLines.push(formatSource(r, i, turn, 'news', references));
140
+ if (results.topStories?.[i]?.highlights) {
141
+ delete results.topStories[i].highlights;
142
+ }
143
+ }
144
+ }
86
145
 
87
146
  // // Images
88
147
  // const images = results.images ?? [];
89
148
  // if (images.length) {
90
149
  // addSection('Image Results');
91
- // images.forEach((img, i) => {
92
- // output += [
93
- // `Anchor: \ue202turn0image${i}`,
94
- // `Title: ${img.title ?? '(no title)'}`,
95
- // `Image URL: ${img.imageUrl}`,
96
- // ''
97
- // ].join('\n');
98
- // });
150
+ // const imageLines = images.map((img, i) => [
151
+ // `Anchor: \ue202turn0image${i}`,
152
+ // `Title: ${img.title ?? '(no title)'}`,
153
+ // `Image URL: ${img.imageUrl}`,
154
+ // ''
155
+ // ].join('\n'));
156
+ // outputLines.push(imageLines.join('\n'));
99
157
  // }
100
158
 
101
159
  // Knowledge Graph
102
160
  if (results.knowledgeGraph != null) {
103
161
  addSection('Knowledge Graph');
104
- output += [
162
+ const kgLines = [
105
163
  `**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
106
164
  results.knowledgeGraph.type != null
107
165
  ? `**Type:** ${results.knowledgeGraph.type}`
@@ -129,15 +187,15 @@ export function formatResultsForLLM(
129
187
  )}\n\`\`\``
130
188
  : '',
131
189
  '',
132
- ]
133
- .filter(Boolean)
134
- .join('\n\n');
190
+ ].filter(Boolean);
191
+
192
+ outputLines.push(kgLines.join('\n\n'));
135
193
  }
136
194
 
137
195
  // Answer Box
138
196
  if (results.answerBox != null) {
139
197
  addSection('Answer Box');
140
- output += [
198
+ const abLines = [
141
199
  results.answerBox.title != null
142
200
  ? `**Title:** ${results.answerBox.title}`
143
201
  : '',
@@ -153,30 +211,35 @@ export function formatResultsForLLM(
153
211
  ? `**Link:** ${results.answerBox.link}`
154
212
  : '',
155
213
  '',
156
- ]
157
- .filter(Boolean)
158
- .join('\n\n');
214
+ ].filter(Boolean);
215
+
216
+ outputLines.push(abLines.join('\n\n'));
159
217
  }
160
218
 
161
219
  // People also ask
162
220
  const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
163
221
  if (peopleAlsoAsk.length) {
164
222
  addSection('People Also Ask');
223
+
224
+ const paaLines: string[] = [];
165
225
  peopleAlsoAsk.forEach((p, i) => {
166
- output += [
226
+ const questionLines = [
167
227
  `### Question ${i + 1}:`,
168
228
  `"${p.question}"`,
169
- `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}}` : ''}`,
229
+ `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,
170
230
  `${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
171
231
  `${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
172
232
  '',
173
- ]
174
- .filter(Boolean)
175
- .join('\n\n');
233
+ ].filter(Boolean);
234
+
235
+ paaLines.push(questionLines.join('\n\n'));
176
236
  });
237
+
238
+ outputLines.push(paaLines.join(''));
177
239
  }
240
+
178
241
  return {
179
- output: output.trim(),
242
+ output: outputLines.join('\n').trim(),
180
243
  references,
181
244
  };
182
245
  }
@@ -365,7 +365,7 @@ export const createSourceProcessor = (
365
365
  url,
366
366
  attribution,
367
367
  error: true,
368
- content: `Failed to scrape ${url}: ${response.error ?? 'Unknown error'}`,
368
+ content: '',
369
369
  } as t.ScrapeResult;
370
370
  })
371
371
  .then(async (result) => {
@@ -402,7 +402,7 @@ export const createSourceProcessor = (
402
402
  return {
403
403
  url: currentLink,
404
404
  error: true,
405
- content: `Failed to scrape ${currentLink}: ${error.message ?? 'Unknown error'}`,
405
+ content: '',
406
406
  };
407
407
  });
408
408
  promises.push(promise);
@@ -503,49 +503,67 @@ export const createSourceProcessor = (
503
503
  }
504
504
 
505
505
  const sourceMap = new Map<string, t.ValidSource>();
506
- const allLinks: string[] = [];
506
+ const organicLinksSet = new Set<string>();
507
507
 
508
- for (const source of result.data.organic) {
509
- if (source.link) {
510
- allLinks.push(source.link);
511
- sourceMap.set(source.link, source);
512
- }
513
- }
508
+ // Collect organic links
509
+ const organicLinks = collectLinks(
510
+ result.data.organic,
511
+ sourceMap,
512
+ organicLinksSet
513
+ );
514
514
 
515
- if (allLinks.length === 0) {
515
+ // Collect top story links, excluding any that are already in organic links
516
+ const topStories = result.data.topStories ?? [];
517
+ const topStoryLinks = collectLinks(
518
+ topStories,
519
+ sourceMap,
520
+ organicLinksSet
521
+ );
522
+
523
+ if (organicLinks.length === 0 && topStoryLinks.length === 0) {
516
524
  return result.data;
517
525
  }
518
526
 
519
527
  const onContentScraped = createSourceUpdateCallback(sourceMap);
520
- await fetchContents({
521
- query,
522
- links: allLinks,
523
- onGetHighlights,
524
- onContentScraped,
525
- target: numElements,
526
- });
528
+ const promises: Promise<void>[] = [];
529
+
530
+ // Process organic links
531
+ if (organicLinks.length > 0) {
532
+ promises.push(
533
+ fetchContents({
534
+ query,
535
+ onGetHighlights,
536
+ onContentScraped,
537
+ links: organicLinks,
538
+ target: numElements,
539
+ })
540
+ );
541
+ }
527
542
 
528
- for (let i = 0; i < result.data.organic.length; i++) {
529
- const source = result.data.organic[i];
530
- const updatedSource = sourceMap.get(source.link);
531
- if (updatedSource) {
532
- result.data.organic[i] = {
533
- ...source,
534
- ...updatedSource,
535
- };
536
- }
543
+ // Process top story links
544
+ if (topStoryLinks.length > 0) {
545
+ promises.push(
546
+ fetchContents({
547
+ query,
548
+ onGetHighlights,
549
+ onContentScraped,
550
+ links: topStoryLinks,
551
+ target: numElements,
552
+ })
553
+ );
537
554
  }
538
555
 
539
- const successfulSources = result.data.organic
540
- .filter(
541
- (source) =>
542
- source.content != null && !source.content.startsWith('Failed')
543
- )
544
- .slice(0, numElements);
556
+ await Promise.all(promises);
545
557
 
546
- if (successfulSources.length > 0) {
547
- result.data.organic = successfulSources;
558
+ // Update sources with scraped content
559
+ if (result.data.organic.length > 0) {
560
+ updateSourcesWithContent(result.data.organic, sourceMap);
548
561
  }
562
+
563
+ if (topStories.length > 0) {
564
+ updateSourcesWithContent(topStories, sourceMap);
565
+ }
566
+
549
567
  return result.data;
550
568
  } catch (error) {
551
569
  console.error('Error in processSources:', error);
@@ -565,3 +583,46 @@ export const createSourceProcessor = (
565
583
  topResults,
566
584
  };
567
585
  };
586
+
587
/**
 * Collects the non-empty links of `sources` and registers each collected
 * source in `sourceMap` under its link.
 *
 * When `existingLinksSet` is supplied, any link already present in it is
 * skipped and every collected link is added to it. Passing the same set across
 * calls therefore removes duplicates both within one list and between lists.
 * Without a set, duplicate links are returned as-is.
 *
 * @param sources - Results whose `link` values should be collected.
 * @param sourceMap - Mutated: maps each collected link to its source object.
 * @param existingLinksSet - Optional, mutated: links already seen.
 * @returns The collected links, in input order.
 */
function collectLinks(
  sources: Array<t.OrganicResult | t.TopStoryResult>,
  sourceMap: Map<string, t.ValidSource>,
  existingLinksSet?: Set<string>
): string[] {
  const links: string[] = [];

  for (const source of sources) {
    if (!source.link) {
      continue;
    }

    // Skip anything already seen (earlier in this list or in a previous call
    // that shared the same set).
    if (existingLinksSet?.has(source.link) === true) {
      continue;
    }

    links.push(source.link);
    existingLinksSet?.add(source.link);
    sourceMap.set(source.link, source as t.ValidSource);
  }

  return links;
}
612
+
613
/**
 * Merges the entries of `sourceMap` back into `sources`, in place.
 *
 * For each source whose `link` has an entry in `sourceMap`, the array slot is
 * replaced by a new object that spreads the original source first and the
 * mapped entry second, so fields of the mapped entry win on conflict. Sources
 * with no entry are left untouched.
 *
 * @param sources - Mutated: the list whose elements are replaced.
 * @param sourceMap - Lookup from link to the updated source.
 */
function updateSourcesWithContent<T extends t.ValidSource>(
  sources: T[],
  sourceMap: Map<string, t.ValidSource>
): void {
  sources.forEach((source, position) => {
    const updatedSource = sourceMap.get(source.link);
    if (updatedSource) {
      sources[position] = {
        ...source,
        ...updatedSource,
      } as T;
    }
  });
}
@@ -16,6 +16,7 @@ export type ProcessedSource = {
16
16
  attribution?: string;
17
17
  references?: References;
18
18
  highlights?: Highlight[];
19
+ processed?: boolean;
19
20
  };
20
21
 
21
22
  export type ProcessedOrganic = OrganicResult & ProcessedSource;
@@ -24,6 +25,7 @@ export type ValidSource = ProcessedOrganic | ProcessedTopStory;
24
25
 
25
26
  export type ResultReference = {
26
27
  link: string;
28
+ type: 'link' | 'image' | 'video';
27
29
  title?: string;
28
30
  attribution?: string;
29
31
  };