@librechat/agents 2.4.319 → 2.4.321

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,107 +1,165 @@
1
1
  import type * as t from './types';
2
2
  import { getDomainName } from './utils';
3
3
 
4
/**
 * Builds the lines that open a source's "Highlights" section.
 *
 * @returns Two lines: the section heading (prefixed with a newline so it is
 * preceded by a blank line once joined) and an empty spacer line.
 */
function addHighlightSection(): string[] {
  const heading = '\n## Highlights';
  const spacer = '';
  return [heading, spacer];
}
7
+
8
/**
 * Formats a single source (organic web result or top story) as a text block.
 *
 * The block contains a heading, an anchor token, the URL, any optional
 * summary/date/attribution fields, and one sub-section per highlight whose
 * text is non-blank. Highlights with blank text are dropped *before* deciding
 * whether to emit the "## Highlights" header and where to place the `---`
 * separators, so an all-blank highlight list renders like a source with no
 * highlights and no separator is left dangling after the last rendered one.
 *
 * @param source - The source to render.
 * @param index - Position of the source in its result list; used in the
 * heading and the anchor.
 * @param turn - Turn number embedded in every anchor.
 * @param sourceType - `'search'` or `'news'`; capitalized for the heading and
 * used verbatim in the anchor.
 * @param references - Shared accumulator. Every reference of every rendered
 * highlight is appended to it (mutated in place), including non-`link`
 * references that are not listed in the output.
 * @returns The rendered lines joined with `\n`.
 */
function formatSource(
  source: t.ValidSource,
  index: number,
  turn: number,
  sourceType: 'search' | 'news',
  references: t.ResultReference[]
): string {
  /** Array of all lines to include in the output */
  const outputLines: string[] = [];

  // Heading, e.g. `# Search 0: "Some title"`
  const label = sourceType.charAt(0).toUpperCase() + sourceType.slice(1);
  const title =
    source.title != null && source.title ? `"${source.title}"` : '(no title)';
  outputLines.push(`# ${label} ${index}: ${title}`);
  outputLines.push(`\nAnchor: \\ue202turn${turn}${sourceType}${index}`);
  outputLines.push(`URL: ${source.link}`);

  // Optional fields
  if ('snippet' in source && source.snippet != null) {
    outputLines.push(`Summary: ${source.snippet}`);
  }

  if (source.date != null) {
    outputLines.push(`Date: ${source.date}`);
  }

  if (source.attribution != null) {
    outputLines.push(`Source: ${source.attribution}`);
  }

  // Only highlights with actual text are rendered; filter once so the header
  // and separator decisions below agree with what is really emitted.
  const highlights = (source.highlights ?? []).filter(
    (h) => h.text.trim().length > 0
  );

  // Highlight section header, or a blank line when there is nothing to show
  if (highlights.length > 0) {
    outputLines.push(...addHighlightSection());
  } else {
    outputLines.push('');
  }

  highlights.forEach((h, hIndex) => {
    outputLines.push(
      `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`
    );
    outputLines.push('');
    outputLines.push('```text');
    outputLines.push(h.text.trim());
    outputLines.push('```');
    outputLines.push('');

    if (h.references != null && h.references.length) {
      let hasHeader = false;
      const refLines: string[] = [];

      for (let j = 0; j < h.references.length; j++) {
        const ref = h.references[j];
        // Every reference is recorded in the shared accumulator, whatever its type.
        references.push({
          type: ref.type,
          link: ref.reference.originalUrl,
          attribution: getDomainName(ref.reference.originalUrl),
          // First line of the title, falling back to the reference text.
          title: (
            ((ref.reference.title ?? '') || ref.reference.text) ??
            ''
          ).split('\n')[0],
        });

        // Only `link` references are listed in the output.
        if (ref.type !== 'link') {
          continue;
        }

        if (!hasHeader) {
          refLines.push('Core References:');
          hasHeader = true;
        }

        refLines.push(
          `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}`
        );
        // The anchor points at the entry that was just pushed to `references`.
        refLines.push(
          `\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`
        );
      }

      if (hasHeader) {
        outputLines.push(...refLines);
        outputLines.push('');
      }
    }

    // Separator between rendered highlights, never after the last one.
    if (hIndex < highlights.length - 1) {
      outputLines.push('---');
      outputLines.push('');
    }
  });

  outputLines.push('');
  return outputLines.join('\n');
}
107
+
4
108
  export function formatResultsForLLM(
5
109
  turn: number,
6
110
  results: t.SearchResultData
7
111
  ): { output: string; references: t.ResultReference[] } {
8
- let output = '';
112
+ /** Array to collect all output lines */
113
+ const outputLines: string[] = [];
9
114
 
10
115
  const addSection = (title: string): void => {
11
- output += `\n=== ${title} ===\n`;
116
+ outputLines.push('');
117
+ outputLines.push(`=== ${title} ===`);
118
+ outputLines.push('');
12
119
  };
13
120
 
14
121
  const references: t.ResultReference[] = [];
122
+
15
123
  // Organic (web) results
16
124
  if (results.organic?.length != null && results.organic.length > 0) {
17
125
  addSection(`Web Results, Turn ${turn}`);
18
126
  for (let i = 0; i < results.organic.length; i++) {
19
127
  const r = results.organic[i];
20
- output += [
21
- `# Source ${i}: "${r.title ?? '(no title)'}"`,
22
- `Anchor: \\ue202turn${turn}search${i}`,
23
- `URL: ${r.link}`,
24
- r.snippet != null ? `Summary: ${r.snippet}` : '',
25
- r.date != null ? `Date: ${r.date}` : '',
26
- r.attribution != null ? `Source: ${r.attribution}` : '',
27
- '',
28
- '\n## Highlights\n\n',
29
- '',
30
- '',
31
- ]
32
- .filter(Boolean)
33
- .join('\n');
34
-
35
- (r.highlights ?? [])
36
- .filter((h) => h.text.trim().length > 0)
37
- .forEach((h, hIndex) => {
38
- output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\n\n`;
39
- output += '```text\n' + h.text.trim() + '\n```\n\n';
40
-
41
- if (h.references != null && h.references.length) {
42
- output += 'Core References:\n';
43
- output += h.references
44
- .map((ref) => {
45
- references.push({
46
- link: ref.reference.originalUrl,
47
- attribution: getDomainName(ref.reference.originalUrl),
48
- title: (
49
- ((ref.reference.title ?? '') || ref.reference.text) ??
50
- ''
51
- ).split('\n')[0],
52
- });
53
- return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\n\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`;
54
- })
55
- .join('\n');
56
- output += '\n\n';
57
- }
58
-
59
- if (hIndex < (r.highlights?.length ?? 0) - 1) {
60
- output += '---\n\n';
61
- }
62
- });
63
-
128
+ outputLines.push(formatSource(r, i, turn, 'search', references));
64
129
  delete results.organic[i].highlights;
65
- output += '\n';
66
130
  }
67
131
  }
68
132
 
69
- // Ignoring these sections for now
70
- // // Top stories (news)
71
- // const topStores = results.topStories ?? [];
72
- // if (topStores.length) {
73
- // addSection('News Results');
74
- // topStores.forEach((r, i) => {
75
- // output += [
76
- // `Anchor: \ue202turn0news${i}`,
77
- // `Title: ${r.title ?? '(no title)'}`,
78
- // `URL: ${r.link}`,
79
- // r.snippet != null ? `Snippet: ${r.snippet}` : '',
80
- // r.date != null ? `Date: ${r.date}` : '',
81
- // r.attribution != null ? `Source: ${r.attribution}` : '',
82
- // ''
83
- // ].filter(Boolean).join('\n');
84
- // });
85
- // }
133
+ // Top stories (news)
134
+ const topStories = results.topStories ?? [];
135
+ if (topStories.length) {
136
+ addSection('News Results');
137
+ for (let i = 0; i < topStories.length; i++) {
138
+ const r = topStories[i];
139
+ outputLines.push(formatSource(r, i, turn, 'news', references));
140
+ if (results.topStories?.[i]?.highlights) {
141
+ delete results.topStories[i].highlights;
142
+ }
143
+ }
144
+ }
86
145
 
87
146
  // // Images
88
147
  // const images = results.images ?? [];
89
148
  // if (images.length) {
90
149
  // addSection('Image Results');
91
- // images.forEach((img, i) => {
92
- // output += [
93
- // `Anchor: \ue202turn0image${i}`,
94
- // `Title: ${img.title ?? '(no title)'}`,
95
- // `Image URL: ${img.imageUrl}`,
96
- // ''
97
- // ].join('\n');
98
- // });
150
+ // const imageLines = images.map((img, i) => [
151
+ // `Anchor: \ue202turn0image${i}`,
152
+ // `Title: ${img.title ?? '(no title)'}`,
153
+ // `Image URL: ${img.imageUrl}`,
154
+ // ''
155
+ // ].join('\n'));
156
+ // outputLines.push(imageLines.join('\n'));
99
157
  // }
100
158
 
101
159
  // Knowledge Graph
102
160
  if (results.knowledgeGraph != null) {
103
161
  addSection('Knowledge Graph');
104
- output += [
162
+ const kgLines = [
105
163
  `**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
106
164
  results.knowledgeGraph.type != null
107
165
  ? `**Type:** ${results.knowledgeGraph.type}`
@@ -129,15 +187,15 @@ export function formatResultsForLLM(
129
187
  )}\n\`\`\``
130
188
  : '',
131
189
  '',
132
- ]
133
- .filter(Boolean)
134
- .join('\n\n');
190
+ ].filter(Boolean);
191
+
192
+ outputLines.push(kgLines.join('\n\n'));
135
193
  }
136
194
 
137
195
  // Answer Box
138
196
  if (results.answerBox != null) {
139
197
  addSection('Answer Box');
140
- output += [
198
+ const abLines = [
141
199
  results.answerBox.title != null
142
200
  ? `**Title:** ${results.answerBox.title}`
143
201
  : '',
@@ -153,30 +211,35 @@ export function formatResultsForLLM(
153
211
  ? `**Link:** ${results.answerBox.link}`
154
212
  : '',
155
213
  '',
156
- ]
157
- .filter(Boolean)
158
- .join('\n\n');
214
+ ].filter(Boolean);
215
+
216
+ outputLines.push(abLines.join('\n\n'));
159
217
  }
160
218
 
161
219
  // People also ask
162
220
  const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
163
221
  if (peopleAlsoAsk.length) {
164
222
  addSection('People Also Ask');
223
+
224
+ const paaLines: string[] = [];
165
225
  peopleAlsoAsk.forEach((p, i) => {
166
- output += [
226
+ const questionLines = [
167
227
  `### Question ${i + 1}:`,
168
228
  `"${p.question}"`,
169
- `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}}` : ''}`,
229
+ `${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}` : ''}`,
170
230
  `${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
171
231
  `${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
172
232
  '',
173
- ]
174
- .filter(Boolean)
175
- .join('\n\n');
233
+ ].filter(Boolean);
234
+
235
+ paaLines.push(questionLines.join('\n\n'));
176
236
  });
237
+
238
+ outputLines.push(paaLines.join(''));
177
239
  }
240
+
178
241
  return {
179
- output: output.trim(),
242
+ output: outputLines.join('\n').trim(),
180
243
  references,
181
244
  };
182
245
  }
@@ -65,7 +65,7 @@ const chunker = {
65
65
  },
66
66
  };
67
67
 
68
- const createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {
68
+ function createSourceUpdateCallback(sourceMap: Map<string, t.ValidSource>) {
69
69
  return (link: string, update?: Partial<t.ValidSource>): void => {
70
70
  const source = sourceMap.get(link);
71
71
  if (source) {
@@ -75,7 +75,7 @@ const createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {
75
75
  });
76
76
  }
77
77
  };
78
- };
78
+ }
79
79
 
80
80
  const getHighlights = async ({
81
81
  query,
@@ -315,10 +315,7 @@ export const createSourceProcessor = (
315
315
  scraperInstance?: FirecrawlScraper
316
316
  ): {
317
317
  processSources: (
318
- result: t.SearchResult,
319
- numElements: number,
320
- query: string,
321
- proMode?: boolean
318
+ fields: t.ProcessSourcesFields
322
319
  ) => Promise<t.SearchResultData>;
323
320
  topResults: number;
324
321
  } => {
@@ -338,14 +335,17 @@ export const createSourceProcessor = (
338
335
  scrapeMany: async ({
339
336
  query,
340
337
  links,
338
+ onGetHighlights,
341
339
  }: {
342
340
  query: string;
343
341
  links: string[];
342
+ onGetHighlights: t.SearchToolConfig['onGetHighlights'];
344
343
  }): Promise<Array<t.ScrapeResult>> => {
345
344
  console.log(`Scraping ${links.length} links with Firecrawl`);
346
345
  const promises: Array<Promise<t.ScrapeResult>> = [];
347
346
  try {
348
- for (const currentLink of links) {
347
+ for (let i = 0; i < links.length; i++) {
348
+ const currentLink = links[i];
349
349
  const promise: Promise<t.ScrapeResult> = firecrawlScraper
350
350
  .scrapeUrl(currentLink, {})
351
351
  .then(([url, response]) => {
@@ -365,7 +365,7 @@ export const createSourceProcessor = (
365
365
  url,
366
366
  attribution,
367
367
  error: true,
368
- content: `Failed to scrape ${url}: ${response.error ?? 'Unknown error'}`,
368
+ content: '',
369
369
  } as t.ScrapeResult;
370
370
  })
371
371
  .then(async (result) => {
@@ -383,6 +383,9 @@ export const createSourceProcessor = (
383
383
  reranker,
384
384
  content: result.content,
385
385
  });
386
+ if (onGetHighlights) {
387
+ onGetHighlights(result.url);
388
+ }
386
389
  return {
387
390
  ...result,
388
391
  highlights,
@@ -399,7 +402,7 @@ export const createSourceProcessor = (
399
402
  return {
400
403
  url: currentLink,
401
404
  error: true,
402
- content: `Failed to scrape ${currentLink}: ${error.message ?? 'Unknown error'}`,
405
+ content: '',
403
406
  };
404
407
  });
405
408
  promises.push(promise);
@@ -416,16 +419,22 @@ export const createSourceProcessor = (
416
419
  links,
417
420
  query,
418
421
  target,
422
+ onGetHighlights,
419
423
  onContentScraped,
420
424
  }: {
421
425
  links: string[];
422
426
  query: string;
423
427
  target: number;
428
+ onGetHighlights: t.SearchToolConfig['onGetHighlights'];
424
429
  onContentScraped?: (link: string, update?: Partial<t.ValidSource>) => void;
425
430
  }): Promise<void> => {
426
431
  const initialLinks = links.slice(0, target);
427
432
  // const remainingLinks = links.slice(target).reverse();
428
- const results = await webScraper.scrapeMany({ query, links: initialLinks });
433
+ const results = await webScraper.scrapeMany({
434
+ query,
435
+ links: initialLinks,
436
+ onGetHighlights,
437
+ });
429
438
  for (const result of results) {
430
439
  if (result.error === true) {
431
440
  continue;
@@ -440,12 +449,13 @@ export const createSourceProcessor = (
440
449
  }
441
450
  };
442
451
 
443
- const processSources = async (
444
- result: t.SearchResult,
445
- numElements: number,
446
- query: string,
447
- proMode: boolean = false
448
- ): Promise<t.SearchResultData> => {
452
+ const processSources = async ({
453
+ result,
454
+ numElements,
455
+ query,
456
+ proMode = true,
457
+ onGetHighlights,
458
+ }: t.ProcessSourcesFields): Promise<t.SearchResultData> => {
449
459
  try {
450
460
  if (!result.data) {
451
461
  return {
@@ -473,6 +483,7 @@ export const createSourceProcessor = (
473
483
  await fetchContents({
474
484
  query,
475
485
  target: 1,
486
+ onGetHighlights,
476
487
  onContentScraped,
477
488
  links: [wikiSources[0].link],
478
489
  });
@@ -492,48 +503,67 @@ export const createSourceProcessor = (
492
503
  }
493
504
 
494
505
  const sourceMap = new Map<string, t.ValidSource>();
495
- const allLinks: string[] = [];
506
+ const organicLinksSet = new Set<string>();
496
507
 
497
- for (const source of result.data.organic) {
498
- if (source.link) {
499
- allLinks.push(source.link);
500
- sourceMap.set(source.link, source);
501
- }
502
- }
508
+ // Collect organic links
509
+ const organicLinks = collectLinks(
510
+ result.data.organic,
511
+ sourceMap,
512
+ organicLinksSet
513
+ );
503
514
 
504
- if (allLinks.length === 0) {
515
+ // Collect top story links, excluding any that are already in organic links
516
+ const topStories = result.data.topStories ?? [];
517
+ const topStoryLinks = collectLinks(
518
+ topStories,
519
+ sourceMap,
520
+ organicLinksSet
521
+ );
522
+
523
+ if (organicLinks.length === 0 && topStoryLinks.length === 0) {
505
524
  return result.data;
506
525
  }
507
526
 
508
527
  const onContentScraped = createSourceUpdateCallback(sourceMap);
509
- await fetchContents({
510
- links: allLinks,
511
- query,
512
- onContentScraped,
513
- target: numElements,
514
- });
528
+ const promises: Promise<void>[] = [];
529
+
530
+ // Process organic links
531
+ if (organicLinks.length > 0) {
532
+ promises.push(
533
+ fetchContents({
534
+ query,
535
+ onGetHighlights,
536
+ onContentScraped,
537
+ links: organicLinks,
538
+ target: numElements,
539
+ })
540
+ );
541
+ }
515
542
 
516
- for (let i = 0; i < result.data.organic.length; i++) {
517
- const source = result.data.organic[i];
518
- const updatedSource = sourceMap.get(source.link);
519
- if (updatedSource) {
520
- result.data.organic[i] = {
521
- ...source,
522
- ...updatedSource,
523
- };
524
- }
543
+ // Process top story links
544
+ if (topStoryLinks.length > 0) {
545
+ promises.push(
546
+ fetchContents({
547
+ query,
548
+ onGetHighlights,
549
+ onContentScraped,
550
+ links: topStoryLinks,
551
+ target: numElements,
552
+ })
553
+ );
525
554
  }
526
555
 
527
- const successfulSources = result.data.organic
528
- .filter(
529
- (source) =>
530
- source.content != null && !source.content.startsWith('Failed')
531
- )
532
- .slice(0, numElements);
556
+ await Promise.all(promises);
533
557
 
534
- if (successfulSources.length > 0) {
535
- result.data.organic = successfulSources;
558
+ // Update sources with scraped content
559
+ if (result.data.organic.length > 0) {
560
+ updateSourcesWithContent(result.data.organic, sourceMap);
536
561
  }
562
+
563
+ if (topStories.length > 0) {
564
+ updateSourcesWithContent(topStories, sourceMap);
565
+ }
566
+
537
567
  return result.data;
538
568
  } catch (error) {
539
569
  console.error('Error in processSources:', error);
@@ -553,3 +583,46 @@ export const createSourceProcessor = (
553
583
  topResults,
554
584
  };
555
585
  };
586
+
587
/**
 * Gathers the links of the given sources and registers each source in
 * `sourceMap` under its link.
 *
 * Sources with a falsy `link` are ignored. When `existingLinksSet` is given,
 * any link already present in it is skipped (neither returned nor written to
 * `sourceMap`), and every link that is collected is added to it, so the same
 * set can be shared across several calls to de-duplicate between them.
 *
 * @param sources - Organic results and/or top stories to scan.
 * @param sourceMap - Link-to-source map, mutated in place.
 * @param existingLinksSet - Optional set of links already seen, mutated in place.
 * @returns The collected links, in source order.
 */
function collectLinks(
  sources: Array<t.OrganicResult | t.TopStoryResult>,
  sourceMap: Map<string, t.ValidSource>,
  existingLinksSet?: Set<string>
): string[] {
  const collected: string[] = [];

  for (const entry of sources) {
    const url = entry.link;
    if (!url) {
      continue;
    }

    // Already seen in this or an earlier call sharing the set: skip it.
    if (existingLinksSet?.has(url) === true) {
      continue;
    }

    collected.push(url);
    existingLinksSet?.add(url);
    sourceMap.set(url, entry as t.ValidSource);
  }

  return collected;
}
612
+
613
/**
 * Merges entries from `sourceMap` back into `sources`, in place.
 *
 * For each source whose `link` has an entry in `sourceMap`, the array slot is
 * replaced by a new object combining the original source with the mapped one;
 * fields from the mapped entry win on conflict. Sources without a map entry
 * are left untouched.
 *
 * @param sources - Array of sources to update (mutated in place).
 * @param sourceMap - Link-to-source map holding the updated data.
 */
function updateSourcesWithContent<T extends t.ValidSource>(
  sources: T[],
  sourceMap: Map<string, t.ValidSource>
): void {
  for (const [position, current] of sources.entries()) {
    const enriched = sourceMap.get(current.link);
    if (!enriched) {
      continue;
    }
    sources[position] = { ...current, ...enriched } as T;
  }
}