@librechat/agents 2.4.320 → 2.4.322

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/cjs/tools/search/firecrawl.cjs +6 -4
  2. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  3. package/dist/cjs/tools/search/format.cjs +117 -80
  4. package/dist/cjs/tools/search/format.cjs.map +1 -1
  5. package/dist/cjs/tools/search/rerankers.cjs +43 -36
  6. package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
  7. package/dist/cjs/tools/search/schema.cjs +70 -0
  8. package/dist/cjs/tools/search/schema.cjs.map +1 -0
  9. package/dist/cjs/tools/search/search.cjs +125 -52
  10. package/dist/cjs/tools/search/search.cjs.map +1 -1
  11. package/dist/cjs/tools/search/tool.cjs +162 -47
  12. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  13. package/dist/cjs/tools/search/utils.cjs +34 -5
  14. package/dist/cjs/tools/search/utils.cjs.map +1 -1
  15. package/dist/esm/tools/search/firecrawl.mjs +6 -4
  16. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  17. package/dist/esm/tools/search/format.mjs +118 -81
  18. package/dist/esm/tools/search/format.mjs.map +1 -1
  19. package/dist/esm/tools/search/rerankers.mjs +43 -36
  20. package/dist/esm/tools/search/rerankers.mjs.map +1 -1
  21. package/dist/esm/tools/search/schema.mjs +61 -0
  22. package/dist/esm/tools/search/schema.mjs.map +1 -0
  23. package/dist/esm/tools/search/search.mjs +126 -53
  24. package/dist/esm/tools/search/search.mjs.map +1 -1
  25. package/dist/esm/tools/search/tool.mjs +161 -46
  26. package/dist/esm/tools/search/tool.mjs.map +1 -1
  27. package/dist/esm/tools/search/utils.mjs +33 -6
  28. package/dist/esm/tools/search/utils.mjs.map +1 -1
  29. package/dist/types/tools/search/firecrawl.d.ts +1 -0
  30. package/dist/types/tools/search/rerankers.d.ts +8 -4
  31. package/dist/types/tools/search/schema.d.ts +16 -0
  32. package/dist/types/tools/search/tool.d.ts +13 -0
  33. package/dist/types/tools/search/types.d.ts +36 -0
  34. package/dist/types/tools/search/utils.d.ts +9 -2
  35. package/package.json +3 -2
  36. package/src/scripts/search.ts +3 -0
  37. package/src/tools/search/firecrawl.ts +9 -4
  38. package/src/tools/search/format.ts +157 -87
  39. package/src/tools/search/rerankers.ts +57 -36
  40. package/src/tools/search/schema.ts +63 -0
  41. package/src/tools/search/search.ts +165 -52
  42. package/src/tools/search/tool.ts +217 -44
  43. package/src/tools/search/types.ts +37 -0
  44. package/src/tools/search/utils.ts +37 -5
  45. package/src/utils/llmConfig.ts +1 -1
@@ -1,10 +1,9 @@
-/* eslint-disable no-console */
 import axios from 'axios';
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 import type * as t from './types';
+import { getAttribution, createDefaultLogger } from './utils';
 import { FirecrawlScraper } from './firecrawl';
 import { BaseReranker } from './rerankers';
-import { getAttribution } from './utils';

 const chunker = {
   cleanText: (text: string): string => {
@@ -52,12 +51,14 @@ const chunker = {
       chunkSize?: number;
       chunkOverlap?: number;
       separators?: string[];
-    }
+    },
+    logger?: t.Logger
   ): Promise<string[][]> => {
     // Split multiple texts
+    const logger_ = logger || createDefaultLogger();
     const promises = texts.map((text) =>
       chunker.splitText(text, options).catch((error) => {
-        console.error('Error splitting text:', error);
+        logger_.error('Error splitting text:', error);
         return [text];
       })
     );
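For orientation, here is a hypothetical call site for the updated multi-text splitter. The method name splitTexts, the input texts, and the chunking options are placeholders, and myLogger stands for any t.Logger (the hunks only show .debug, .warn, and .error being called on it).

// Hypothetical usage sketch, not part of the diff.
const chunks: string[][] = await chunker.splitTexts(
  [textA, textB],
  { chunkSize: 1000, chunkOverlap: 100 },
  myLogger
);
// A text that fails to split is reported through myLogger.error and is
// returned unsplit ([text]), matching the catch handler above.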
@@ -82,18 +83,22 @@ const getHighlights = async ({
   content,
   reranker,
   topResults = 5,
+  logger,
 }: {
   content: string;
   query: string;
   reranker?: BaseReranker;
   topResults?: number;
+  logger?: t.Logger;
 }): Promise<t.Highlight[] | undefined> => {
+  const logger_ = logger || createDefaultLogger();
+
   if (!content) {
-    console.warn('No content provided for highlights');
+    logger_.warn('No content provided for highlights');
     return;
   }
   if (!reranker) {
-    console.warn('No reranker provided for highlights');
+    logger_.warn('No reranker provided for highlights');
     return;
   }

@@ -102,14 +107,14 @@ const getHighlights = async ({
     if (Array.isArray(documents)) {
       return await reranker.rerank(query, documents, topResults);
     } else {
-      console.error(
+      logger_.error(
         'Expected documents to be an array, got:',
         typeof documents
       );
       return;
     }
   } catch (error) {
-    console.error('Error in content processing:', error);
+    logger_.error('Error in content processing:', error);
     return;
   }
 };
@@ -131,25 +136,49 @@ const createSerperAPI = (

   const getSources = async ({
     query,
+    date,
     country,
+    safeSearch,
     numResults = 8,
+    type,
   }: t.GetSourcesParams): Promise<t.SearchResult> => {
     if (!query.trim()) {
       return { success: false, error: 'Query cannot be empty' };
     }

     try {
+      const safe = ['off', 'moderate', 'active'] as const;
       const payload: t.SerperSearchPayload = {
         q: query,
+        safe: safe[safeSearch ?? 1],
         num: Math.min(Math.max(1, numResults), 10),
       };

+      // Set the search type if provided
+      if (type) {
+        payload.type = type;
+      }
+
+      if (date != null) {
+        payload.tbs = `qdr:${date}`;
+      }
+
       if (country != null && country !== '') {
         payload['gl'] = country.toLowerCase();
       }

+      // Determine the API endpoint based on the search type
+      let apiEndpoint = config.apiUrl;
+      if (type === 'images') {
+        apiEndpoint = 'https://google.serper.dev/images';
+      } else if (type === 'videos') {
+        apiEndpoint = 'https://google.serper.dev/videos';
+      } else if (type === 'news') {
+        apiEndpoint = 'https://google.serper.dev/news';
+      }
+
       const response = await axios.post<t.SerperResultData>(
-        config.apiUrl,
+        apiEndpoint,
         payload,
         {
           headers: {
@@ -169,6 +198,8 @@ const createSerperAPI = (
         peopleAlsoAsk: data.peopleAlsoAsk,
         knowledgeGraph: data.knowledgeGraph,
         relatedSearches: data.relatedSearches,
+        videos: data.videos ?? [],
+        news: data.news ?? [],
       };

       return { success: true, data: results };
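For orientation, here is a hypothetical call against the Serper-backed getSources above, showing how the new parameters map onto the request. The query string and the date value 'w' are assumptions ('w' presumes date carries a Google qdr code); they are not taken from the diff.

// Hypothetical usage sketch, not part of the diff.
const result = await getSources({
  query: 'typescript 5 release notes',
  type: 'news',   // sets payload.type and posts to https://google.serper.dev/news
  date: 'w',      // becomes payload.tbs = 'qdr:w'
  safeSearch: 1,  // becomes payload.safe = 'moderate'
  numResults: 5,  // clamped to the 1..10 range before being sent as payload.num
});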
@@ -202,6 +233,7 @@ const createSearXNGAPI = (
   const getSources = async ({
     query,
     numResults = 8,
+    type,
   }: t.GetSourcesParams): Promise<t.SearchResult> => {
     if (!query.trim()) {
       return { success: false, error: 'Query cannot be empty' };
@@ -218,12 +250,22 @@ const createSearXNGAPI = (
       searchUrl = searchUrl.replace(/\/$/, '') + '/search';
     }

+    // Determine the search category based on the type
+    let category = 'general';
+    if (type === 'images') {
+      category = 'images';
+    } else if (type === 'videos') {
+      category = 'videos';
+    } else if (type === 'news') {
+      category = 'news';
+    }
+
     // Prepare parameters for SearXNG
     const params: t.SearxNGSearchPayload = {
       q: query,
       format: 'json',
       pageno: 1,
-      categories: 'general',
+      categories: category,
       language: 'all',
       safesearch: 0,
       engines: 'google,bing,duckduckgo',
@@ -271,6 +313,8 @@ const createSearXNGAPI = (
       topStories: [],
       // Use undefined instead of null for optional properties
       relatedSearches: data.suggestions ?? [],
+      videos: [],
+      news: [],
     };

     return { success: true, data: results };
@@ -327,8 +371,10 @@ export const createSourceProcessor = (
     // strategies = ['no_extraction'],
     // filterContent = true,
     reranker,
+    logger,
   } = config;

+  const logger_ = logger || createDefaultLogger();
   const firecrawlScraper = scraperInstance;

   const webScraper = {
@@ -341,7 +387,7 @@ export const createSourceProcessor = (
       links: string[];
       onGetHighlights: t.SearchToolConfig['onGetHighlights'];
     }): Promise<Array<t.ScrapeResult>> => {
-      console.log(`Scraping ${links.length} links with Firecrawl`);
+      logger_.debug(`Scraping ${links.length} links with Firecrawl`);
       const promises: Array<Promise<t.ScrapeResult>> = [];
       try {
         for (let i = 0; i < links.length; i++) {
@@ -349,7 +395,11 @@ export const createSourceProcessor = (
           const promise: Promise<t.ScrapeResult> = firecrawlScraper
             .scrapeUrl(currentLink, {})
             .then(([url, response]) => {
-              const attribution = getAttribution(url, response.data?.metadata);
+              const attribution = getAttribution(
+                url,
+                response.data?.metadata,
+                logger_
+              );
               if (response.success && response.data) {
                 const [content, references] =
                   firecrawlScraper.extractContent(response);
@@ -365,14 +415,15 @@ export const createSourceProcessor = (
                 url,
                 attribution,
                 error: true,
-                content: `Failed to scrape ${url}: ${response.error ?? 'Unknown error'}`,
+                content: '',
               } as t.ScrapeResult;
             })
             .then(async (result) => {
               try {
                 if (result.error != null) {
-                  console.error(
-                    `Error scraping ${result.url}: ${result.content}`
+                  logger_.error(
+                    `Error scraping ${result.url}: ${result.content}`,
+                    result.error
                   );
                   return {
                     ...result,
@@ -382,6 +433,7 @@ export const createSourceProcessor = (
                   query,
                   reranker,
                   content: result.content,
+                  logger: logger_,
                 });
                 if (onGetHighlights) {
                   onGetHighlights(result.url);
@@ -391,25 +443,25 @@ export const createSourceProcessor = (
                   highlights,
                 };
               } catch (error) {
-                console.error('Error processing scraped content:', error);
+                logger_.error('Error processing scraped content:', error);
                 return {
                   ...result,
                 };
               }
             })
             .catch((error) => {
-              console.error(`Error scraping ${currentLink}:`, error);
+              logger_.error(`Error scraping ${currentLink}:`, error);
              return {
                url: currentLink,
                error: true,
-              content: `Failed to scrape ${currentLink}: ${error.message ?? 'Unknown error'}`,
+              content: '',
              };
            });
          promises.push(promise);
        }
        return await Promise.all(promises);
      } catch (error) {
-        console.error('Error in scrapeMany:', error);
+        logger_.error('Error in scrapeMany:', error);
        return [];
      }
    },
@@ -453,6 +505,7 @@ export const createSourceProcessor = (
     result,
     numElements,
     query,
+    news,
     proMode = true,
     onGetHighlights,
   }: t.ProcessSourcesFields): Promise<t.SearchResultData> => {
@@ -503,52 +556,69 @@ export const createSourceProcessor = (
       }

       const sourceMap = new Map<string, t.ValidSource>();
-      const allLinks: string[] = [];
+      const organicLinksSet = new Set<string>();

-      for (const source of result.data.organic) {
-        if (source.link) {
-          allLinks.push(source.link);
-          sourceMap.set(source.link, source);
-        }
-      }
+      // Collect organic links
+      const organicLinks = collectLinks(
+        result.data.organic,
+        sourceMap,
+        organicLinksSet
+      );

-      if (allLinks.length === 0) {
+      // Collect top story links, excluding any that are already in organic links
+      const topStories = result.data.topStories ?? [];
+      const topStoryLinks = collectLinks(
+        topStories,
+        sourceMap,
+        organicLinksSet
+      );
+
+      if (organicLinks.length === 0 && (topStoryLinks.length === 0 || !news)) {
         return result.data;
       }

       const onContentScraped = createSourceUpdateCallback(sourceMap);
-      await fetchContents({
-        query,
-        links: allLinks,
-        onGetHighlights,
-        onContentScraped,
-        target: numElements,
-      });
+      const promises: Promise<void>[] = [];
+
+      // Process organic links
+      if (organicLinks.length > 0) {
+        promises.push(
+          fetchContents({
+            query,
+            onGetHighlights,
+            onContentScraped,
+            links: organicLinks,
+            target: numElements,
+          })
+        );
+      }

-      for (let i = 0; i < result.data.organic.length; i++) {
-        const source = result.data.organic[i];
-        const updatedSource = sourceMap.get(source.link);
-        if (updatedSource) {
-          result.data.organic[i] = {
-            ...source,
-            ...updatedSource,
-          };
-        }
+      // Process top story links
+      if (news && topStoryLinks.length > 0) {
+        promises.push(
+          fetchContents({
+            query,
+            onGetHighlights,
+            onContentScraped,
+            links: topStoryLinks,
+            target: numElements,
+          })
+        );
       }

-      const successfulSources = result.data.organic
-        .filter(
-          (source) =>
-            source.content != null && !source.content.startsWith('Failed')
-        )
-        .slice(0, numElements);
+      await Promise.all(promises);

-      if (successfulSources.length > 0) {
-        result.data.organic = successfulSources;
+      if (result.data.organic.length > 0) {
+        updateSourcesWithContent(result.data.organic, sourceMap);
       }
+
+      if (news && topStories.length > 0) {
+        updateSourcesWithContent(topStories, sourceMap);
+      }
+
       return result.data;
     } catch (error) {
-      console.error('Error in processSources:', error);
+      logger_.error('Error in processSources:', error);
       return {
         organic: [],
         topStories: [],
@@ -565,3 +635,46 @@
     topResults,
   };
 };
+
+/** Helper function to collect links and update sourceMap */
+function collectLinks(
+  sources: Array<t.OrganicResult | t.TopStoryResult>,
+  sourceMap: Map<string, t.ValidSource>,
+  existingLinksSet?: Set<string>
+): string[] {
+  const links: string[] = [];
+
+  for (const source of sources) {
+    if (source.link) {
+      // For topStories, only add if not already in organic links
+      if (existingLinksSet && existingLinksSet.has(source.link)) {
+        continue;
+      }
+
+      links.push(source.link);
+      if (existingLinksSet) {
+        existingLinksSet.add(source.link);
+      }
+      sourceMap.set(source.link, source as t.ValidSource);
+    }
+  }
+
+  return links;
+}
+
+/** Helper function to update sources with scraped content */
+function updateSourcesWithContent<T extends t.ValidSource>(
+  sources: T[],
+  sourceMap: Map<string, t.ValidSource>
+): void {
+  for (let i = 0; i < sources.length; i++) {
+    const source = sources[i];
+    const updatedSource = sourceMap.get(source.link);
+    if (updatedSource) {
+      sources[i] = {
+        ...source,
+        ...updatedSource,
+      } as T;
+    }
+  }
+}
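As a rough sketch of the dedupe behavior added by collectLinks above, with hypothetical data (the object shapes and casts are assumptions, used only to keep the sketch self-contained):

// Hypothetical usage sketch, not part of the diff.
const sourceMap = new Map<string, t.ValidSource>();
const seen = new Set<string>();

const organicLinks = collectLinks(
  [{ title: 'A', link: 'https://example.com/a' }] as t.OrganicResult[],
  sourceMap,
  seen
);
// organicLinks === ['https://example.com/a']

const topStoryLinks = collectLinks(
  [
    { title: 'A (story)', link: 'https://example.com/a' },
    { title: 'B', link: 'https://example.com/b' },
  ] as t.TopStoryResult[],
  sourceMap,
  seen
);
// topStoryLinks === ['https://example.com/b']; the duplicate link is skipped
// because it was already recorded in `seen` by the first call.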