@librechat/agents 2.4.320 → 2.4.322
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/search/firecrawl.cjs +6 -4
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +117 -80
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs +43 -36
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +70 -0
- package/dist/cjs/tools/search/schema.cjs.map +1 -0
- package/dist/cjs/tools/search/search.cjs +125 -52
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +162 -47
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +34 -5
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs +6 -4
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +118 -81
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs +43 -36
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +61 -0
- package/dist/esm/tools/search/schema.mjs.map +1 -0
- package/dist/esm/tools/search/search.mjs +126 -53
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +161 -46
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +33 -6
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/types/tools/search/firecrawl.d.ts +1 -0
- package/dist/types/tools/search/rerankers.d.ts +8 -4
- package/dist/types/tools/search/schema.d.ts +16 -0
- package/dist/types/tools/search/tool.d.ts +13 -0
- package/dist/types/tools/search/types.d.ts +36 -0
- package/dist/types/tools/search/utils.d.ts +9 -2
- package/package.json +3 -2
- package/src/scripts/search.ts +3 -0
- package/src/tools/search/firecrawl.ts +9 -4
- package/src/tools/search/format.ts +157 -87
- package/src/tools/search/rerankers.ts +57 -36
- package/src/tools/search/schema.ts +63 -0
- package/src/tools/search/search.ts +165 -52
- package/src/tools/search/tool.ts +217 -44
- package/src/tools/search/types.ts +37 -0
- package/src/tools/search/utils.ts +37 -5
- package/src/utils/llmConfig.ts +1 -1
package/src/tools/search/search.ts

@@ -1,10 +1,9 @@
-/* eslint-disable no-console */
 import axios from 'axios';
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 import type * as t from './types';
+import { getAttribution, createDefaultLogger } from './utils';
 import { FirecrawlScraper } from './firecrawl';
 import { BaseReranker } from './rerankers';
-import { getAttribution } from './utils';

 const chunker = {
   cleanText: (text: string): string => {
@@ -52,12 +51,14 @@ const chunker = {
       chunkSize?: number;
       chunkOverlap?: number;
       separators?: string[];
-    }
+    },
+    logger?: t.Logger
   ): Promise<string[][]> => {
     // Split multiple texts
+    const logger_ = logger || createDefaultLogger();
     const promises = texts.map((text) =>
       chunker.splitText(text, options).catch((error) => {
-
+        logger_.error('Error splitting text:', error);
         return [text];
       })
     );
@@ -82,18 +83,22 @@ const getHighlights = async ({
   content,
   reranker,
   topResults = 5,
+  logger,
 }: {
   content: string;
   query: string;
   reranker?: BaseReranker;
   topResults?: number;
+  logger?: t.Logger;
 }): Promise<t.Highlight[] | undefined> => {
+  const logger_ = logger || createDefaultLogger();
+
   if (!content) {
-
+    logger_.warn('No content provided for highlights');
     return;
   }
   if (!reranker) {
-
+    logger_.warn('No reranker provided for highlights');
     return;
   }

@@ -102,14 +107,14 @@ const getHighlights = async ({
     if (Array.isArray(documents)) {
       return await reranker.rerank(query, documents, topResults);
     } else {
-
+      logger_.error(
         'Expected documents to be an array, got:',
         typeof documents
       );
       return;
     }
   } catch (error) {
-
+    logger_.error('Error in content processing:', error);
     return;
   }
 };
@@ -131,25 +136,49 @@ const createSerperAPI = (

   const getSources = async ({
     query,
+    date,
     country,
+    safeSearch,
     numResults = 8,
+    type,
   }: t.GetSourcesParams): Promise<t.SearchResult> => {
     if (!query.trim()) {
       return { success: false, error: 'Query cannot be empty' };
     }

     try {
+      const safe = ['off', 'moderate', 'active'] as const;
       const payload: t.SerperSearchPayload = {
         q: query,
+        safe: safe[safeSearch ?? 1],
         num: Math.min(Math.max(1, numResults), 10),
       };

+      // Set the search type if provided
+      if (type) {
+        payload.type = type;
+      }
+
+      if (date != null) {
+        payload.tbs = `qdr:${date}`;
+      }
+
       if (country != null && country !== '') {
         payload['gl'] = country.toLowerCase();
       }

+      // Determine the API endpoint based on the search type
+      let apiEndpoint = config.apiUrl;
+      if (type === 'images') {
+        apiEndpoint = 'https://google.serper.dev/images';
+      } else if (type === 'videos') {
+        apiEndpoint = 'https://google.serper.dev/videos';
+      } else if (type === 'news') {
+        apiEndpoint = 'https://google.serper.dev/news';
+      }
+
       const response = await axios.post<t.SerperResultData>(
-
+        apiEndpoint,
         payload,
         {
           headers: {
@@ -169,6 +198,8 @@ const createSerperAPI = (
         peopleAlsoAsk: data.peopleAlsoAsk,
         knowledgeGraph: data.knowledgeGraph,
         relatedSearches: data.relatedSearches,
+        videos: data.videos ?? [],
+        news: data.news ?? [],
       };

       return { success: true, data: results };
@@ -202,6 +233,7 @@ const createSearXNGAPI = (
   const getSources = async ({
     query,
     numResults = 8,
+    type,
   }: t.GetSourcesParams): Promise<t.SearchResult> => {
     if (!query.trim()) {
       return { success: false, error: 'Query cannot be empty' };
@@ -218,12 +250,22 @@ const createSearXNGAPI = (
       searchUrl = searchUrl.replace(/\/$/, '') + '/search';
     }

+    // Determine the search category based on the type
+    let category = 'general';
+    if (type === 'images') {
+      category = 'images';
+    } else if (type === 'videos') {
+      category = 'videos';
+    } else if (type === 'news') {
+      category = 'news';
+    }
+
     // Prepare parameters for SearXNG
     const params: t.SearxNGSearchPayload = {
       q: query,
       format: 'json',
       pageno: 1,
-      categories:
+      categories: category,
       language: 'all',
       safesearch: 0,
       engines: 'google,bing,duckduckgo',
@@ -271,6 +313,8 @@ const createSearXNGAPI = (
         topStories: [],
         // Use undefined instead of null for optional properties
         relatedSearches: data.suggestions ?? [],
+        videos: [],
+        news: [],
       };

       return { success: true, data: results };
@@ -327,8 +371,10 @@ export const createSourceProcessor = (
     // strategies = ['no_extraction'],
     // filterContent = true,
     reranker,
+    logger,
   } = config;

+  const logger_ = logger || createDefaultLogger();
   const firecrawlScraper = scraperInstance;

   const webScraper = {
@@ -341,7 +387,7 @@ export const createSourceProcessor = (
       links: string[];
       onGetHighlights: t.SearchToolConfig['onGetHighlights'];
     }): Promise<Array<t.ScrapeResult>> => {
-
+      logger_.debug(`Scraping ${links.length} links with Firecrawl`);
       const promises: Array<Promise<t.ScrapeResult>> = [];
       try {
         for (let i = 0; i < links.length; i++) {
@@ -349,7 +395,11 @@ export const createSourceProcessor = (
           const promise: Promise<t.ScrapeResult> = firecrawlScraper
             .scrapeUrl(currentLink, {})
             .then(([url, response]) => {
-              const attribution = getAttribution(
+              const attribution = getAttribution(
+                url,
+                response.data?.metadata,
+                logger_
+              );
               if (response.success && response.data) {
                 const [content, references] =
                   firecrawlScraper.extractContent(response);
@@ -365,14 +415,15 @@ export const createSourceProcessor = (
                 url,
                 attribution,
                 error: true,
-                content:
+                content: '',
               } as t.ScrapeResult;
             })
             .then(async (result) => {
               try {
                 if (result.error != null) {
-
-                  `Error scraping ${result.url}: ${result.content}
+                  logger_.error(
+                    `Error scraping ${result.url}: ${result.content}`,
+                    result.error
                   );
                   return {
                     ...result,
@@ -382,6 +433,7 @@ export const createSourceProcessor = (
                   query,
                   reranker,
                   content: result.content,
+                  logger: logger_,
                 });
                 if (onGetHighlights) {
                   onGetHighlights(result.url);
@@ -391,25 +443,25 @@ export const createSourceProcessor = (
                   highlights,
                 };
               } catch (error) {
-
+                logger_.error('Error processing scraped content:', error);
                 return {
                   ...result,
                 };
               }
             })
             .catch((error) => {
-
+              logger_.error(`Error scraping ${currentLink}:`, error);
               return {
                 url: currentLink,
                 error: true,
-                content:
+                content: '',
               };
             });
           promises.push(promise);
         }
         return await Promise.all(promises);
       } catch (error) {
-
+        logger_.error('Error in scrapeMany:', error);
         return [];
       }
     },
@@ -453,6 +505,7 @@ export const createSourceProcessor = (
     result,
     numElements,
     query,
+    news,
     proMode = true,
     onGetHighlights,
   }: t.ProcessSourcesFields): Promise<t.SearchResultData> => {
@@ -503,52 +556,69 @@ export const createSourceProcessor = (
     }

     const sourceMap = new Map<string, t.ValidSource>();
-    const
+    const organicLinksSet = new Set<string>();

-
-
-
-
-
-
+    // Collect organic links
+    const organicLinks = collectLinks(
+      result.data.organic,
+      sourceMap,
+      organicLinksSet
+    );

-
+    // Collect top story links, excluding any that are already in organic links
+    const topStories = result.data.topStories ?? [];
+    const topStoryLinks = collectLinks(
+      topStories,
+      sourceMap,
+      organicLinksSet
+    );
+
+    if (organicLinks.length === 0 && (topStoryLinks.length === 0 || !news)) {
       return result.data;
     }

     const onContentScraped = createSourceUpdateCallback(sourceMap);
-
-
-
-
-
-
-
+    const promises: Promise<void>[] = [];
+
+    // Process organic links
+    if (organicLinks.length > 0) {
+      promises.push(
+        fetchContents({
+          query,
+          onGetHighlights,
+          onContentScraped,
+          links: organicLinks,
+          target: numElements,
+        })
+      );
+    }

-
-
-
-
-
-
-
-
-
+    // Process top story links
+    if (news && topStoryLinks.length > 0) {
+      promises.push(
+        fetchContents({
+          query,
+          onGetHighlights,
+          onContentScraped,
+          links: topStoryLinks,
+          target: numElements,
+        })
+      );
     }

-
-      .filter(
-        (source) =>
-          source.content != null && !source.content.startsWith('Failed')
-      )
-      .slice(0, numElements);
+    await Promise.all(promises);

-    if (
-    result.data.organic
+    if (result.data.organic.length > 0) {
+      updateSourcesWithContent(result.data.organic, sourceMap);
     }
+
+    if (news && topStories.length > 0) {
+      updateSourcesWithContent(topStories, sourceMap);
+    }
+
     return result.data;
   } catch (error) {
-
+    logger_.error('Error in processSources:', error);
     return {
       organic: [],
       topStories: [],
@@ -565,3 +635,46 @@ export const createSourceProcessor = (
     topResults,
   };
 };
+
+/** Helper function to collect links and update sourceMap */
+function collectLinks(
+  sources: Array<t.OrganicResult | t.TopStoryResult>,
+  sourceMap: Map<string, t.ValidSource>,
+  existingLinksSet?: Set<string>
+): string[] {
+  const links: string[] = [];
+
+  for (const source of sources) {
+    if (source.link) {
+      // For topStories, only add if not already in organic links
+      if (existingLinksSet && existingLinksSet.has(source.link)) {
+        continue;
+      }
+
+      links.push(source.link);
+      if (existingLinksSet) {
+        existingLinksSet.add(source.link);
+      }
+      sourceMap.set(source.link, source as t.ValidSource);
+    }
+  }
+
+  return links;
+}
+
+/** Helper function to update sources with scraped content */
+function updateSourcesWithContent<T extends t.ValidSource>(
+  sources: T[],
+  sourceMap: Map<string, t.ValidSource>
+): void {
+  for (let i = 0; i < sources.length; i++) {
+    const source = sources[i];
+    const updatedSource = sourceMap.get(source.link);
+    if (updatedSource) {
+      sources[i] = {
+        ...source,
+        ...updatedSource,
+      } as T;
+    }
+  }
+}