@librechat/agents 2.4.31 → 2.4.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/dist/cjs/events.cjs +3 -3
  2. package/dist/cjs/events.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +2 -1
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/main.cjs +5 -2
  6. package/dist/cjs/main.cjs.map +1 -1
  7. package/dist/cjs/messages/ids.cjs +23 -0
  8. package/dist/cjs/messages/ids.cjs.map +1 -0
  9. package/dist/cjs/splitStream.cjs +2 -1
  10. package/dist/cjs/splitStream.cjs.map +1 -1
  11. package/dist/cjs/stream.cjs +87 -154
  12. package/dist/cjs/stream.cjs.map +1 -1
  13. package/dist/cjs/tools/ToolNode.cjs +14 -3
  14. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  15. package/dist/cjs/tools/handlers.cjs +144 -0
  16. package/dist/cjs/tools/handlers.cjs.map +1 -0
  17. package/dist/cjs/tools/search/content.cjs +140 -0
  18. package/dist/cjs/tools/search/content.cjs.map +1 -0
  19. package/dist/cjs/tools/search/firecrawl.cjs +23 -41
  20. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  21. package/dist/cjs/tools/search/format.cjs +161 -74
  22. package/dist/cjs/tools/search/format.cjs.map +1 -1
  23. package/dist/cjs/tools/search/highlights.cjs +64 -12
  24. package/dist/cjs/tools/search/highlights.cjs.map +1 -1
  25. package/dist/cjs/tools/search/rerankers.cjs +35 -50
  26. package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
  27. package/dist/cjs/tools/search/schema.cjs +70 -0
  28. package/dist/cjs/tools/search/schema.cjs.map +1 -0
  29. package/dist/cjs/tools/search/search.cjs +153 -69
  30. package/dist/cjs/tools/search/search.cjs.map +1 -1
  31. package/dist/cjs/tools/search/tool.cjs +247 -58
  32. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  33. package/dist/cjs/tools/search/utils.cjs +66 -0
  34. package/dist/cjs/tools/search/utils.cjs.map +1 -0
  35. package/dist/esm/events.mjs +1 -1
  36. package/dist/esm/events.mjs.map +1 -1
  37. package/dist/esm/graphs/Graph.mjs +2 -1
  38. package/dist/esm/graphs/Graph.mjs.map +1 -1
  39. package/dist/esm/main.mjs +3 -1
  40. package/dist/esm/main.mjs.map +1 -1
  41. package/dist/esm/messages/ids.mjs +21 -0
  42. package/dist/esm/messages/ids.mjs.map +1 -0
  43. package/dist/esm/splitStream.mjs +2 -1
  44. package/dist/esm/splitStream.mjs.map +1 -1
  45. package/dist/esm/stream.mjs +87 -152
  46. package/dist/esm/stream.mjs.map +1 -1
  47. package/dist/esm/tools/ToolNode.mjs +14 -3
  48. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  49. package/dist/esm/tools/handlers.mjs +141 -0
  50. package/dist/esm/tools/handlers.mjs.map +1 -0
  51. package/dist/esm/tools/search/content.mjs +119 -0
  52. package/dist/esm/tools/search/content.mjs.map +1 -0
  53. package/dist/esm/tools/search/firecrawl.mjs +24 -41
  54. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  55. package/dist/esm/tools/search/format.mjs +161 -74
  56. package/dist/esm/tools/search/format.mjs.map +1 -1
  57. package/dist/esm/tools/search/highlights.mjs +64 -12
  58. package/dist/esm/tools/search/highlights.mjs.map +1 -1
  59. package/dist/esm/tools/search/rerankers.mjs +35 -50
  60. package/dist/esm/tools/search/rerankers.mjs.map +1 -1
  61. package/dist/esm/tools/search/schema.mjs +61 -0
  62. package/dist/esm/tools/search/schema.mjs.map +1 -0
  63. package/dist/esm/tools/search/search.mjs +153 -69
  64. package/dist/esm/tools/search/search.mjs.map +1 -1
  65. package/dist/esm/tools/search/tool.mjs +246 -57
  66. package/dist/esm/tools/search/tool.mjs.map +1 -1
  67. package/dist/esm/tools/search/utils.mjs +61 -0
  68. package/dist/esm/tools/search/utils.mjs.map +1 -0
  69. package/dist/types/graphs/Graph.d.ts +1 -1
  70. package/dist/types/index.d.ts +1 -0
  71. package/dist/types/messages/ids.d.ts +3 -0
  72. package/dist/types/messages/index.d.ts +1 -0
  73. package/dist/types/stream.d.ts +0 -8
  74. package/dist/types/tools/ToolNode.d.ts +6 -0
  75. package/dist/types/tools/example.d.ts +23 -3
  76. package/dist/types/tools/handlers.d.ts +8 -0
  77. package/dist/types/tools/search/content.d.ts +4 -0
  78. package/dist/types/tools/search/firecrawl.d.ts +7 -86
  79. package/dist/types/tools/search/format.d.ts +4 -1
  80. package/dist/types/tools/search/highlights.d.ts +1 -1
  81. package/dist/types/tools/search/rerankers.d.ts +8 -5
  82. package/dist/types/tools/search/schema.d.ts +16 -0
  83. package/dist/types/tools/search/search.d.ts +2 -2
  84. package/dist/types/tools/search/test.d.ts +1 -0
  85. package/dist/types/tools/search/tool.d.ts +25 -4
  86. package/dist/types/tools/search/types.d.ts +443 -53
  87. package/dist/types/tools/search/utils.d.ts +10 -0
  88. package/package.json +9 -7
  89. package/src/events.ts +49 -15
  90. package/src/graphs/Graph.ts +6 -2
  91. package/src/index.ts +1 -0
  92. package/src/messages/ids.ts +26 -0
  93. package/src/messages/index.ts +1 -0
  94. package/src/scripts/search.ts +8 -3
  95. package/src/splitStream.test.ts +132 -71
  96. package/src/splitStream.ts +2 -1
  97. package/src/stream.ts +94 -183
  98. package/src/tools/ToolNode.ts +37 -14
  99. package/src/tools/handlers.ts +167 -0
  100. package/src/tools/search/content.test.ts +173 -0
  101. package/src/tools/search/content.ts +147 -0
  102. package/src/tools/search/firecrawl.ts +36 -148
  103. package/src/tools/search/format.ts +205 -74
  104. package/src/tools/search/highlights.ts +99 -16
  105. package/src/tools/search/output.md +2775 -0
  106. package/src/tools/search/rerankers.ts +50 -62
  107. package/src/tools/search/schema.ts +63 -0
  108. package/src/tools/search/search.ts +232 -116
  109. package/src/tools/search/test.html +884 -0
  110. package/src/tools/search/test.md +643 -0
  111. package/src/tools/search/test.ts +159 -0
  112. package/src/tools/search/tool.ts +363 -87
  113. package/src/tools/search/types.ts +503 -61
  114. package/src/tools/search/utils.ts +79 -0
  115. package/src/utils/llmConfig.ts +1 -1
package/src/tools/search/search.ts
@@ -1,8 +1,8 @@
-/* eslint-disable no-console */
 import axios from 'axios';
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 import type * as t from './types';
-import { getAttribution, FirecrawlScraper } from './firecrawl';
+import { getAttribution, createDefaultLogger } from './utils';
+import { FirecrawlScraper } from './firecrawl';
 import { BaseReranker } from './rerankers';
 
 const chunker = {
@@ -51,12 +51,14 @@ const chunker = {
     chunkSize?: number;
     chunkOverlap?: number;
     separators?: string[];
-  }
+  },
+  logger?: t.Logger
 ): Promise<string[][]> => {
   // Split multiple texts
+  const logger_ = logger || createDefaultLogger();
   const promises = texts.map((text) =>
     chunker.splitText(text, options).catch((error) => {
-      console.error('Error splitting text:', error);
+      logger_.error('Error splitting text:', error);
       return [text];
     })
   );
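Note: the change above is the pattern for this entire file: direct console.* calls are replaced by an injectable logger that falls back to a default. A minimal sketch of the idea, assuming t.Logger exposes debug/warn/error methods (this interface is inferred from how the logger is used in this diff, not copied from the package):

    // Inferred shape of t.Logger, based on the calls made in this diff
    interface Logger {
      debug: (...args: unknown[]) => void;
      warn: (...args: unknown[]) => void;
      error: (...args: unknown[]) => void;
    }

    // Assumed behavior of createDefaultLogger from './utils':
    // a console-backed logger used whenever no logger is injected
    function createDefaultLogger(): Logger {
      return {
        debug: (...args) => console.debug(...args),
        warn: (...args) => console.warn(...args),
        error: (...args) => console.error(...args),
      };
    }

    // The fallback used throughout the file:
    const injectedLogger: Logger | undefined = undefined; // whatever the caller passes in
    const logger_ = injectedLogger || createDefaultLogger();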
@@ -64,7 +66,7 @@ const chunker = {
   },
 };
 
-const createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {
+function createSourceUpdateCallback(sourceMap: Map<string, t.ValidSource>) {
   return (link: string, update?: Partial<t.ValidSource>): void => {
     const source = sourceMap.get(link);
     if (source) {
@@ -74,25 +76,29 @@ const createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {
       });
     }
   };
-};
+}
 
 const getHighlights = async ({
   query,
   content,
   reranker,
   topResults = 5,
+  logger,
 }: {
   content: string;
   query: string;
   reranker?: BaseReranker;
   topResults?: number;
+  logger?: t.Logger;
 }): Promise<t.Highlight[] | undefined> => {
+  const logger_ = logger || createDefaultLogger();
+
   if (!content) {
-    console.warn('No content provided for highlights');
+    logger_.warn('No content provided for highlights');
    return;
  }
  if (!reranker) {
-    console.warn('No reranker provided for highlights');
+    logger_.warn('No reranker provided for highlights');
    return;
  }
 
@@ -101,14 +107,14 @@ const getHighlights = async ({
     if (Array.isArray(documents)) {
       return await reranker.rerank(query, documents, topResults);
     } else {
-      console.error(
+      logger_.error(
         'Expected documents to be an array, got:',
         typeof documents
       );
       return;
     }
   } catch (error) {
-    console.error('Error in content processing:', error);
+    logger_.error('Error in content processing:', error);
     return;
   }
 };
@@ -116,16 +122,11 @@
 const createSerperAPI = (
   apiKey?: string
 ): {
-  getSources: (
-    query: string,
-    numResults?: number,
-    storedLocation?: string
-  ) => Promise<t.SearchResult>;
+  getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;
 } => {
   const config = {
     apiKey: apiKey ?? process.env.SERPER_API_KEY,
     apiUrl: 'https://google.serper.dev/search',
-    defaultLocation: 'us',
     timeout: 10000,
   };
 
@@ -133,43 +134,72 @@
     throw new Error('SERPER_API_KEY is required for SerperAPI');
   }
 
-  const getSources = async (
-    query: string,
-    numResults: number = 8,
-    storedLocation?: string
-  ): Promise<t.SearchResult> => {
+  const getSources = async ({
+    query,
+    date,
+    country,
+    safeSearch,
+    numResults = 8,
+    type,
+  }: t.GetSourcesParams): Promise<t.SearchResult> => {
     if (!query.trim()) {
       return { success: false, error: 'Query cannot be empty' };
     }
 
     try {
-      const searchLocation = (
-        storedLocation ?? config.defaultLocation
-      ).toLowerCase();
-
-      const payload = {
+      const safe = ['off', 'moderate', 'active'] as const;
+      const payload: t.SerperSearchPayload = {
         q: query,
+        safe: safe[safeSearch ?? 1],
         num: Math.min(Math.max(1, numResults), 10),
-        gl: searchLocation,
       };
 
-      const response = await axios.post(config.apiUrl, payload, {
-        headers: {
-          'X-API-KEY': config.apiKey,
-          'Content-Type': 'application/json',
-        },
-        timeout: config.timeout,
-      });
+      // Set the search type if provided
+      if (type) {
+        payload.type = type;
+      }
+
+      if (date != null) {
+        payload.tbs = `qdr:${date}`;
+      }
+
+      if (country != null && country !== '') {
+        payload['gl'] = country.toLowerCase();
+      }
+
+      // Determine the API endpoint based on the search type
+      let apiEndpoint = config.apiUrl;
+      if (type === 'images') {
+        apiEndpoint = 'https://google.serper.dev/images';
+      } else if (type === 'videos') {
+        apiEndpoint = 'https://google.serper.dev/videos';
+      } else if (type === 'news') {
+        apiEndpoint = 'https://google.serper.dev/news';
+      }
+
+      const response = await axios.post<t.SerperResultData>(
+        apiEndpoint,
+        payload,
+        {
+          headers: {
+            'X-API-KEY': config.apiKey,
+            'Content-Type': 'application/json',
+          },
+          timeout: config.timeout,
+        }
+      );
 
       const data = response.data;
       const results: t.SearchResultData = {
         organic: data.organic,
         images: data.images ?? [],
+        answerBox: data.answerBox,
         topStories: data.topStories ?? [],
-        knowledgeGraph: data.knowledgeGraph as t.KnowledgeGraphResult,
-        answerBox: data.answerBox as t.AnswerBoxResult,
-        peopleAlsoAsk: data.peopleAlsoAsk as t.PeopleAlsoAskResult[],
-        relatedSearches: data.relatedSearches as string[],
+        peopleAlsoAsk: data.peopleAlsoAsk,
+        knowledgeGraph: data.knowledgeGraph,
+        relatedSearches: data.relatedSearches,
+        videos: data.videos ?? [],
+        news: data.news ?? [],
       };
 
       return { success: true, data: results };
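For illustration, here is what a call against the new object-style getSources signature could look like. The values are hypothetical; the comments describe what the payload construction above does with each field:

    const serper = createSerperAPI(process.env.SERPER_API_KEY);

    async function example(): Promise<void> {
      const result = await serper.getSources({
        query: 'librechat agents release',
        type: 'news',  // routes the request to https://google.serper.dev/news
        date: 'w',     // becomes tbs=qdr:w, Google's past-week time filter
        country: 'us', // lowercased into the gl parameter
        safeSearch: 2, // index into ['off', 'moderate', 'active'] -> 'active'
        numResults: 8, // clamped to the 1..10 range
      });
      if (result.success && result.data) {
        console.log(result.data.news?.length ?? 0, 'news items');
      }
    }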
@@ -187,11 +217,7 @@ const createSearXNGAPI = (
   instanceUrl?: string,
   apiKey?: string
 ): {
-  getSources: (
-    query: string,
-    numResults?: number,
-    storedLocation?: string
-  ) => Promise<t.SearchResult>;
+  getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;
 } => {
   const config = {
     instanceUrl: instanceUrl ?? process.env.SEARXNG_INSTANCE_URL,
@@ -204,11 +230,11 @@
     throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');
   }
 
-  const getSources = async (
-    query: string,
-    numResults: number = 8,
-    storedLocation?: string
-  ): Promise<t.SearchResult> => {
+  const getSources = async ({
+    query,
+    numResults = 8,
+    type,
+  }: t.GetSourcesParams): Promise<t.SearchResult> => {
     if (!query.trim()) {
       return { success: false, error: 'Query cannot be empty' };
     }
@@ -224,22 +250,27 @@
         searchUrl = searchUrl.replace(/\/$/, '') + '/search';
       }
 
+      // Determine the search category based on the type
+      let category = 'general';
+      if (type === 'images') {
+        category = 'images';
+      } else if (type === 'videos') {
+        category = 'videos';
+      } else if (type === 'news') {
+        category = 'news';
+      }
+
       // Prepare parameters for SearXNG
-      const params: Record<string, string | number> = {
+      const params: t.SearxNGSearchPayload = {
         q: query,
         format: 'json',
         pageno: 1,
-        categories: 'general',
+        categories: category,
         language: 'all',
         safesearch: 0,
         engines: 'google,bing,duckduckgo',
-        max_results: Math.min(Math.max(1, numResults), 20),
       };
 
-      if (storedLocation != null && storedLocation !== 'all') {
-        params.language = storedLocation;
-      }
-
       const headers: Record<string, string> = {
         'Content-Type': 'application/json',
       };
@@ -282,6 +313,8 @@
         topStories: [],
         // Use undefined instead of null for optional properties
         relatedSearches: data.suggestions ?? [],
+        videos: [],
+        news: [],
       };
 
       return { success: true, data: results };
@@ -301,11 +334,7 @@
 export const createSearchAPI = (
   config: t.SearchConfig
 ): {
-  getSources: (
-    query: string,
-    numResults?: number,
-    storedLocation?: string
-  ) => Promise<t.SearchResult>;
+  getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;
 } => {
   const {
     searchProvider = 'serper',
@@ -330,10 +359,7 @@ export const createSourceProcessor = (
   scraperInstance?: FirecrawlScraper
 ): {
   processSources: (
-    result: t.SearchResult,
-    numElements: number,
-    query: string,
-    proMode?: boolean
+    fields: t.ProcessSourcesFields
   ) => Promise<t.SearchResultData>;
   topResults: number;
 } => {
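Call sites change accordingly: processSources now takes a single t.ProcessSourcesFields object instead of positional arguments, and (as the implementation hunks below show) proMode now defaults to true rather than false. A hypothetical before/after, with parameter order and the config type assumed from the hunk above:

    // Hypothetical call site; searchResult and query come from the caller.
    async function runSearch(
      config: t.SearchToolConfig, // assumed config type
      scraperInstance: FirecrawlScraper,
      searchResult: t.SearchResult,
      query: string
    ): Promise<t.SearchResultData> {
      const processor = createSourceProcessor(config, scraperInstance);
      // Before this release: processor.processSources(searchResult, 5, query, false);
      return processor.processSources({
        result: searchResult, // t.SearchResult returned by getSources
        numElements: 5,       // how many links to scrape for content
        query,
        news: true,           // also scrape and merge topStories links
        onGetHighlights: (url) => console.log('highlights ready for', url),
      });
    }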
@@ -345,46 +371,61 @@
     // strategies = ['no_extraction'],
     // filterContent = true,
     reranker,
+    logger,
   } = config;
 
+  const logger_ = logger || createDefaultLogger();
   const firecrawlScraper = scraperInstance;
 
   const webScraper = {
     scrapeMany: async ({
       query,
       links,
+      onGetHighlights,
     }: {
       query: string;
       links: string[];
+      onGetHighlights: t.SearchToolConfig['onGetHighlights'];
     }): Promise<Array<t.ScrapeResult>> => {
-      console.log(`Scraping ${links.length} links with Firecrawl`);
+      logger_.debug(`Scraping ${links.length} links with Firecrawl`);
       const promises: Array<Promise<t.ScrapeResult>> = [];
       try {
-        for (const currentLink of links) {
+        for (let i = 0; i < links.length; i++) {
+          const currentLink = links[i];
          const promise: Promise<t.ScrapeResult> = firecrawlScraper
            .scrapeUrl(currentLink, {})
            .then(([url, response]) => {
-              const attribution = getAttribution(url, response.data?.metadata);
+              const attribution = getAttribution(
+                url,
+                response.data?.metadata,
+                logger_
+              );
              if (response.success && response.data) {
-                const content = firecrawlScraper.extractContent(response);
+                const [content, references] =
+                  firecrawlScraper.extractContent(response);
                return {
                  url,
+                  references,
                  attribution,
                  content: chunker.cleanText(content),
-                };
+                } as t.ScrapeResult;
+              } else {
+                logger_.error(
+                  `Error scraping ${url}: ${response.error ?? 'Unknown error'}`
+                );
              }
 
              return {
                url,
                attribution,
                error: true,
-                content: `Failed to scrape ${url}: ${response.error ?? 'Unknown error'}`,
-              };
+                content: '',
+              } as t.ScrapeResult;
            })
            .then(async (result) => {
              try {
                if (result.error != null) {
-                  console.error(
+                  logger_.error(
                    `Error scraping ${result.url}: ${result.content}`
                  );
                  return {
@@ -395,31 +436,35 @@
                  query,
                  reranker,
                  content: result.content,
+                  logger: logger_,
                });
+                if (onGetHighlights) {
+                  onGetHighlights(result.url);
+                }
                return {
                  ...result,
                  highlights,
                };
              } catch (error) {
-                console.error('Error processing scraped content:', error);
+                logger_.error('Error processing scraped content:', error);
                return {
                  ...result,
                };
              }
            })
            .catch((error) => {
-              console.error(`Error scraping ${currentLink}:`, error);
+              logger_.error(`Error scraping ${currentLink}:`, error);
              return {
                url: currentLink,
                error: true,
-                content: `Failed to scrape ${currentLink}: ${error.message ?? 'Unknown error'}`,
+                content: '',
              };
            });
          promises.push(promise);
        }
        return await Promise.all(promises);
      } catch (error) {
-        console.error('Error in scrapeMany:', error);
+        logger_.error('Error in scrapeMany:', error);
        return [];
      }
    },
@@ -429,35 +474,44 @@
     links,
     query,
     target,
+    onGetHighlights,
     onContentScraped,
   }: {
     links: string[];
     query: string;
     target: number;
+    onGetHighlights: t.SearchToolConfig['onGetHighlights'];
     onContentScraped?: (link: string, update?: Partial<t.ValidSource>) => void;
   }): Promise<void> => {
     const initialLinks = links.slice(0, target);
     // const remainingLinks = links.slice(target).reverse();
-    const results = await webScraper.scrapeMany({ query, links: initialLinks });
+    const results = await webScraper.scrapeMany({
+      query,
+      links: initialLinks,
+      onGetHighlights,
+    });
     for (const result of results) {
       if (result.error === true) {
         continue;
       }
-      const { url, content, attribution, highlights } = result;
+      const { url, content, attribution, references, highlights } = result;
       onContentScraped?.(url, {
         content,
         attribution,
+        references,
         highlights,
       });
     }
   };
 
-  const processSources = async (
-    result: t.SearchResult,
-    numElements: number,
-    query: string,
-    proMode: boolean = false
-  ): Promise<t.SearchResultData> => {
+  const processSources = async ({
+    result,
+    numElements,
+    query,
+    news,
+    proMode = true,
+    onGetHighlights,
+  }: t.ProcessSourcesFields): Promise<t.SearchResultData> => {
     try {
       if (!result.data) {
         return {
@@ -485,6 +539,7 @@
         await fetchContents({
           query,
           target: 1,
+          onGetHighlights,
           onContentScraped,
           links: [wikiSources[0].link],
         });
@@ -504,51 +559,69 @@
       }
 
       const sourceMap = new Map<string, t.ValidSource>();
-      const allLinks: string[] = [];
+      const organicLinksSet = new Set<string>();
 
-      for (const source of result.data.organic) {
-        if (source.link) {
-          allLinks.push(source.link);
-          sourceMap.set(source.link, source);
-        }
-      }
+      // Collect organic links
+      const organicLinks = collectLinks(
+        result.data.organic,
+        sourceMap,
+        organicLinksSet
+      );
+
+      // Collect top story links, excluding any that are already in organic links
+      const topStories = result.data.topStories ?? [];
+      const topStoryLinks = collectLinks(
+        topStories,
+        sourceMap,
+        organicLinksSet
+      );
 
-      if (allLinks.length === 0) {
+      if (organicLinks.length === 0 && (topStoryLinks.length === 0 || !news)) {
        return result.data;
      }
 
      const onContentScraped = createSourceUpdateCallback(sourceMap);
-      await fetchContents({
-        links: allLinks,
-        query,
-        onContentScraped,
-        target: numElements,
-      });
+      const promises: Promise<void>[] = [];
+
+      // Process organic links
+      if (organicLinks.length > 0) {
+        promises.push(
+          fetchContents({
+            query,
+            onGetHighlights,
+            onContentScraped,
+            links: organicLinks,
+            target: numElements,
+          })
+        );
+      }
 
-      for (let i = 0; i < result.data.organic.length; i++) {
-        const source = result.data.organic[i];
-        const updatedSource = sourceMap.get(source.link);
-        if (updatedSource) {
-          result.data.organic[i] = {
-            ...source,
-            ...updatedSource,
-          };
-        }
+      // Process top story links
+      if (news && topStoryLinks.length > 0) {
+        promises.push(
+          fetchContents({
+            query,
+            onGetHighlights,
+            onContentScraped,
+            links: topStoryLinks,
+            target: numElements,
+          })
+        );
      }
 
-      const successfulSources = result.data.organic
-        .filter(
-          (source) =>
-            source.content != null && !source.content.startsWith('Failed')
-        )
-        .slice(0, numElements);
+      await Promise.all(promises);
 
-      if (successfulSources.length > 0) {
-        result.data.organic = successfulSources;
+      if (result.data.organic.length > 0) {
+        updateSourcesWithContent(result.data.organic, sourceMap);
      }
+
+      if (news && topStories.length > 0) {
+        updateSourcesWithContent(topStories, sourceMap);
+      }
+
      return result.data;
    } catch (error) {
-      console.error('Error in processSources:', error);
+      logger_.error('Error in processSources:', error);
      return {
        organic: [],
        topStories: [],
@@ -565,3 +638,46 @@
     topResults,
   };
 };
+
+/** Helper function to collect links and update sourceMap */
+function collectLinks(
+  sources: Array<t.OrganicResult | t.TopStoryResult>,
+  sourceMap: Map<string, t.ValidSource>,
+  existingLinksSet?: Set<string>
+): string[] {
+  const links: string[] = [];
+
+  for (const source of sources) {
+    if (source.link) {
+      // For topStories, only add if not already in organic links
+      if (existingLinksSet && existingLinksSet.has(source.link)) {
+        continue;
+      }
+
+      links.push(source.link);
+      if (existingLinksSet) {
+        existingLinksSet.add(source.link);
+      }
+      sourceMap.set(source.link, source as t.ValidSource);
+    }
+  }
+
+  return links;
+}
+
+/** Helper function to update sources with scraped content */
+function updateSourcesWithContent<T extends t.ValidSource>(
+  sources: T[],
+  sourceMap: Map<string, t.ValidSource>
+): void {
+  for (let i = 0; i < sources.length; i++) {
+    const source = sources[i];
+    const updatedSource = sourceMap.get(source.link);
+    if (updatedSource) {
+      sources[i] = {
+        ...source,
+        ...updatedSource,
+      } as T;
+    }
+  }
+}
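A small sketch of the deduplication behaviour of the new collectLinks helper (the data is made up and the casts only keep the sketch short): links already collected from organic results are skipped when the same Set is passed again for topStories.

    const sourceMap = new Map<string, t.ValidSource>();
    const seen = new Set<string>();

    const organic = [
      { link: 'https://a.example' },
      { link: 'https://b.example' },
    ] as t.OrganicResult[];
    const topStories = [
      { link: 'https://b.example' }, // duplicate of an organic link
      { link: 'https://c.example' },
    ] as t.TopStoryResult[];

    collectLinks(organic, sourceMap, seen);
    // -> ['https://a.example', 'https://b.example']
    collectLinks(topStories, sourceMap, seen);
    // -> ['https://c.example']; b.example was already collected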