@librechat/agents 2.4.31 → 2.4.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/dist/cjs/events.cjs +3 -3
  2. package/dist/cjs/events.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +2 -1
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/main.cjs +5 -2
  6. package/dist/cjs/main.cjs.map +1 -1
  7. package/dist/cjs/messages/ids.cjs +23 -0
  8. package/dist/cjs/messages/ids.cjs.map +1 -0
  9. package/dist/cjs/splitStream.cjs +2 -1
  10. package/dist/cjs/splitStream.cjs.map +1 -1
  11. package/dist/cjs/stream.cjs +87 -154
  12. package/dist/cjs/stream.cjs.map +1 -1
  13. package/dist/cjs/tools/ToolNode.cjs +14 -3
  14. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  15. package/dist/cjs/tools/handlers.cjs +144 -0
  16. package/dist/cjs/tools/handlers.cjs.map +1 -0
  17. package/dist/cjs/tools/search/content.cjs +140 -0
  18. package/dist/cjs/tools/search/content.cjs.map +1 -0
  19. package/dist/cjs/tools/search/firecrawl.cjs +23 -41
  20. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  21. package/dist/cjs/tools/search/format.cjs +161 -74
  22. package/dist/cjs/tools/search/format.cjs.map +1 -1
  23. package/dist/cjs/tools/search/highlights.cjs +64 -12
  24. package/dist/cjs/tools/search/highlights.cjs.map +1 -1
  25. package/dist/cjs/tools/search/rerankers.cjs +43 -36
  26. package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
  27. package/dist/cjs/tools/search/schema.cjs +70 -0
  28. package/dist/cjs/tools/search/schema.cjs.map +1 -0
  29. package/dist/cjs/tools/search/search.cjs +150 -69
  30. package/dist/cjs/tools/search/search.cjs.map +1 -1
  31. package/dist/cjs/tools/search/tool.cjs +247 -58
  32. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  33. package/dist/cjs/tools/search/utils.cjs +66 -0
  34. package/dist/cjs/tools/search/utils.cjs.map +1 -0
  35. package/dist/esm/events.mjs +1 -1
  36. package/dist/esm/events.mjs.map +1 -1
  37. package/dist/esm/graphs/Graph.mjs +2 -1
  38. package/dist/esm/graphs/Graph.mjs.map +1 -1
  39. package/dist/esm/main.mjs +3 -1
  40. package/dist/esm/main.mjs.map +1 -1
  41. package/dist/esm/messages/ids.mjs +21 -0
  42. package/dist/esm/messages/ids.mjs.map +1 -0
  43. package/dist/esm/splitStream.mjs +2 -1
  44. package/dist/esm/splitStream.mjs.map +1 -1
  45. package/dist/esm/stream.mjs +87 -152
  46. package/dist/esm/stream.mjs.map +1 -1
  47. package/dist/esm/tools/ToolNode.mjs +14 -3
  48. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  49. package/dist/esm/tools/handlers.mjs +141 -0
  50. package/dist/esm/tools/handlers.mjs.map +1 -0
  51. package/dist/esm/tools/search/content.mjs +119 -0
  52. package/dist/esm/tools/search/content.mjs.map +1 -0
  53. package/dist/esm/tools/search/firecrawl.mjs +24 -41
  54. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  55. package/dist/esm/tools/search/format.mjs +161 -74
  56. package/dist/esm/tools/search/format.mjs.map +1 -1
  57. package/dist/esm/tools/search/highlights.mjs +64 -12
  58. package/dist/esm/tools/search/highlights.mjs.map +1 -1
  59. package/dist/esm/tools/search/rerankers.mjs +43 -36
  60. package/dist/esm/tools/search/rerankers.mjs.map +1 -1
  61. package/dist/esm/tools/search/schema.mjs +61 -0
  62. package/dist/esm/tools/search/schema.mjs.map +1 -0
  63. package/dist/esm/tools/search/search.mjs +150 -69
  64. package/dist/esm/tools/search/search.mjs.map +1 -1
  65. package/dist/esm/tools/search/tool.mjs +246 -57
  66. package/dist/esm/tools/search/tool.mjs.map +1 -1
  67. package/dist/esm/tools/search/utils.mjs +61 -0
  68. package/dist/esm/tools/search/utils.mjs.map +1 -0
  69. package/dist/types/graphs/Graph.d.ts +1 -1
  70. package/dist/types/index.d.ts +1 -0
  71. package/dist/types/messages/ids.d.ts +3 -0
  72. package/dist/types/messages/index.d.ts +1 -0
  73. package/dist/types/stream.d.ts +0 -8
  74. package/dist/types/tools/ToolNode.d.ts +6 -0
  75. package/dist/types/tools/example.d.ts +23 -3
  76. package/dist/types/tools/handlers.d.ts +8 -0
  77. package/dist/types/tools/search/content.d.ts +4 -0
  78. package/dist/types/tools/search/firecrawl.d.ts +7 -86
  79. package/dist/types/tools/search/format.d.ts +4 -1
  80. package/dist/types/tools/search/highlights.d.ts +1 -1
  81. package/dist/types/tools/search/rerankers.d.ts +8 -4
  82. package/dist/types/tools/search/schema.d.ts +16 -0
  83. package/dist/types/tools/search/search.d.ts +2 -2
  84. package/dist/types/tools/search/test.d.ts +1 -0
  85. package/dist/types/tools/search/tool.d.ts +25 -4
  86. package/dist/types/tools/search/types.d.ts +443 -53
  87. package/dist/types/tools/search/utils.d.ts +10 -0
  88. package/package.json +9 -7
  89. package/src/events.ts +49 -15
  90. package/src/graphs/Graph.ts +6 -2
  91. package/src/index.ts +1 -0
  92. package/src/messages/ids.ts +26 -0
  93. package/src/messages/index.ts +1 -0
  94. package/src/scripts/search.ts +8 -3
  95. package/src/splitStream.test.ts +132 -71
  96. package/src/splitStream.ts +2 -1
  97. package/src/stream.ts +94 -183
  98. package/src/tools/ToolNode.ts +37 -14
  99. package/src/tools/handlers.ts +167 -0
  100. package/src/tools/search/content.test.ts +173 -0
  101. package/src/tools/search/content.ts +147 -0
  102. package/src/tools/search/firecrawl.ts +36 -148
  103. package/src/tools/search/format.ts +205 -74
  104. package/src/tools/search/highlights.ts +99 -16
  105. package/src/tools/search/output.md +2775 -0
  106. package/src/tools/search/rerankers.ts +57 -36
  107. package/src/tools/search/schema.ts +63 -0
  108. package/src/tools/search/search.ts +230 -117
  109. package/src/tools/search/test.html +884 -0
  110. package/src/tools/search/test.md +643 -0
  111. package/src/tools/search/test.ts +159 -0
  112. package/src/tools/search/tool.ts +363 -87
  113. package/src/tools/search/types.ts +503 -61
  114. package/src/tools/search/utils.ts +79 -0
  115. package/src/utils/llmConfig.ts +1 -1
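
The hunks below are from the search module, whose `createSerperAPI`, `createSearXNGAPI`, and `createSourceProcessor` factories correspond to `package/src/tools/search/search.ts` (+230 -117) in the list above. Two changes dominate: `getSources` now takes a single `t.GetSourcesParams` object instead of positional `(query, numResults, storedLocation)` arguments, and an optional injectable logger is threaded through every factory. A minimal usage sketch of the new call shape, assuming `createSearchAPI` is reachable from the package root (otherwise import it from the search module) and that `GetSourcesParams` carries exactly the fields the diff reads:

// Sketch only: field semantics are inferred from the diff below,
// not from the package's published documentation.
import { createSearchAPI } from '@librechat/agents';

// Serper requires SERPER_API_KEY when no API key is supplied
const { getSources } = createSearchAPI({ searchProvider: 'serper' });

// 2.4.31: await getSources('anthropic claude', 8, 'us');
// 2.4.33: one params object; `type` also selects the Serper endpoint
const result = await getSources({
  query: 'anthropic claude',
  numResults: 8, // clamped to 1..10 for Serper
  type: 'news', // 'images' | 'videos' | 'news' | undefined (web search)
  country: 'us', // sent as the `gl` payload field
  date: 'w', // sent as `tbs: 'qdr:w'` (Google time filter, past week)
  safeSearch: 1, // index into ['off', 'moderate', 'active']
});

if (result.success && result.data) {
  console.log(`${result.data.organic?.length ?? 0} organic results`);
}
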
@@ -1,8 +1,8 @@
-/* eslint-disable no-console */
 import axios from 'axios';
 import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
 import type * as t from './types';
-import { getAttribution, FirecrawlScraper } from './firecrawl';
+import { getAttribution, createDefaultLogger } from './utils';
+import { FirecrawlScraper } from './firecrawl';
 import { BaseReranker } from './rerankers';
 
 const chunker = {
@@ -51,12 +51,14 @@ const chunker = {
       chunkSize?: number;
       chunkOverlap?: number;
       separators?: string[];
-    }
+    },
+    logger?: t.Logger
   ): Promise<string[][]> => {
     // Split multiple texts
+    const logger_ = logger || createDefaultLogger();
     const promises = texts.map((text) =>
       chunker.splitText(text, options).catch((error) => {
-        console.error('Error splitting text:', error);
+        logger_.error('Error splitting text:', error);
         return [text];
       })
     );
@@ -64,7 +66,7 @@ const chunker = {
   },
 };
 
-const createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {
+function createSourceUpdateCallback(sourceMap: Map<string, t.ValidSource>) {
   return (link: string, update?: Partial<t.ValidSource>): void => {
     const source = sourceMap.get(link);
     if (source) {
@@ -74,25 +76,29 @@ const createSourceUpdateCallback = (sourceMap: Map<string, t.ValidSource>) => {
       });
     }
   };
-};
+}
 
 const getHighlights = async ({
   query,
   content,
   reranker,
   topResults = 5,
+  logger,
 }: {
   content: string;
   query: string;
   reranker?: BaseReranker;
   topResults?: number;
+  logger?: t.Logger;
 }): Promise<t.Highlight[] | undefined> => {
+  const logger_ = logger || createDefaultLogger();
+
   if (!content) {
-    console.warn('No content provided for highlights');
+    logger_.warn('No content provided for highlights');
     return;
   }
   if (!reranker) {
-    console.warn('No reranker provided for highlights');
+    logger_.warn('No reranker provided for highlights');
     return;
   }
 
@@ -101,14 +107,14 @@ const getHighlights = async ({
     if (Array.isArray(documents)) {
       return await reranker.rerank(query, documents, topResults);
     } else {
-      console.error(
+      logger_.error(
        'Expected documents to be an array, got:',
         typeof documents
       );
       return;
     }
   } catch (error) {
-    console.error('Error in content processing:', error);
+    logger_.error('Error in content processing:', error);
     return;
   }
 };
@@ -116,16 +122,11 @@ const getHighlights = async ({
 const createSerperAPI = (
   apiKey?: string
 ): {
-  getSources: (
-    query: string,
-    numResults?: number,
-    storedLocation?: string
-  ) => Promise<t.SearchResult>;
+  getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;
 } => {
   const config = {
     apiKey: apiKey ?? process.env.SERPER_API_KEY,
     apiUrl: 'https://google.serper.dev/search',
-    defaultLocation: 'us',
     timeout: 10000,
   };
 
@@ -133,43 +134,72 @@ const createSerperAPI = (
     throw new Error('SERPER_API_KEY is required for SerperAPI');
   }
 
-  const getSources = async (
-    query: string,
-    numResults: number = 8,
-    storedLocation?: string
-  ): Promise<t.SearchResult> => {
+  const getSources = async ({
+    query,
+    date,
+    country,
+    safeSearch,
+    numResults = 8,
+    type,
+  }: t.GetSourcesParams): Promise<t.SearchResult> => {
     if (!query.trim()) {
       return { success: false, error: 'Query cannot be empty' };
     }
 
     try {
-      const searchLocation = (
-        storedLocation ?? config.defaultLocation
-      ).toLowerCase();
-
-      const payload = {
+      const safe = ['off', 'moderate', 'active'] as const;
+      const payload: t.SerperSearchPayload = {
         q: query,
+        safe: safe[safeSearch ?? 1],
         num: Math.min(Math.max(1, numResults), 10),
-        gl: searchLocation,
       };
 
-      const response = await axios.post(config.apiUrl, payload, {
-        headers: {
-          'X-API-KEY': config.apiKey,
-          'Content-Type': 'application/json',
-        },
-        timeout: config.timeout,
-      });
+      // Set the search type if provided
+      if (type) {
+        payload.type = type;
+      }
+
+      if (date != null) {
+        payload.tbs = `qdr:${date}`;
+      }
+
+      if (country != null && country !== '') {
+        payload['gl'] = country.toLowerCase();
+      }
+
+      // Determine the API endpoint based on the search type
+      let apiEndpoint = config.apiUrl;
+      if (type === 'images') {
+        apiEndpoint = 'https://google.serper.dev/images';
+      } else if (type === 'videos') {
+        apiEndpoint = 'https://google.serper.dev/videos';
+      } else if (type === 'news') {
+        apiEndpoint = 'https://google.serper.dev/news';
+      }
+
+      const response = await axios.post<t.SerperResultData>(
+        apiEndpoint,
+        payload,
+        {
+          headers: {
+            'X-API-KEY': config.apiKey,
+            'Content-Type': 'application/json',
+          },
+          timeout: config.timeout,
+        }
+      );
 
       const data = response.data;
       const results: t.SearchResultData = {
         organic: data.organic,
         images: data.images ?? [],
+        answerBox: data.answerBox,
         topStories: data.topStories ?? [],
-        knowledgeGraph: data.knowledgeGraph as t.KnowledgeGraphResult,
-        answerBox: data.answerBox as t.AnswerBoxResult,
-        peopleAlsoAsk: data.peopleAlsoAsk as t.PeopleAlsoAskResult[],
-        relatedSearches: data.relatedSearches as string[],
+        peopleAlsoAsk: data.peopleAlsoAsk,
+        knowledgeGraph: data.knowledgeGraph,
+        relatedSearches: data.relatedSearches,
+        videos: data.videos ?? [],
+        news: data.news ?? [],
       };
 
       return { success: true, data: results };
@@ -187,11 +217,7 @@ const createSearXNGAPI = (
   instanceUrl?: string,
   apiKey?: string
 ): {
-  getSources: (
-    query: string,
-    numResults?: number,
-    storedLocation?: string
-  ) => Promise<t.SearchResult>;
+  getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;
 } => {
   const config = {
     instanceUrl: instanceUrl ?? process.env.SEARXNG_INSTANCE_URL,
@@ -204,11 +230,11 @@
     throw new Error('SEARXNG_INSTANCE_URL is required for SearXNG API');
   }
 
-  const getSources = async (
-    query: string,
-    numResults: number = 8,
-    storedLocation?: string
-  ): Promise<t.SearchResult> => {
+  const getSources = async ({
+    query,
+    numResults = 8,
+    type,
+  }: t.GetSourcesParams): Promise<t.SearchResult> => {
     if (!query.trim()) {
       return { success: false, error: 'Query cannot be empty' };
     }
@@ -224,22 +250,27 @@ const createSearXNGAPI = (
         searchUrl = searchUrl.replace(/\/$/, '') + '/search';
       }
 
+      // Determine the search category based on the type
+      let category = 'general';
+      if (type === 'images') {
+        category = 'images';
+      } else if (type === 'videos') {
+        category = 'videos';
+      } else if (type === 'news') {
+        category = 'news';
+      }
+
       // Prepare parameters for SearXNG
-      const params: Record<string, string | number> = {
+      const params: t.SearxNGSearchPayload = {
         q: query,
         format: 'json',
         pageno: 1,
-        categories: 'general',
+        categories: category,
         language: 'all',
         safesearch: 0,
         engines: 'google,bing,duckduckgo',
-        max_results: Math.min(Math.max(1, numResults), 20),
       };
 
-      if (storedLocation != null && storedLocation !== 'all') {
-        params.language = storedLocation;
-      }
-
       const headers: Record<string, string> = {
         'Content-Type': 'application/json',
       };
@@ -282,6 +313,8 @@ const createSearXNGAPI = (
         topStories: [],
         // Use undefined instead of null for optional properties
         relatedSearches: data.suggestions ?? [],
+        videos: [],
+        news: [],
       };
 
       return { success: true, data: results };
@@ -301,11 +334,7 @@ const createSearXNGAPI = (
 export const createSearchAPI = (
   config: t.SearchConfig
 ): {
-  getSources: (
-    query: string,
-    numResults?: number,
-    storedLocation?: string
-  ) => Promise<t.SearchResult>;
+  getSources: (params: t.GetSourcesParams) => Promise<t.SearchResult>;
 } => {
   const {
     searchProvider = 'serper',
@@ -330,10 +359,7 @@ export const createSourceProcessor = (
   scraperInstance?: FirecrawlScraper
 ): {
   processSources: (
-    result: t.SearchResult,
-    numElements: number,
-    query: string,
-    proMode?: boolean
+    fields: t.ProcessSourcesFields
   ) => Promise<t.SearchResultData>;
   topResults: number;
 } => {
@@ -345,47 +371,59 @@ export const createSourceProcessor = (
     // strategies = ['no_extraction'],
     // filterContent = true,
     reranker,
+    logger,
   } = config;
 
+  const logger_ = logger || createDefaultLogger();
   const firecrawlScraper = scraperInstance;
 
   const webScraper = {
     scrapeMany: async ({
       query,
       links,
+      onGetHighlights,
     }: {
       query: string;
       links: string[];
+      onGetHighlights: t.SearchToolConfig['onGetHighlights'];
     }): Promise<Array<t.ScrapeResult>> => {
-      console.log(`Scraping ${links.length} links with Firecrawl`);
+      logger_.debug(`Scraping ${links.length} links with Firecrawl`);
       const promises: Array<Promise<t.ScrapeResult>> = [];
       try {
-        for (const currentLink of links) {
+        for (let i = 0; i < links.length; i++) {
+          const currentLink = links[i];
           const promise: Promise<t.ScrapeResult> = firecrawlScraper
             .scrapeUrl(currentLink, {})
             .then(([url, response]) => {
-              const attribution = getAttribution(url, response.data?.metadata);
+              const attribution = getAttribution(
+                url,
+                response.data?.metadata,
+                logger_
+              );
               if (response.success && response.data) {
-                const content = firecrawlScraper.extractContent(response);
+                const [content, references] =
+                  firecrawlScraper.extractContent(response);
                 return {
                   url,
+                  references,
                   attribution,
                   content: chunker.cleanText(content),
-                };
+                } as t.ScrapeResult;
               }
 
               return {
                 url,
                 attribution,
                 error: true,
-                content: `Failed to scrape ${url}: ${response.error ?? 'Unknown error'}`,
-              };
+                content: '',
+              } as t.ScrapeResult;
             })
             .then(async (result) => {
              try {
                 if (result.error != null) {
-                  console.error(
-                    `Error scraping ${result.url}: ${result.content}`
+                  logger_.error(
+                    `Error scraping ${result.url}: ${result.content}`,
+                    result.error
                   );
                   return {
                     ...result,
@@ -395,31 +433,35 @@ export const createSourceProcessor = (
                   query,
                   reranker,
                   content: result.content,
+                  logger: logger_,
                 });
+                if (onGetHighlights) {
+                  onGetHighlights(result.url);
+                }
                 return {
                   ...result,
                   highlights,
                 };
               } catch (error) {
-                console.error('Error processing scraped content:', error);
+                logger_.error('Error processing scraped content:', error);
                 return {
                   ...result,
                 };
               }
             })
             .catch((error) => {
-              console.error(`Error scraping ${currentLink}:`, error);
+              logger_.error(`Error scraping ${currentLink}:`, error);
               return {
                 url: currentLink,
                 error: true,
-                content: `Failed to scrape ${currentLink}: ${error.message ?? 'Unknown error'}`,
+                content: '',
               };
             });
           promises.push(promise);
         }
         return await Promise.all(promises);
       } catch (error) {
-        console.error('Error in scrapeMany:', error);
+        logger_.error('Error in scrapeMany:', error);
         return [];
       }
     },
@@ -429,35 +471,44 @@ export const createSourceProcessor = (
     links,
     query,
     target,
+    onGetHighlights,
     onContentScraped,
   }: {
     links: string[];
     query: string;
     target: number;
+    onGetHighlights: t.SearchToolConfig['onGetHighlights'];
     onContentScraped?: (link: string, update?: Partial<t.ValidSource>) => void;
   }): Promise<void> => {
     const initialLinks = links.slice(0, target);
     // const remainingLinks = links.slice(target).reverse();
-    const results = await webScraper.scrapeMany({ query, links: initialLinks });
+    const results = await webScraper.scrapeMany({
+      query,
+      links: initialLinks,
+      onGetHighlights,
+    });
     for (const result of results) {
       if (result.error === true) {
         continue;
       }
-      const { url, content, attribution, highlights } = result;
+      const { url, content, attribution, references, highlights } = result;
       onContentScraped?.(url, {
         content,
         attribution,
+        references,
         highlights,
       });
     }
   };
 
-  const processSources = async (
-    result: t.SearchResult,
-    numElements: number,
-    query: string,
-    proMode: boolean = false
-  ): Promise<t.SearchResultData> => {
+  const processSources = async ({
+    result,
+    numElements,
+    query,
+    news,
+    proMode = true,
+    onGetHighlights,
+  }: t.ProcessSourcesFields): Promise<t.SearchResultData> => {
     try {
       if (!result.data) {
         return {
@@ -485,6 +536,7 @@ export const createSourceProcessor = (
         await fetchContents({
           query,
           target: 1,
+          onGetHighlights,
          onContentScraped,
           links: [wikiSources[0].link],
         });
@@ -504,51 +556,69 @@
       }
 
       const sourceMap = new Map<string, t.ValidSource>();
-      const allLinks: string[] = [];
+      const organicLinksSet = new Set<string>();
 
-      for (const source of result.data.organic) {
-        if (source.link) {
-          allLinks.push(source.link);
-          sourceMap.set(source.link, source);
-        }
-      }
+      // Collect organic links
+      const organicLinks = collectLinks(
+        result.data.organic,
+        sourceMap,
+        organicLinksSet
+      );
+
+      // Collect top story links, excluding any that are already in organic links
+      const topStories = result.data.topStories ?? [];
+      const topStoryLinks = collectLinks(
+        topStories,
+        sourceMap,
+        organicLinksSet
+      );
 
-      if (allLinks.length === 0) {
+      if (organicLinks.length === 0 && (topStoryLinks.length === 0 || !news)) {
         return result.data;
       }
 
       const onContentScraped = createSourceUpdateCallback(sourceMap);
-      await fetchContents({
-        links: allLinks,
-        query,
-        onContentScraped,
-        target: numElements,
-      });
+      const promises: Promise<void>[] = [];
+
+      // Process organic links
+      if (organicLinks.length > 0) {
+        promises.push(
+          fetchContents({
+            query,
+            onGetHighlights,
+            onContentScraped,
+            links: organicLinks,
+            target: numElements,
+          })
+        );
+      }
 
-      for (let i = 0; i < result.data.organic.length; i++) {
-        const source = result.data.organic[i];
-        const updatedSource = sourceMap.get(source.link);
-        if (updatedSource) {
-          result.data.organic[i] = {
-            ...source,
-            ...updatedSource,
-          };
-        }
+      // Process top story links
+      if (news && topStoryLinks.length > 0) {
+        promises.push(
+          fetchContents({
+            query,
+            onGetHighlights,
+            onContentScraped,
+            links: topStoryLinks,
+            target: numElements,
+          })
+        );
       }
 
-      const successfulSources = result.data.organic
-        .filter(
-          (source) =>
-            source.content != null && !source.content.startsWith('Failed')
-        )
-        .slice(0, numElements);
+      await Promise.all(promises);
 
-      if (successfulSources.length > 0) {
-        result.data.organic = successfulSources;
+      if (result.data.organic.length > 0) {
+        updateSourcesWithContent(result.data.organic, sourceMap);
       }
+
+      if (news && topStories.length > 0) {
+        updateSourcesWithContent(topStories, sourceMap);
+      }
+
       return result.data;
     } catch (error) {
-      console.error('Error in processSources:', error);
+      logger_.error('Error in processSources:', error);
       return {
         organic: [],
         topStories: [],
@@ -565,3 +635,46 @@
     topResults,
   };
 };
+
+/** Helper function to collect links and update sourceMap */
+function collectLinks(
+  sources: Array<t.OrganicResult | t.TopStoryResult>,
+  sourceMap: Map<string, t.ValidSource>,
+  existingLinksSet?: Set<string>
+): string[] {
+  const links: string[] = [];
+
+  for (const source of sources) {
+    if (source.link) {
+      // For topStories, only add if not already in organic links
+      if (existingLinksSet && existingLinksSet.has(source.link)) {
+        continue;
+      }
+
+      links.push(source.link);
+      if (existingLinksSet) {
+        existingLinksSet.add(source.link);
+      }
+      sourceMap.set(source.link, source as t.ValidSource);
+    }
+  }
+
+  return links;
+}
+
+/** Helper function to update sources with scraped content */
+function updateSourcesWithContent<T extends t.ValidSource>(
+  sources: T[],
+  sourceMap: Map<string, t.ValidSource>
+): void {
+  for (let i = 0; i < sources.length; i++) {
+    const source = sources[i];
+    const updatedSource = sourceMap.get(source.link);
+    if (updatedSource) {
+      sources[i] = {
+        ...source,
+        ...updatedSource,
+      } as T;
+    }
+  }
+}
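
Every logging change above follows the same pattern: each entry point gains an optional `logger` parameter and falls back to `createDefaultLogger()`, newly imported from `./utils`, replacing the direct `console.*` calls that the removed `/* eslint-disable no-console */` pragma used to permit. The `t.Logger` shape is not shown in these hunks; judging from the call sites (`logger_.debug`, `logger_.warn`, and `logger_.error`, each taking a message plus optional metadata), a compatible console-backed stand-in could look like this sketch (an assumption for illustration, not the package's actual implementation):

// Hypothetical stand-in for t.Logger and createDefaultLogger; the real
// definitions live in the package's ./utils and ./types modules.
interface Logger {
  debug(message: string, ...meta: unknown[]): void;
  warn(message: string, ...meta: unknown[]): void;
  error(message: string, ...meta: unknown[]): void;
}

function createDefaultLogger(): Logger {
  return {
    debug: (message, ...meta): void => console.debug(message, ...meta),
    warn: (message, ...meta): void => console.warn(message, ...meta),
    error: (message, ...meta): void => console.error(message, ...meta),
  };
}

The `const logger_ = logger || createDefaultLogger();` idiom at each call site lets a host application inject its own logger (a winston instance, for example) so scraping and reranking errors land in application logs rather than on the console.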