@librechat/agents 2.4.316 → 2.4.318

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/cjs/tools/search/content.cjs +140 -0
  2. package/dist/cjs/tools/search/content.cjs.map +1 -0
  3. package/dist/cjs/tools/search/firecrawl.cjs +17 -37
  4. package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
  5. package/dist/cjs/tools/search/format.cjs +79 -29
  6. package/dist/cjs/tools/search/format.cjs.map +1 -1
  7. package/dist/cjs/tools/search/highlights.cjs +64 -13
  8. package/dist/cjs/tools/search/highlights.cjs.map +1 -1
  9. package/dist/cjs/tools/search/search.cjs +13 -15
  10. package/dist/cjs/tools/search/search.cjs.map +1 -1
  11. package/dist/cjs/tools/search/tool.cjs +44 -12
  12. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  13. package/dist/cjs/tools/search/utils.cjs +35 -0
  14. package/dist/cjs/tools/search/utils.cjs.map +1 -0
  15. package/dist/esm/tools/search/content.mjs +119 -0
  16. package/dist/esm/tools/search/content.mjs.map +1 -0
  17. package/dist/esm/tools/search/firecrawl.mjs +18 -37
  18. package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
  19. package/dist/esm/tools/search/format.mjs +79 -29
  20. package/dist/esm/tools/search/format.mjs.map +1 -1
  21. package/dist/esm/tools/search/highlights.mjs +64 -13
  22. package/dist/esm/tools/search/highlights.mjs.map +1 -1
  23. package/dist/esm/tools/search/search.mjs +12 -14
  24. package/dist/esm/tools/search/search.mjs.map +1 -1
  25. package/dist/esm/tools/search/tool.mjs +44 -12
  26. package/dist/esm/tools/search/tool.mjs.map +1 -1
  27. package/dist/esm/tools/search/utils.mjs +32 -0
  28. package/dist/esm/tools/search/utils.mjs.map +1 -0
  29. package/dist/types/tools/search/content.d.ts +4 -0
  30. package/dist/types/tools/search/firecrawl.d.ts +6 -86
  31. package/dist/types/tools/search/format.d.ts +4 -1
  32. package/dist/types/tools/search/highlights.d.ts +1 -1
  33. package/dist/types/tools/search/search.d.ts +1 -1
  34. package/dist/types/tools/search/test.d.ts +1 -0
  35. package/dist/types/tools/search/tool.d.ts +12 -4
  36. package/dist/types/tools/search/types.d.ts +380 -46
  37. package/dist/types/tools/search/utils.d.ts +3 -0
  38. package/package.json +3 -2
  39. package/src/scripts/search.ts +5 -3
  40. package/src/tools/search/content.test.ts +173 -0
  41. package/src/tools/search/content.ts +147 -0
  42. package/src/tools/search/firecrawl.ts +27 -144
  43. package/src/tools/search/format.ts +89 -31
  44. package/src/tools/search/highlights.ts +99 -17
  45. package/src/tools/search/output.md +2775 -0
  46. package/src/tools/search/search.ts +42 -54
  47. package/src/tools/search/test.html +884 -0
  48. package/src/tools/search/test.md +643 -0
  49. package/src/tools/search/test.ts +159 -0
  50. package/src/tools/search/tool.ts +54 -15
  51. package/src/tools/search/types.ts +430 -52
  52. package/src/tools/search/utils.ts +43 -0
@@ -2,64 +2,40 @@ import type { RunnableConfig } from '@langchain/core/runnables';
2
2
  import type { BaseReranker } from './rerankers';
3
3
  export type SearchProvider = 'serper' | 'searxng';
4
4
  export type RerankerType = 'infinity' | 'jina' | 'cohere' | 'none';
5
- export interface OrganicResult {
6
- position?: number;
7
- title?: string;
8
- link: string;
9
- snippet?: string;
10
- date?: string;
11
- }
12
- export interface TopStoryResult {
13
- title?: string;
14
- link: string;
15
- source?: string;
16
- date?: string;
17
- imageUrl?: string;
18
- }
19
- export interface ImageResult {
20
- title?: string;
21
- imageUrl?: string;
22
- }
23
- export interface KnowledgeGraphResult {
24
- title?: string;
25
- type?: string;
26
- description?: string;
27
- attributes?: Record<string, string>;
28
- imageUrl?: string;
29
- }
30
- export interface AnswerBoxResult {
31
- title?: string;
32
- answer?: string;
33
- snippet?: string;
34
- date?: string;
35
- }
36
- export interface PeopleAlsoAskResult {
37
- question?: string;
38
- answer?: string;
39
- }
40
5
  export interface Highlight {
41
6
  score: number;
42
7
  text: string;
8
+ references?: UsedReferences;
43
9
  }
44
- export interface ValidSource {
45
- link: string;
46
- position?: number;
47
- title?: string;
48
- snippet?: string;
49
- date?: string;
10
+ export type ProcessedSource = {
50
11
  content?: string;
51
12
  attribution?: string;
13
+ references?: References;
52
14
  highlights?: Highlight[];
53
- }
15
+ };
16
+ export type ProcessedOrganic = OrganicResult & ProcessedSource;
17
+ export type ProcessedTopStory = TopStoryResult & ProcessedSource;
18
+ export type ValidSource = ProcessedOrganic | ProcessedTopStory;
19
+ export type ResultReference = {
20
+ link: string;
21
+ title?: string;
22
+ attribution?: string;
23
+ };
54
24
  export interface SearchResultData {
55
- organic?: ValidSource[];
56
- topStories?: ValidSource[];
25
+ organic?: ProcessedOrganic[];
26
+ topStories?: ProcessedTopStory[];
57
27
  images?: ImageResult[];
28
+ videos?: VideoResult[];
29
+ places?: PlaceResult[];
30
+ news?: NewsResult[];
31
+ shopping?: ShoppingResult[];
58
32
  knowledgeGraph?: KnowledgeGraphResult;
59
33
  answerBox?: AnswerBoxResult;
60
34
  peopleAlsoAsk?: PeopleAlsoAskResult[];
61
- relatedSearches?: string[];
62
- suggestions?: string[];
35
+ relatedSearches?: Array<{
36
+ query: string;
37
+ }>;
38
+ references?: ResultReference[];
63
39
  error?: string;
64
40
  }
65
41
  export interface SearchResult {
@@ -80,11 +56,17 @@ export interface SearchConfig {
80
56
  searxngInstanceUrl?: string;
81
57
  searxngApiKey?: string;
82
58
  }
59
+ export type References = {
60
+ links: MediaReference[];
61
+ images: MediaReference[];
62
+ videos: MediaReference[];
63
+ };
83
64
  export interface ScrapeResult {
84
65
  url: string;
85
66
  error?: boolean;
86
67
  content: string;
87
68
  attribution?: string;
69
+ references?: References;
88
70
  highlights?: Highlight[];
89
71
  }
90
72
  export interface ProcessSourcesConfig {
@@ -148,3 +130,355 @@ export interface SearchToolConfig extends SearchConfig, ProcessSourcesConfig, Fi
148
130
  rerankerType?: RerankerType;
149
131
  onSearchResults?: (results: SearchResult, runnableConfig?: RunnableConfig) => void;
150
132
  }
133
+ export interface MediaReference {
134
+ originalUrl: string;
135
+ title?: string;
136
+ text?: string;
137
+ }
138
+ export type UsedReferences = {
139
+ type: 'link' | 'image' | 'video';
140
+ originalIndex: number;
141
+ reference: MediaReference;
142
+ }[];
143
+ /** Firecrawl */
144
+ export interface FirecrawlScrapeOptions {
145
+ formats?: string[];
146
+ includeTags?: string[];
147
+ excludeTags?: string[];
148
+ headers?: Record<string, string>;
149
+ waitFor?: number;
150
+ timeout?: number;
151
+ }
152
+ export interface ScrapeMetadata {
153
+ sourceURL?: string;
154
+ url?: string;
155
+ scrapeId?: string;
156
+ statusCode?: number;
157
+ title?: string;
158
+ description?: string;
159
+ language?: string;
160
+ favicon?: string;
161
+ viewport?: string;
162
+ robots?: string;
163
+ 'theme-color'?: string;
164
+ 'og:url'?: string;
165
+ 'og:title'?: string;
166
+ 'og:description'?: string;
167
+ 'og:type'?: string;
168
+ 'og:image'?: string;
169
+ 'og:image:width'?: string;
170
+ 'og:image:height'?: string;
171
+ 'og:site_name'?: string;
172
+ ogUrl?: string;
173
+ ogTitle?: string;
174
+ ogDescription?: string;
175
+ ogImage?: string;
176
+ ogSiteName?: string;
177
+ 'article:author'?: string;
178
+ 'article:published_time'?: string;
179
+ 'article:modified_time'?: string;
180
+ 'article:section'?: string;
181
+ 'article:tag'?: string;
182
+ 'article:publisher'?: string;
183
+ publishedTime?: string;
184
+ modifiedTime?: string;
185
+ 'twitter:site'?: string;
186
+ 'twitter:creator'?: string;
187
+ 'twitter:card'?: string;
188
+ 'twitter:image'?: string;
189
+ 'twitter:dnt'?: string;
190
+ 'twitter:app:name:iphone'?: string;
191
+ 'twitter:app:id:iphone'?: string;
192
+ 'twitter:app:url:iphone'?: string;
193
+ 'twitter:app:name:ipad'?: string;
194
+ 'twitter:app:id:ipad'?: string;
195
+ 'twitter:app:url:ipad'?: string;
196
+ 'twitter:app:name:googleplay'?: string;
197
+ 'twitter:app:id:googleplay'?: string;
198
+ 'twitter:app:url:googleplay'?: string;
199
+ 'fb:app_id'?: string;
200
+ 'al:ios:url'?: string;
201
+ 'al:ios:app_name'?: string;
202
+ 'al:ios:app_store_id'?: string;
203
+ [key: string]: string | number | boolean | null | undefined;
204
+ }
205
+ export interface FirecrawlScrapeResponse {
206
+ success: boolean;
207
+ data?: {
208
+ markdown?: string;
209
+ html?: string;
210
+ rawHtml?: string;
211
+ screenshot?: string;
212
+ links?: string[];
213
+ metadata?: ScrapeMetadata;
214
+ };
215
+ error?: string;
216
+ }
217
+ export interface FirecrawlScraperConfig {
218
+ apiKey?: string;
219
+ apiUrl?: string;
220
+ formats?: string[];
221
+ timeout?: number;
222
+ }
223
+ export type GetSourcesParams = {
224
+ query: string;
225
+ country?: string;
226
+ numResults?: number;
227
+ };
228
+ /** Serper API */
229
+ export interface VideoResult {
230
+ title?: string;
231
+ link?: string;
232
+ snippet?: string;
233
+ imageUrl?: string;
234
+ duration?: string;
235
+ source?: string;
236
+ channel?: string;
237
+ date?: string;
238
+ position?: number;
239
+ }
240
+ export interface PlaceResult {
241
+ position?: number;
242
+ name?: string;
243
+ address?: string;
244
+ latitude?: number;
245
+ longitude?: number;
246
+ rating?: number;
247
+ ratingCount?: number;
248
+ category?: string;
249
+ identifier?: string;
250
+ }
251
+ export interface NewsResult {
252
+ title?: string;
253
+ link?: string;
254
+ snippet?: string;
255
+ date?: string;
256
+ source?: string;
257
+ imageUrl?: string;
258
+ position?: number;
259
+ }
260
+ export interface ShoppingResult {
261
+ title?: string;
262
+ source?: string;
263
+ link?: string;
264
+ price?: string;
265
+ delivery?: string;
266
+ imageUrl?: string;
267
+ rating?: number;
268
+ ratingCount?: number;
269
+ offers?: string;
270
+ productId?: string;
271
+ position?: number;
272
+ }
273
+ export interface ScholarResult {
274
+ title?: string;
275
+ link?: string;
276
+ publicationInfo?: string;
277
+ snippet?: string;
278
+ year?: number;
279
+ citedBy?: number;
280
+ }
281
+ export interface ImageResult {
282
+ title?: string;
283
+ imageUrl?: string;
284
+ imageWidth?: number;
285
+ imageHeight?: number;
286
+ thumbnailUrl?: string;
287
+ thumbnailWidth?: number;
288
+ thumbnailHeight?: number;
289
+ source?: string;
290
+ domain?: string;
291
+ link?: string;
292
+ googleUrl?: string;
293
+ position?: number;
294
+ }
295
+ export interface SerperSearchPayload extends SerperSearchInput {
296
+ /**
297
+ * Search type/vertical
298
+ * Options: "search" (web), "images", "news", "places", "videos"
299
+ */
300
+ type?: 'search' | 'images' | 'news' | 'places' | 'videos';
301
+ /**
302
+ * Starting index for search results pagination (used instead of page)
303
+ */
304
+ start?: number;
305
+ /**
306
+ * Filtering for safe search
307
+ * Options: "off", "moderate", "active"
308
+ */
309
+ safe?: 'off' | 'moderate' | 'active';
310
+ }
311
+ export type SerperSearchParameters = Pick<SerperSearchPayload, 'q' | 'type'> & {
312
+ engine: 'google';
313
+ };
314
+ export interface OrganicResult {
315
+ position?: number;
316
+ title?: string;
317
+ link: string;
318
+ snippet?: string;
319
+ date?: string;
320
+ sitelinks?: Array<{
321
+ title: string;
322
+ link: string;
323
+ }>;
324
+ }
325
+ export interface TopStoryResult {
326
+ title?: string;
327
+ link: string;
328
+ source?: string;
329
+ date?: string;
330
+ imageUrl?: string;
331
+ }
332
+ export interface KnowledgeGraphResult {
333
+ title?: string;
334
+ type?: string;
335
+ imageUrl?: string;
336
+ description?: string;
337
+ descriptionSource?: string;
338
+ descriptionLink?: string;
339
+ attributes?: Record<string, string>;
340
+ website?: string;
341
+ }
342
+ export interface AnswerBoxResult {
343
+ title?: string;
344
+ snippet?: string;
345
+ snippetHighlighted?: string[];
346
+ link?: string;
347
+ date?: string;
348
+ }
349
+ export interface PeopleAlsoAskResult {
350
+ question?: string;
351
+ snippet?: string;
352
+ title?: string;
353
+ link?: string;
354
+ }
355
+ export type RelatedSearches = Array<{
356
+ query: string;
357
+ }>;
358
+ export interface SerperSearchInput {
359
+ /**
360
+ * The search query string
361
+ */
362
+ q: string;
363
+ /**
364
+ * Country code for localized results
365
+ * Examples: "us", "uk", "ca", "de", etc.
366
+ */
367
+ gl?: string;
368
+ /**
369
+ * Interface language
370
+ * Examples: "en", "fr", "de", etc.
371
+ */
372
+ hl?: string;
373
+ /**
374
+ * Number of results to return (up to 100)
375
+ */
376
+ num?: number;
377
+ /**
378
+ * Specific location for contextual results
379
+ * Example: "New York, NY"
380
+ */
381
+ location?: string;
382
+ /**
383
+ * Search autocorrection setting
384
+ */
385
+ autocorrect?: boolean;
386
+ page?: number;
387
+ }
388
+ export type SerperResultData = {
389
+ searchParameters: SerperSearchPayload;
390
+ organic?: OrganicResult[];
391
+ topStories?: TopStoryResult[];
392
+ images?: ImageResult[];
393
+ videos?: VideoResult[];
394
+ places?: PlaceResult[];
395
+ news?: NewsResult[];
396
+ shopping?: ShoppingResult[];
397
+ peopleAlsoAsk?: PeopleAlsoAskResult[];
398
+ relatedSearches?: RelatedSearches;
399
+ knowledgeGraph?: KnowledgeGraphResult;
400
+ answerBox?: AnswerBoxResult;
401
+ credits?: number;
402
+ };
403
+ /** SearXNG */
404
+ export interface SearxNGSearchPayload {
405
+ /**
406
+ * The search query string
407
+ * Supports syntax specific to different search engines
408
+ * Example: "site:github.com SearXNG"
409
+ */
410
+ q: string;
411
+ /**
412
+ * Comma-separated list of search categories
413
+ * Example: "general,images,news"
414
+ */
415
+ categories?: string;
416
+ /**
417
+ * Comma-separated list of search engines to use
418
+ * Example: "google,bing,duckduckgo"
419
+ */
420
+ engines?: string;
421
+ /**
422
+ * Code of the language for search results
423
+ * Example: "en", "fr", "de", "es"
424
+ */
425
+ language?: string;
426
+ /**
427
+ * Search page number
428
+ * Default: 1
429
+ */
430
+ pageno?: number;
431
+ /**
432
+ * Time range filter for search results
433
+ * Options: "day", "month", "year"
434
+ */
435
+ time_range?: 'day' | 'month' | 'year';
436
+ /**
437
+ * Output format of results
438
+ * Options: "json", "csv", "rss"
439
+ */
440
+ format?: 'json' | 'csv' | 'rss';
441
+ /**
442
+ * Open search results on new tab
443
+ * Options: `0` (off), `1` (on)
444
+ */
445
+ results_on_new_tab?: 0 | 1;
446
+ /**
447
+ * Proxy image results through SearxNG
448
+ * Options: true, false
449
+ */
450
+ image_proxy?: boolean;
451
+ /**
452
+ * Service for autocomplete suggestions
453
+ * Options: "google", "dbpedia", "duckduckgo", "mwmbl",
454
+ * "startpage", "wikipedia", "stract", "swisscows", "qwant"
455
+ */
456
+ autocomplete?: string;
457
+ /**
458
+ * Safe search filtering level
459
+ * Options: "0" (off), "1" (moderate), "2" (strict)
460
+ */
461
+ safesearch?: 0 | 1 | 2;
462
+ /**
463
+ * Theme to use for results page
464
+ * Default: "simple" (other themes may be available per instance)
465
+ */
466
+ theme?: string;
467
+ /**
468
+ * List of enabled plugins
469
+ * Default: "Hash_plugin,Self_Information,Tracker_URL_remover,Ahmia_blacklist"
470
+ */
471
+ enabled_plugins?: string;
472
+ /**
473
+ * List of disabled plugins
474
+ */
475
+ disabled_plugins?: string;
476
+ /**
477
+ * List of enabled engines
478
+ */
479
+ enabled_engines?: string;
480
+ /**
481
+ * List of disabled engines
482
+ */
483
+ disabled_engines?: string;
484
+ }
@@ -0,0 +1,3 @@
1
+ import type * as t from './types';
2
+ export declare const getDomainName: (link: string, metadata?: t.ScrapeMetadata) => string | undefined;
3
+ export declare function getAttribution(link: string, metadata?: t.ScrapeMetadata): string | undefined;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@librechat/agents",
3
- "version": "2.4.316",
3
+ "version": "2.4.318",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -73,7 +73,7 @@
73
73
  },
74
74
  "dependencies": {
75
75
  "@langchain/anthropic": "^0.3.20",
76
- "@langchain/aws": "^0.1.8",
76
+ "@langchain/aws": "0.1.8",
77
77
  "@langchain/community": "^0.3.42",
78
78
  "@langchain/core": "^0.3.55",
79
79
  "@langchain/deepseek": "^0.0.1",
@@ -84,6 +84,7 @@
84
84
  "@langchain/ollama": "^0.2.0",
85
85
  "@langchain/openai": "^0.5.10",
86
86
  "@langchain/xai": "^0.0.2",
87
+ "cheerio": "^1.0.0",
87
88
  "dotenv": "^16.4.7",
88
89
  "https-proxy-agent": "^7.0.6",
89
90
  "nanoid": "^3.3.7"
@@ -86,7 +86,7 @@ async function testStandardStreaming(): Promise<void> {
86
86
  tools: [createSearchTool()],
87
87
  instructions:
88
88
  'You are a friendly AI assistant. Always address the user by their name.',
89
- additional_instructions: `The user's name is ${userName} and they are located in ${location}.`,
89
+ // additional_instructions: `The user's name is ${userName} and they are located in ${location}.`,
90
90
  },
91
91
  returnContent: true,
92
92
  customHandlers,
@@ -101,7 +101,7 @@ async function testStandardStreaming(): Promise<void> {
101
101
  version: 'v2' as const,
102
102
  };
103
103
 
104
- console.log('Test 1: Weather query (content parts test)');
104
+ console.log('Test 1: Search query (search tool test)');
105
105
 
106
106
  // const userMessage = `
107
107
  // Make a search for the weather in ${location} today, which is ${currentDate}.
@@ -109,7 +109,9 @@ async function testStandardStreaming(): Promise<void> {
109
109
  // Make sure to always refer to me by name, which is ${userName}.
110
110
  // After giving me a thorough summary, tell me a joke about the weather forecast we went over.
111
111
  // `;
112
- const userMessage = 'Are massage guns good?';
112
+ // const userMessage = 'Are massage guns good?';
113
+ // const userMessage = 'What is functional programming?';
114
+ const userMessage = "Get me today's trending news.";
113
115
 
114
116
  conversationHistory.push(new HumanMessage(userMessage));
115
117
 
@@ -0,0 +1,173 @@
1
+ /* eslint-disable @typescript-eslint/no-unused-vars */
2
+ /* eslint-disable no-console */
3
+ // content.test.ts
4
+ import * as fs from 'fs';
5
+ import { processContent } from './content';
6
+
7
+ describe('Link Processor', () => {
8
+ afterAll(() => {
9
+ if (fs.existsSync('./temp.html')) {
10
+ fs.unlinkSync('./temp.html');
11
+ }
12
+ if (fs.existsSync('./temp.md')) {
13
+ fs.unlinkSync('./temp.md');
14
+ }
15
+ });
16
+ // Basic functionality tests
17
+ test('should replace basic links with references', () => {
18
+ const html = `
19
+ <p>Test with <a href="https://example.com/link" title="Example">a link</a></p>
20
+ <p>And an <img src="https://example.com/img.jpg" alt="image"></p>
21
+ <p>Plus a <video src="https://example.com/video.mp4"></video></p>
22
+ `;
23
+
24
+ const markdown = `
25
+ Test with [a link](https://example.com/link "Example")
26
+ And an ![image](https://example.com/img.jpg)
27
+ Plus a [video](https://example.com/video.mp4)
28
+ `;
29
+
30
+ const result = processContent(html, markdown);
31
+
32
+ expect(result.links.length).toBe(1);
33
+ expect(result.images.length).toBe(1);
34
+ expect(result.videos.length).toBe(1);
35
+ expect(result.markdown).toContain('link#1');
36
+ expect(result.markdown).toContain('image#1');
37
+ expect(result.markdown).toContain('video#1');
38
+ });
39
+
40
+ // Edge case tests
41
+ test('should handle links with parentheses and special characters', () => {
42
+ const html = `
43
+ <a href="https://example.com/page(1).html" title="Parens">Link with parens</a>
44
+ <a href="https://example.com/path?query=test&param=value">Link with query</a>
45
+ `;
46
+
47
+ const markdown = `
48
+ [Link with parens](https://example.com/page(1).html "Parens")
49
+ [Link with query](https://example.com/path?query=test&param=value)
50
+ `;
51
+
52
+ const result = processContent(html, markdown);
53
+
54
+ expect(result.links.length).toBe(2);
55
+ expect(result.markdown).toContain('link#1');
56
+ expect(result.markdown).toContain('link#2');
57
+ });
58
+
59
+ // Performance test with large files
60
+ test('should process large files efficiently', () => {
61
+ const html = fs.readFileSync('src/tools/search/test.html', 'utf-8');
62
+ const markdown = fs.readFileSync('src/tools/search/test.md', 'utf-8');
63
+
64
+ // const largeHtml = generateLargeHtml(1000); // 1000 links
65
+ // fs.writeFileSync('./temp.html', largeHtml);
66
+
67
+ // const largeMd = generateLargeMarkdown(1000); // 1000 links
68
+ // fs.writeFileSync('./temp.md', largeMd);
69
+
70
+ // const html = fs.readFileSync('./temp.html', 'utf-8');
71
+ // const markdown = fs.readFileSync('./temp.md', 'utf-8');
72
+
73
+ // Measure time taken to process
74
+ const startTime = process.hrtime();
75
+ const result = processContent(html, markdown);
76
+ const elapsed = process.hrtime(startTime);
77
+ const timeInMs = elapsed[0] * 1000 + elapsed[1] / 1000000;
78
+
79
+ console.log(
80
+ `Processed ${result.links.length} links, ${result.images.length} images, and ${result.videos.length} videos in ${timeInMs.toFixed(2)}ms`
81
+ );
82
+
83
+ // Basic validations for large file processing
84
+ expect(result.links.length).toBeGreaterThan(0);
85
+ expect(result.markdown).toContain('link#');
86
+
87
+ // Check if all links were replaced (sample check)
88
+ expect(result.markdown).not.toContain('https://example.com/link');
89
+ });
90
+
91
+ // Memory usage test
92
+ test('should have reasonable memory usage', () => {
93
+ const html = fs.readFileSync('src/tools/search/test.html', 'utf-8');
94
+ const markdown = fs.readFileSync('src/tools/search/test.md', 'utf-8');
95
+
96
+ const beforeMem = process.memoryUsage();
97
+ processContent(html, markdown);
98
+ const afterMem = process.memoryUsage();
99
+
100
+ const heapUsed = (afterMem.heapUsed - beforeMem.heapUsed) / 1024 / 1024; // MB
101
+
102
+ console.log(`Memory used: ${heapUsed.toFixed(2)} MB`);
103
+
104
+ // This is a loose check - actual thresholds depend on your environment
105
+ expect(heapUsed).toBeLessThan(100); // Should use less than 100MB additional heap
106
+ });
107
+
108
+ // Real-world file test (if available)
109
+ test('should process real-world Wikipedia content', () => {
110
+ // Try to find real-world test files if they exist
111
+ const wikiHtml = 'src/tools/search/test.html';
112
+ const wikiMd = 'src/tools/search/test.md';
113
+
114
+ if (fs.existsSync(wikiHtml) && fs.existsSync(wikiMd)) {
115
+ const html = fs.readFileSync(wikiHtml, 'utf-8');
116
+ const markdown = fs.readFileSync(wikiMd, 'utf-8');
117
+
118
+ const result = processContent(html, markdown);
119
+
120
+ console.log(
121
+ `Processed ${result.links.length} Wikipedia links, ${result.images.length} images, and ${result.videos.length} videos`
122
+ );
123
+
124
+ expect(result.links.length).toBeGreaterThan(10); // Wikipedia articles typically have many links
125
+ expect(result.markdown).not.toMatch(/\]\(https?:\/\/[^\s")]+\)/); // No regular URLs should remain
126
+ } else {
127
+ console.log('Wikipedia test files not found, skipping this test');
128
+ }
129
+ });
130
+ });
131
+
132
+ // Helper function to generate large HTML test data
133
+ function generateLargeHtml(linkCount: number): string {
134
+ let html = '<html><body>';
135
+
136
+ for (let i = 1; i <= linkCount; i++) {
137
+ html += `<p>Paragraph ${i} with <a href="https://example.com/link${i}" title="Link ${i}">link ${i}</a>`;
138
+
139
+ if (i % 10 === 0) {
140
+ html += ` and <img src="https://example.com/image${i / 10}.jpg" alt="Image ${i / 10}">`;
141
+ }
142
+
143
+ if (i % 50 === 0) {
144
+ html += ` and <video src="https://example.com/video${i / 50}.mp4" title="Video ${i / 50}"></video>`;
145
+ }
146
+
147
+ html += '</p>';
148
+ }
149
+
150
+ html += '</body></html>';
151
+ return html;
152
+ }
153
+
154
+ /** Helper function to generate large Markdown test data */
155
+ function generateLargeMarkdown(linkCount: number): string {
156
+ let markdown = '# Test Document\n\n';
157
+
158
+ for (let i = 1; i <= linkCount; i++) {
159
+ markdown += `Paragraph ${i} with [link ${i}](https://example.com/link${i} "Link ${i}")`;
160
+
161
+ if (i % 10 === 0) {
162
+ markdown += ` and ![Image ${i / 10}](https://example.com/image${i / 10}.jpg)`;
163
+ }
164
+
165
+ if (i % 50 === 0) {
166
+ markdown += ` and [Video ${i / 50}](https://example.com/video${i / 50}.mp4 "Video ${i / 50}")`;
167
+ }
168
+
169
+ markdown += '\n\n';
170
+ }
171
+
172
+ return markdown;
173
+ }