@juspay/neurolink 8.41.1 → 8.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [8.42.0](https://github.com/juspay/neurolink/compare/v8.41.1...v8.42.0) (2026-02-02)
2
+
3
+ ### Features
4
+
5
+ - **(cache):** Implemented LRU cache for image downloads ([6562c45](https://github.com/juspay/neurolink/commit/6562c45605a6031afe41bdfcc5de0a7687127c3f))
6
+
1
7
  ## [8.41.1](https://github.com/juspay/neurolink/compare/v8.41.0...v8.41.1) (2026-01-31)
2
8
 
3
9
  ### Bug Fixes
@@ -190,3 +190,54 @@ export type EnvVarValidationResult = {
190
190
  invalidVars: string[];
191
191
  warnings: string[];
192
192
  };
193
+ /**
194
+ * Cached image entry structure for image cache
195
+ */
196
+ export type CachedImage = {
197
+ /** The image data as a base64 data URI */
198
+ dataUri: string;
199
+ /** Content type of the image (e.g., "image/jpeg") */
200
+ contentType: string;
201
+ /** Size of the image in bytes */
202
+ size: number;
203
+ /** Truncated SHA-256 hash (first 16 hex characters) of the image content, used for deduplication */
204
+ contentHash: string;
205
+ /** Timestamp when the entry was created */
206
+ createdAt: number;
207
+ /** Timestamp of last access */
208
+ lastAccessedAt: number;
209
+ /** Number of times this entry was accessed */
210
+ accessCount: number;
211
+ };
212
+ /**
213
+ * Configuration options for the image cache
214
+ */
215
+ export type ImageCacheConfig = {
216
+ /** Maximum number of entries in the cache (default: 100) */
217
+ maxSize?: number;
218
+ /** Time-to-live in milliseconds (default: 30 minutes) */
219
+ ttlMs?: number;
220
+ /** Maximum size per image in bytes (default: 10MB) */
221
+ maxImageSize?: number;
222
+ };
223
+ /**
224
+ * Cache statistics for monitoring
225
+ */
226
+ export type ImageCacheStats = {
227
+ /** Number of cache hits */
228
+ hits: number;
229
+ /** Number of cache misses */
230
+ misses: number;
231
+ /** Number of entries evicted due to size limits */
232
+ evictions: number;
233
+ /** Number of entries expired due to TTL */
234
+ expirations: number;
235
+ /** Total number of requests */
236
+ totalRequests: number;
237
+ /** Current number of entries in cache */
238
+ size: number;
239
+ /** Total size of cached images in bytes */
240
+ totalBytes: number;
241
+ /** Cache hit rate as a percentage (hits / totalRequests × 100, rounded to two decimals; 0 when there are no requests) */
242
+ hitRate: number;
243
+ };
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Image Cache Utility for NeuroLink
3
+ *
4
+ * Implements an LRU cache for downloaded images to avoid redundant URL downloads.
5
+ * Addresses IMG-026: No Caching issue - same URL downloaded multiple times wasting bandwidth.
6
+ *
7
+ * Features:
8
+ * - LRU (Least Recently Used) eviction strategy
9
+ * - Configurable cache size and TTL
10
+ * - Cache hit/miss metrics
11
+ * - Content hash tracking for deduplication
12
+ *
13
+ * @module utils/imageCache
14
+ */
15
+ import type { CachedImage, ImageCacheConfig, ImageCacheStats } from "../types/utilities.js";
16
+ /**
17
+ * LRU Cache for downloaded images
18
+ *
19
+ * Uses URL as primary key and tracks content hashes for deduplication.
20
+ * Implements LRU eviction and configurable TTL for memory management.
21
+ */
22
+ export declare class ImageCache {
23
+ private cache;
24
+ private contentHashIndex;
25
+ private maxSize;
26
+ private ttlMs;
27
+ private maxImageSize;
28
+ private enabled;
29
+ private stats;
30
+ constructor(config?: ImageCacheConfig);
31
+ /**
32
+ * Parse a config value with bounds checking
33
+ */
34
+ private parseConfigValue;
35
+ /**
36
+ * Normalize URL for consistent cache key generation
37
+ * Removes tracking parameters and normalizes the URL
38
+ */
39
+ private normalizeUrl;
40
+ /**
41
+ * Generate content hash from image data
42
+ */
43
+ private generateContentHash;
44
+ /**
45
+ * Check if an entry is expired based on TTL
46
+ */
47
+ private isExpired;
48
+ /**
49
+ * Check if cache is enabled
50
+ */
51
+ isEnabled(): boolean;
52
+ /**
53
+ * Get a cached image by URL
54
+ * Returns null if not found or expired
55
+ */
56
+ get(url: string): CachedImage | null;
57
+ /**
58
+ * Get a cached image by content hash
59
+ * Useful for deduplication when the same image is accessed via different URLs
60
+ */
61
+ getByContentHash(contentHash: string): CachedImage | null;
62
+ /**
63
+ * Store an image in the cache
64
+ */
65
+ set(url: string, dataUri: string, contentType: string, imageData: Buffer): void;
66
+ /**
67
+ * Delete an entry from the cache
68
+ */
69
+ delete(url: string): boolean;
70
+ /**
71
+ * Evict the oldest (least recently used) entry
72
+ */
73
+ private evictOldest;
74
+ /**
75
+ * Clear all expired entries
76
+ */
77
+ evictExpired(): number;
78
+ /**
79
+ * Clear all entries from the cache
80
+ */
81
+ clear(): void;
82
+ /**
83
+ * Get cache statistics
84
+ */
85
+ getStats(): ImageCacheStats;
86
+ /**
87
+ * Check if a URL is cached and not expired
88
+ */
89
+ has(url: string): boolean;
90
+ /**
91
+ * Get the current cache size
92
+ */
93
+ getSize(): number;
94
+ /**
95
+ * Get cache configuration
96
+ */
97
+ getConfig(): {
98
+ enabled: boolean;
99
+ maxSize: number;
100
+ ttlMs: number;
101
+ maxImageSize: number;
102
+ };
103
+ }
104
+ /**
105
+ * Get the global image cache instance
106
+ * Creates a new instance if none exists
107
+ */
108
+ export declare function getImageCache(config?: ImageCacheConfig): ImageCache;
109
+ /**
110
+ * Reset the global image cache (useful for testing)
111
+ */
112
+ export declare function resetImageCache(): void;
113
+ /**
114
+ * Get image cache statistics from the global instance
115
+ */
116
+ export declare function getImageCacheStats(): ImageCacheStats | null;
@@ -0,0 +1,377 @@
1
+ /**
2
+ * Image Cache Utility for NeuroLink
3
+ *
4
+ * Implements an LRU cache for downloaded images to avoid redundant URL downloads.
5
+ * Addresses IMG-026: No Caching issue - same URL downloaded multiple times wasting bandwidth.
6
+ *
7
+ * Features:
8
+ * - LRU (Least Recently Used) eviction strategy
9
+ * - Configurable cache size and TTL
10
+ * - Cache hit/miss metrics
11
+ * - Content hash tracking for deduplication
12
+ *
13
+ * @module utils/imageCache
14
+ */
15
+ import { createHash } from "crypto";
16
+ import { logger } from "./logger.js";
17
+ /**
18
+ * LRU Cache for downloaded images
19
+ *
20
+ * Uses URL as primary key and tracks content hashes for deduplication.
21
+ * Implements LRU eviction and configurable TTL for memory management.
22
+ */
23
+ export class ImageCache {
24
+ cache = new Map();
25
+ contentHashIndex = new Map(); // contentHash -> url
26
+ maxSize;
27
+ ttlMs;
28
+ maxImageSize;
29
+ enabled;
30
+ stats = {
31
+ hits: 0,
32
+ misses: 0,
33
+ evictions: 0,
34
+ expirations: 0,
35
+ totalRequests: 0,
36
+ };
37
+ constructor(config = {}) {
38
+ // Parse configuration with environment variable overrides
39
+ const envEnabled = process.env.NEUROLINK_IMAGE_CACHE_ENABLED;
40
+ const envMaxSize = process.env.NEUROLINK_IMAGE_CACHE_SIZE;
41
+ const envTtlMs = process.env.NEUROLINK_IMAGE_CACHE_TTL_MS;
42
+ const envMaxImageSize = process.env.NEUROLINK_IMAGE_MAX_SIZE;
43
+ // Check if cache is enabled (default: false)
44
+ this.enabled =
45
+ envEnabled !== undefined ? envEnabled.toLowerCase() === "true" : false;
46
+ this.maxSize = this.parseConfigValue(envMaxSize, config.maxSize, 100, 1, 1000);
47
+ this.ttlMs = this.parseConfigValue(envTtlMs, config.ttlMs, 30 * 60 * 1000, // 30 minutes
48
+ 1000, // 1 second min
49
+ 24 * 60 * 60 * 1000);
50
+ this.maxImageSize = this.parseConfigValue(envMaxImageSize, config.maxImageSize, 10 * 1024 * 1024, // 10MB
51
+ 1024, // 1KB min
52
+ 100 * 1024 * 1024);
53
+ logger.debug("ImageCache initialized", {
54
+ enabled: this.enabled,
55
+ maxSize: this.maxSize,
56
+ ttlMs: this.ttlMs,
57
+ maxImageSize: this.maxImageSize,
58
+ });
59
+ }
60
+ /**
61
+ * Parse a config value with bounds checking
62
+ */
63
+ parseConfigValue(envValue, configValue, defaultValue, min, max) {
64
+ let value;
65
+ if (envValue !== undefined) {
66
+ const parsed = parseInt(envValue, 10);
67
+ value = isNaN(parsed) ? defaultValue : parsed;
68
+ }
69
+ else if (configValue !== undefined) {
70
+ value = configValue;
71
+ }
72
+ else {
73
+ value = defaultValue;
74
+ }
75
+ // Apply bounds
76
+ if (value < min) {
77
+ logger.warn(`Cache config value ${value} below minimum ${min}, using min`);
78
+ return min;
79
+ }
80
+ if (value > max) {
81
+ logger.warn(`Cache config value ${value} above maximum ${max}, using max`);
82
+ return max;
83
+ }
84
+ return value;
85
+ }
86
+ /**
87
+ * Normalize URL for consistent cache key generation
88
+ * Removes tracking parameters and normalizes the URL
89
+ */
90
+ normalizeUrl(url) {
91
+ try {
92
+ const parsed = new URL(url);
93
+ // Remove common tracking parameters that don't affect content
94
+ const trackingParams = [
95
+ "utm_source",
96
+ "utm_medium",
97
+ "utm_campaign",
98
+ "utm_term",
99
+ "utm_content",
100
+ "fbclid",
101
+ "gclid",
102
+ "_ga",
103
+ ];
104
+ trackingParams.forEach((param) => parsed.searchParams.delete(param));
105
+ return parsed.toString();
106
+ }
107
+ catch {
108
+ // If URL parsing fails, use the original URL
109
+ return url;
110
+ }
111
+ }
112
+ /**
113
+ * Generate content hash from image data
114
+ */
115
+ generateContentHash(data) {
116
+ const buffer = typeof data === "string" ? Buffer.from(data, "base64") : data;
117
+ return createHash("sha256").update(buffer).digest("hex").substring(0, 16);
118
+ }
119
+ /**
120
+ * Check if an entry is expired based on TTL
121
+ */
122
+ isExpired(entry) {
123
+ return Date.now() - entry.createdAt > this.ttlMs;
124
+ }
125
+ /**
126
+ * Check if cache is enabled
127
+ */
128
+ isEnabled() {
129
+ return this.enabled;
130
+ }
131
+ /**
132
+ * Get a cached image by URL
133
+ * Returns null if not found or expired
134
+ */
135
+ get(url) {
136
+ // Return null immediately if cache is disabled
137
+ if (!this.enabled) {
138
+ return null;
139
+ }
140
+ this.stats.totalRequests++;
141
+ const normalizedUrl = this.normalizeUrl(url);
142
+ const entry = this.cache.get(normalizedUrl);
143
+ if (!entry) {
144
+ this.stats.misses++;
145
+ logger.debug("Image cache miss", { url: normalizedUrl.substring(0, 50) });
146
+ return null;
147
+ }
148
+ // Check TTL expiration
149
+ if (this.isExpired(entry)) {
150
+ this.stats.expirations++;
151
+ this.delete(normalizedUrl);
152
+ logger.debug("Image cache entry expired", {
153
+ url: normalizedUrl.substring(0, 50),
154
+ });
155
+ return null;
156
+ }
157
+ // Update LRU info
158
+ entry.lastAccessedAt = Date.now();
159
+ entry.accessCount++;
160
+ // Move to end (most recently used) - delete and re-add
161
+ this.cache.delete(normalizedUrl);
162
+ this.cache.set(normalizedUrl, entry);
163
+ this.stats.hits++;
164
+ logger.debug("Image cache hit", {
165
+ url: normalizedUrl.substring(0, 50),
166
+ accessCount: entry.accessCount,
167
+ });
168
+ return entry;
169
+ }
170
+ /**
171
+ * Get a cached image by content hash
172
+ * Useful for deduplication when the same image is accessed via different URLs
173
+ */
174
+ getByContentHash(contentHash) {
175
+ const url = this.contentHashIndex.get(contentHash);
176
+ if (!url) {
177
+ return null;
178
+ }
179
+ return this.get(url);
180
+ }
181
+ /**
182
+ * Store an image in the cache
183
+ */
184
+ set(url, dataUri, contentType, imageData) {
185
+ // Skip caching if disabled
186
+ if (!this.enabled) {
187
+ logger.debug("Image caching disabled, skipping cache storage");
188
+ return;
189
+ }
190
+ const normalizedUrl = this.normalizeUrl(url);
191
+ const size = imageData.length;
192
+ // Skip caching if image exceeds max size
193
+ if (size > this.maxImageSize) {
194
+ logger.debug("Image too large to cache", {
195
+ url: normalizedUrl.substring(0, 50),
196
+ size,
197
+ maxSize: this.maxImageSize,
198
+ });
199
+ return;
200
+ }
201
+ // Generate content hash
202
+ const contentHash = this.generateContentHash(imageData);
203
+ // Check if same content already exists under different URL
204
+ const existingUrl = this.contentHashIndex.get(contentHash);
205
+ if (existingUrl && existingUrl !== normalizedUrl) {
206
+ // Content already cached under different URL - create a shallow copy
207
+ const existingEntry = this.cache.get(existingUrl);
208
+ if (existingEntry && !this.isExpired(existingEntry)) {
209
+ // Create a shallow copy for the new URL to avoid shared reference issues
210
+ this.cache.set(normalizedUrl, { ...existingEntry });
211
+ // Repoint the content hash index at the new URL (the index stores a single URL per hash, so this replaces the previous one)
212
+ this.contentHashIndex.set(contentHash, normalizedUrl);
213
+ logger.debug("Image cache dedup hit", {
214
+ newUrl: normalizedUrl.substring(0, 50),
215
+ existingUrl: existingUrl.substring(0, 50),
216
+ });
217
+ return;
218
+ }
219
+ }
220
+ // Evict if at capacity
221
+ while (this.cache.size >= this.maxSize) {
222
+ this.evictOldest();
223
+ }
224
+ const now = Date.now();
225
+ const entry = {
226
+ dataUri,
227
+ contentType,
228
+ size,
229
+ contentHash,
230
+ createdAt: now,
231
+ lastAccessedAt: now,
232
+ accessCount: 1,
233
+ };
234
+ this.cache.set(normalizedUrl, entry);
235
+ this.contentHashIndex.set(contentHash, normalizedUrl);
236
+ logger.debug("Image cached", {
237
+ url: normalizedUrl.substring(0, 50),
238
+ size,
239
+ contentHash: contentHash.substring(0, 8),
240
+ cacheSize: this.cache.size,
241
+ });
242
+ }
243
+ /**
244
+ * Delete an entry from the cache
245
+ */
246
+ delete(url) {
247
+ const normalizedUrl = this.normalizeUrl(url);
248
+ const entry = this.cache.get(normalizedUrl);
249
+ if (entry) {
250
+ // Remove from content hash index
251
+ if (this.contentHashIndex.get(entry.contentHash) === normalizedUrl) {
252
+ this.contentHashIndex.delete(entry.contentHash);
253
+ }
254
+ this.cache.delete(normalizedUrl);
255
+ return true;
256
+ }
257
+ return false;
258
+ }
259
+ /**
260
+ * Evict the oldest (least recently used) entry
261
+ */
262
+ evictOldest() {
263
+ // Map maintains insertion order, first entry is oldest
264
+ const oldestKey = this.cache.keys().next().value;
265
+ if (oldestKey !== undefined) {
266
+ const entry = this.cache.get(oldestKey);
267
+ if (entry) {
268
+ if (this.contentHashIndex.get(entry.contentHash) === oldestKey) {
269
+ this.contentHashIndex.delete(entry.contentHash);
270
+ }
271
+ }
272
+ this.cache.delete(oldestKey);
273
+ this.stats.evictions++;
274
+ logger.debug("Image cache eviction", {
275
+ url: String(oldestKey).substring(0, 50),
276
+ });
277
+ }
278
+ }
279
+ /**
280
+ * Clear all expired entries
281
+ */
282
+ evictExpired() {
283
+ let evicted = 0;
284
+ const now = Date.now();
285
+ for (const [url, entry] of this.cache.entries()) {
286
+ if (now - entry.createdAt > this.ttlMs) {
287
+ this.delete(url);
288
+ evicted++;
289
+ this.stats.expirations++;
290
+ }
291
+ }
292
+ if (evicted > 0) {
293
+ logger.debug(`Evicted ${evicted} expired image cache entries`);
294
+ }
295
+ return evicted;
296
+ }
297
+ /**
298
+ * Clear all entries from the cache
299
+ */
300
+ clear() {
301
+ const size = this.cache.size;
302
+ this.cache.clear();
303
+ this.contentHashIndex.clear();
304
+ logger.debug(`Image cache cleared (${size} entries removed)`);
305
+ }
306
+ /**
307
+ * Get cache statistics
308
+ */
309
+ getStats() {
310
+ let totalBytes = 0;
311
+ for (const entry of this.cache.values()) {
312
+ totalBytes += entry.size;
313
+ }
314
+ const hitRate = this.stats.totalRequests > 0
315
+ ? Number(((this.stats.hits / this.stats.totalRequests) * 100).toFixed(2))
316
+ : 0;
317
+ return {
318
+ ...this.stats,
319
+ size: this.cache.size,
320
+ totalBytes,
321
+ hitRate,
322
+ };
323
+ }
324
+ /**
325
+ * Check if a URL is cached and not expired
326
+ */
327
+ has(url) {
328
+ const normalizedUrl = this.normalizeUrl(url);
329
+ const entry = this.cache.get(normalizedUrl);
330
+ return entry !== null && entry !== undefined && !this.isExpired(entry);
331
+ }
332
+ /**
333
+ * Get the current cache size
334
+ */
335
+ getSize() {
336
+ return this.cache.size;
337
+ }
338
+ /**
339
+ * Get cache configuration
340
+ */
341
+ getConfig() {
342
+ return {
343
+ enabled: this.enabled,
344
+ maxSize: this.maxSize,
345
+ ttlMs: this.ttlMs,
346
+ maxImageSize: this.maxImageSize,
347
+ };
348
+ }
349
+ }
350
+ // Global image cache instance
351
+ let globalImageCache = null;
352
+ /**
353
+ * Get the global image cache instance
354
+ * Creates a new instance if none exists
355
+ */
356
+ export function getImageCache(config) {
357
+ if (!globalImageCache) {
358
+ globalImageCache = new ImageCache(config);
359
+ }
360
+ return globalImageCache;
361
+ }
362
+ /**
363
+ * Reset the global image cache (useful for testing)
364
+ */
365
+ export function resetImageCache() {
366
+ if (globalImageCache) {
367
+ globalImageCache.clear();
368
+ globalImageCache = null;
369
+ }
370
+ }
371
+ /**
372
+ * Get image cache statistics from the global instance
373
+ */
374
+ export function getImageCacheStats() {
375
+ return globalImageCache ? globalImageCache.getStats() : null;
376
+ }
377
+ //# sourceMappingURL=imageCache.js.map
@@ -138,6 +138,7 @@ export declare const imageUtils: {
138
138
  * @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
139
139
  * @returns Promise<string> - Base64 data URI of the downloaded image
140
140
  * Rate-limited to 10 downloads per second to prevent DoS
141
+ * Uses LRU cache to avoid redundant downloads of the same URL
141
142
  */
142
143
  urlToBase64DataUri: (url: string, { timeoutMs, maxBytes, maxAttempts, }?: {
143
144
  timeoutMs?: number;
@@ -6,6 +6,7 @@ import { logger } from "./logger.js";
6
6
  import { urlDownloadRateLimiter } from "./rateLimiter.js";
7
7
  import { withRetry } from "./retryHandler.js";
8
8
  import { SYSTEM_LIMITS } from "../core/constants.js";
9
+ import { getImageCache } from "./imageCache.js";
9
10
  /**
10
11
  * Network error codes that should trigger a retry
11
12
  */
@@ -546,8 +547,16 @@ export const imageUtils = {
546
547
  * @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
547
548
  * @returns Promise<string> - Base64 data URI of the downloaded image
548
549
  * Rate-limited to 10 downloads per second to prevent DoS
550
+ * Uses LRU cache to avoid redundant downloads of the same URL
549
551
  */
550
552
  urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024, maxAttempts = 3, } = {}) => {
553
+ // Check cache first
554
+ const cache = getImageCache();
555
+ const cached = cache.get(url);
556
+ if (cached) {
557
+ logger.debug("Using cached image for URL", { url: url.substring(0, 50) });
558
+ return cached.dataUri;
559
+ }
551
560
  // Apply rate limiting before download
552
561
  await urlDownloadRateLimiter.acquire();
553
562
  // Basic protocol whitelist - fail fast, no retry needed
@@ -575,8 +584,12 @@ export const imageUtils = {
575
584
  if (buffer.byteLength > maxBytes) {
576
585
  throw new Error(`Downloaded content too large: ${buffer.byteLength} bytes`);
577
586
  }
578
- const base64 = Buffer.from(buffer).toString("base64");
579
- return `data:${contentType || "image/jpeg"};base64,${base64}`;
587
+ const imageBuffer = Buffer.from(buffer);
588
+ const base64 = imageBuffer.toString("base64");
589
+ const dataUri = `data:${contentType || "image/jpeg"};base64,${base64}`;
590
+ // Store in cache for future use
591
+ cache.set(url, dataUri, contentType || "image/jpeg", imageBuffer);
592
+ return dataUri;
580
593
  }
581
594
  finally {
582
595
  clearTimeout(t);
@@ -10,6 +10,7 @@ import { FileDetector } from "./fileDetector.js";
10
10
  import { PDFProcessor, PDFImageConverter } from "./pdfProcessor.js";
11
11
  import { urlDownloadRateLimiter } from "./rateLimiter.js";
12
12
  import { request, getGlobalDispatcher, interceptors } from "undici";
13
+ import { getImageCache } from "./imageCache.js";
13
14
  import { readFileSync, existsSync } from "fs";
14
15
  /**
15
16
  * Type guard to check if an image input has alt text
@@ -625,9 +626,17 @@ function isInternetUrl(input) {
625
626
  /**
626
627
  * Download image from URL and convert to base64 data URI
627
628
  * Rate-limited to 10 downloads per second to prevent DoS
629
+ * Uses LRU cache to avoid redundant downloads of the same URL
628
630
  */
629
631
  async function downloadImageFromUrl(url) {
630
- // Apply rate limiting before download
632
+ // Check cache first (before rate limiting)
633
+ const cache = getImageCache();
634
+ const cached = cache.get(url);
635
+ if (cached) {
636
+ logger.debug("Using cached image for URL", { url: url.substring(0, 50) });
637
+ return cached.dataUri;
638
+ }
639
+ // Apply rate limiting only if cache missed
631
640
  await urlDownloadRateLimiter.acquire();
632
641
  try {
633
642
  const response = await request(url, {
@@ -659,6 +668,8 @@ async function downloadImageFromUrl(url) {
659
668
  // Convert to base64 data URI
660
669
  const base64 = buffer.toString("base64");
661
670
  const dataUri = `data:${contentType};base64,${base64}`;
671
+ // Store in cache for future use
672
+ cache.set(url, dataUri, contentType, buffer);
662
673
  return dataUri;
663
674
  }
664
675
  catch (error) {
@@ -190,3 +190,54 @@ export type EnvVarValidationResult = {
190
190
  invalidVars: string[];
191
191
  warnings: string[];
192
192
  };
193
+ /**
194
+ * Cached image entry structure for image cache
195
+ */
196
+ export type CachedImage = {
197
+ /** The image data as a base64 data URI */
198
+ dataUri: string;
199
+ /** Content type of the image (e.g., "image/jpeg") */
200
+ contentType: string;
201
+ /** Size of the image in bytes */
202
+ size: number;
203
+ /** Truncated SHA-256 hash (first 16 hex characters) of the image content, used for deduplication */
204
+ contentHash: string;
205
+ /** Timestamp when the entry was created */
206
+ createdAt: number;
207
+ /** Timestamp of last access */
208
+ lastAccessedAt: number;
209
+ /** Number of times this entry was accessed */
210
+ accessCount: number;
211
+ };
212
+ /**
213
+ * Configuration options for the image cache
214
+ */
215
+ export type ImageCacheConfig = {
216
+ /** Maximum number of entries in the cache (default: 100) */
217
+ maxSize?: number;
218
+ /** Time-to-live in milliseconds (default: 30 minutes) */
219
+ ttlMs?: number;
220
+ /** Maximum size per image in bytes (default: 10MB) */
221
+ maxImageSize?: number;
222
+ };
223
+ /**
224
+ * Cache statistics for monitoring
225
+ */
226
+ export type ImageCacheStats = {
227
+ /** Number of cache hits */
228
+ hits: number;
229
+ /** Number of cache misses */
230
+ misses: number;
231
+ /** Number of entries evicted due to size limits */
232
+ evictions: number;
233
+ /** Number of entries expired due to TTL */
234
+ expirations: number;
235
+ /** Total number of requests */
236
+ totalRequests: number;
237
+ /** Current number of entries in cache */
238
+ size: number;
239
+ /** Total size of cached images in bytes */
240
+ totalBytes: number;
241
+ /** Cache hit rate as a percentage (hits / totalRequests × 100, rounded to two decimals; 0 when there are no requests) */
242
+ hitRate: number;
243
+ };
@@ -0,0 +1,116 @@
1
+ /**
2
+ * Image Cache Utility for NeuroLink
3
+ *
4
+ * Implements an LRU cache for downloaded images to avoid redundant URL downloads.
5
+ * Addresses IMG-026: No Caching issue - same URL downloaded multiple times wasting bandwidth.
6
+ *
7
+ * Features:
8
+ * - LRU (Least Recently Used) eviction strategy
9
+ * - Configurable cache size and TTL
10
+ * - Cache hit/miss metrics
11
+ * - Content hash tracking for deduplication
12
+ *
13
+ * @module utils/imageCache
14
+ */
15
+ import type { CachedImage, ImageCacheConfig, ImageCacheStats } from "../types/utilities.js";
16
+ /**
17
+ * LRU Cache for downloaded images
18
+ *
19
+ * Uses URL as primary key and tracks content hashes for deduplication.
20
+ * Implements LRU eviction and configurable TTL for memory management.
21
+ */
22
+ export declare class ImageCache {
23
+ private cache;
24
+ private contentHashIndex;
25
+ private maxSize;
26
+ private ttlMs;
27
+ private maxImageSize;
28
+ private enabled;
29
+ private stats;
30
+ constructor(config?: ImageCacheConfig);
31
+ /**
32
+ * Parse a config value with bounds checking
33
+ */
34
+ private parseConfigValue;
35
+ /**
36
+ * Normalize URL for consistent cache key generation
37
+ * Removes tracking parameters and normalizes the URL
38
+ */
39
+ private normalizeUrl;
40
+ /**
41
+ * Generate content hash from image data
42
+ */
43
+ private generateContentHash;
44
+ /**
45
+ * Check if an entry is expired based on TTL
46
+ */
47
+ private isExpired;
48
+ /**
49
+ * Check if cache is enabled
50
+ */
51
+ isEnabled(): boolean;
52
+ /**
53
+ * Get a cached image by URL
54
+ * Returns null if not found or expired
55
+ */
56
+ get(url: string): CachedImage | null;
57
+ /**
58
+ * Get a cached image by content hash
59
+ * Useful for deduplication when the same image is accessed via different URLs
60
+ */
61
+ getByContentHash(contentHash: string): CachedImage | null;
62
+ /**
63
+ * Store an image in the cache
64
+ */
65
+ set(url: string, dataUri: string, contentType: string, imageData: Buffer): void;
66
+ /**
67
+ * Delete an entry from the cache
68
+ */
69
+ delete(url: string): boolean;
70
+ /**
71
+ * Evict the oldest (least recently used) entry
72
+ */
73
+ private evictOldest;
74
+ /**
75
+ * Clear all expired entries
76
+ */
77
+ evictExpired(): number;
78
+ /**
79
+ * Clear all entries from the cache
80
+ */
81
+ clear(): void;
82
+ /**
83
+ * Get cache statistics
84
+ */
85
+ getStats(): ImageCacheStats;
86
+ /**
87
+ * Check if a URL is cached and not expired
88
+ */
89
+ has(url: string): boolean;
90
+ /**
91
+ * Get the current cache size
92
+ */
93
+ getSize(): number;
94
+ /**
95
+ * Get cache configuration
96
+ */
97
+ getConfig(): {
98
+ enabled: boolean;
99
+ maxSize: number;
100
+ ttlMs: number;
101
+ maxImageSize: number;
102
+ };
103
+ }
104
+ /**
105
+ * Get the global image cache instance
106
+ * Creates a new instance if none exists
107
+ */
108
+ export declare function getImageCache(config?: ImageCacheConfig): ImageCache;
109
+ /**
110
+ * Reset the global image cache (useful for testing)
111
+ */
112
+ export declare function resetImageCache(): void;
113
+ /**
114
+ * Get image cache statistics from the global instance
115
+ */
116
+ export declare function getImageCacheStats(): ImageCacheStats | null;
@@ -0,0 +1,376 @@
1
+ /**
2
+ * Image Cache Utility for NeuroLink
3
+ *
4
+ * Implements an LRU cache for downloaded images to avoid redundant URL downloads.
5
+ * Addresses IMG-026: No Caching issue - same URL downloaded multiple times wasting bandwidth.
6
+ *
7
+ * Features:
8
+ * - LRU (Least Recently Used) eviction strategy
9
+ * - Configurable cache size and TTL
10
+ * - Cache hit/miss metrics
11
+ * - Content hash tracking for deduplication
12
+ *
13
+ * @module utils/imageCache
14
+ */
15
+ import { createHash } from "crypto";
16
+ import { logger } from "./logger.js";
17
/**
 * Query parameters that only carry tracking information and never change the
 * image served. They are stripped before a URL is used as a cache key.
 * Hoisted to module scope so the list is not rebuilt on every lookup.
 */
const IMAGE_CACHE_TRACKING_PARAMS = [
    "utm_source",
    "utm_medium",
    "utm_campaign",
    "utm_term",
    "utm_content",
    "fbclid",
    "gclid",
    "_ga",
];
/**
 * LRU cache for downloaded images.
 *
 * Entries are keyed by normalized URL. A secondary index maps a content hash
 * to the URL most recently stored with that content, which lets identical
 * images fetched through different URLs share one data-URI string.
 *
 * The cache is opt-in: it is only active when the environment variable
 * NEUROLINK_IMAGE_CACHE_ENABLED is set to "true" (case-insensitive). When it
 * is inactive, `get` always returns null and `set` is a no-op.
 */
export class ImageCache {
    /** normalized URL -> entry. Map insertion order doubles as LRU order (first key = least recently used). */
    cache = new Map();
    /** contentHash -> normalized URL of the entry most recently stored with that content. */
    contentHashIndex = new Map();
    maxSize;
    ttlMs;
    maxImageSize;
    enabled;
    stats = {
        hits: 0,
        misses: 0,
        evictions: 0,
        expirations: 0,
        totalRequests: 0,
    };
    /**
     * @param config Optional limits ({ maxSize, ttlMs, maxImageSize }). Each one
     *   is overridden by its environment variable when that variable holds a
     *   number (NEUROLINK_IMAGE_CACHE_SIZE, NEUROLINK_IMAGE_CACHE_TTL_MS,
     *   NEUROLINK_IMAGE_MAX_SIZE) and is clamped to a fixed range.
     */
    constructor({ maxSize, ttlMs, maxImageSize } = {}) {
        const env = process.env;
        const envEnabled = env.NEUROLINK_IMAGE_CACHE_ENABLED;
        // Disabled unless explicitly switched on through the environment.
        this.enabled =
            envEnabled !== undefined && envEnabled.toLowerCase() === "true";
        this.maxSize = this.parseConfigValue(env.NEUROLINK_IMAGE_CACHE_SIZE, maxSize, 100, 1, 1000);
        this.ttlMs = this.parseConfigValue(env.NEUROLINK_IMAGE_CACHE_TTL_MS, ttlMs, 30 * 60 * 1000, // 30 minutes
        1000, // 1 second min
        24 * 60 * 60 * 1000);
        this.maxImageSize = this.parseConfigValue(env.NEUROLINK_IMAGE_MAX_SIZE, maxImageSize, 10 * 1024 * 1024, // 10MB
        1024, // 1KB min
        100 * 1024 * 1024);
        logger.debug("ImageCache initialized", {
            enabled: this.enabled,
            maxSize: this.maxSize,
            ttlMs: this.ttlMs,
            maxImageSize: this.maxImageSize,
        });
    }
    /**
     * Resolve one numeric setting and clamp it to [min, max].
     *
     * Precedence: numeric environment value, then numeric config value, then
     * the default. Values that are not numbers (NaN) are never returned: a NaN
     * limit would make every bounds comparison false and, for maxSize, switch
     * eviction off entirely.
     *
     * @param envValue Raw environment string, or undefined when unset
     * @param configValue Value supplied by the caller, or undefined
     * @param defaultValue Fallback used when neither source yields a number
     * @param min Lower bound (inclusive)
     * @param max Upper bound (inclusive)
     * @returns The resolved value, clamped to the bounds
     */
    parseConfigValue(envValue, configValue, defaultValue, min, max) {
        const fromEnv = envValue !== undefined ? parseInt(envValue, 10) : NaN;
        const fromConfig = configValue !== undefined ? Number(configValue) : NaN;
        let value = defaultValue;
        if (!Number.isNaN(fromEnv)) {
            value = fromEnv;
        }
        else {
            if (envValue !== undefined) {
                logger.warn(`Ignoring non-numeric cache config override "${envValue}"`);
            }
            if (!Number.isNaN(fromConfig)) {
                value = fromConfig;
            }
        }
        // Apply bounds
        if (value < min) {
            logger.warn(`Cache config value ${value} below minimum ${min}, using min`);
            return min;
        }
        if (value > max) {
            logger.warn(`Cache config value ${value} above maximum ${max}, using max`);
            return max;
        }
        return value;
    }
    /**
     * Build the cache key for a URL: parse it, drop known tracking query
     * parameters and serialize it again. Strings that do not parse as a URL are
     * used unchanged.
     */
    normalizeUrl(url) {
        try {
            const parsed = new URL(url);
            for (const param of IMAGE_CACHE_TRACKING_PARAMS) {
                parsed.searchParams.delete(param);
            }
            return parsed.toString();
        }
        catch {
            // If URL parsing fails, use the original URL
            return url;
        }
    }
    /**
     * Hash image content for the dedup index.
     *
     * @param data Raw bytes, or a base64 string that is decoded first
     * @returns The first 16 hex characters (64 bits) of the SHA-256 digest.
     *   Because the digest is truncated, a hash match alone is not treated as
     *   proof of identical content (see `set`).
     */
    generateContentHash(data) {
        const buffer = typeof data === "string" ? Buffer.from(data, "base64") : data;
        return createHash("sha256").update(buffer).digest("hex").substring(0, 16);
    }
    /**
     * Check if an entry is older than the configured TTL (measured from creation, not last access)
     */
    isExpired(entry) {
        return Date.now() - entry.createdAt > this.ttlMs;
    }
    /**
     * Check if cache is enabled
     */
    isEnabled() {
        return this.enabled;
    }
    /**
     * Get a cached image by URL.
     *
     * @returns The entry, or null when the cache is disabled or the URL is not
     *   cached or its entry has expired. A hit refreshes the entry's LRU
     *   position and access counters. An expired lookup removes the entry and
     *   is counted both as an expiration and as a miss, so that
     *   hits + misses always equals totalRequests.
     */
    get(url) {
        // Return null immediately if cache is disabled
        if (!this.enabled) {
            return null;
        }
        this.stats.totalRequests++;
        const normalizedUrl = this.normalizeUrl(url);
        const entry = this.cache.get(normalizedUrl);
        if (!entry) {
            this.stats.misses++;
            logger.debug("Image cache miss", { url: normalizedUrl.substring(0, 50) });
            return null;
        }
        // Check TTL expiration
        if (this.isExpired(entry)) {
            this.stats.expirations++;
            this.stats.misses++;
            this.removeEntry(normalizedUrl);
            logger.debug("Image cache entry expired", {
                url: normalizedUrl.substring(0, 50),
            });
            return null;
        }
        // Update LRU info
        entry.lastAccessedAt = Date.now();
        entry.accessCount++;
        // Move to end (most recently used) - delete and re-add
        this.cache.delete(normalizedUrl);
        this.cache.set(normalizedUrl, entry);
        this.stats.hits++;
        logger.debug("Image cache hit", {
            url: normalizedUrl.substring(0, 50),
            accessCount: entry.accessCount,
        });
        return entry;
    }
    /**
     * Get a cached image by content hash.
     * Resolves the hash to a URL through the index and delegates to `get`, so
     * the lookup counts towards the hit/miss statistics like any other.
     */
    getByContentHash(contentHash) {
        const url = this.contentHashIndex.get(contentHash);
        if (!url) {
            return null;
        }
        return this.get(url);
    }
    /**
     * Store an image in the cache.
     *
     * Does nothing when the cache is disabled or the image is larger than
     * maxImageSize. Storing always produces a fresh entry (new timestamps) at
     * the most-recently-used position and never lets the cache grow beyond
     * maxSize.
     *
     * @param url Source URL (normalized internally)
     * @param dataUri Base64 data URI of the image
     * @param contentType MIME type of the image
     * @param imageData Raw image bytes; used for the size limit and content hash
     */
    set(url, dataUri, contentType, imageData) {
        // Skip caching if disabled
        if (!this.enabled) {
            logger.debug("Image caching disabled, skipping cache storage");
            return;
        }
        const normalizedUrl = this.normalizeUrl(url);
        const size = imageData.length;
        // Skip caching if image exceeds max size
        if (size > this.maxImageSize) {
            logger.debug("Image too large to cache", {
                url: normalizedUrl.substring(0, 50),
                size,
                maxSize: this.maxImageSize,
            });
            return;
        }
        const contentHash = this.generateContentHash(imageData);
        // Look for the same content cached under a different URL. The twin's
        // string is only reused when it is verifiably equal to the caller's, so a
        // collision of the truncated hash can never serve the wrong image.
        const existingUrl = this.contentHashIndex.get(contentHash);
        const twin = existingUrl !== undefined && existingUrl !== normalizedUrl
            ? this.cache.get(existingUrl)
            : undefined;
        const shareWithTwin = twin !== undefined && !this.isExpired(twin) && twin.dataUri === dataUri;
        // Drop any previous entry for this URL first: this clears a stale hash
        // index entry, avoids evicting an unrelated entry for a plain overwrite,
        // and lets the re-insert below land at the most-recently-used position.
        this.removeEntry(normalizedUrl);
        // Evict if at capacity
        while (this.cache.size >= this.maxSize) {
            this.evictOldest();
        }
        const now = Date.now();
        const entry = {
            dataUri: shareWithTwin ? twin.dataUri : dataUri,
            contentType,
            size,
            contentHash,
            createdAt: now,
            lastAccessedAt: now,
            accessCount: 1,
        };
        this.cache.set(normalizedUrl, entry);
        this.contentHashIndex.set(contentHash, normalizedUrl);
        if (shareWithTwin) {
            logger.debug("Image cache dedup hit", {
                newUrl: normalizedUrl.substring(0, 50),
                existingUrl: existingUrl.substring(0, 50),
            });
            return;
        }
        logger.debug("Image cached", {
            url: normalizedUrl.substring(0, 50),
            size,
            contentHash: contentHash.substring(0, 8),
            cacheSize: this.cache.size,
        });
    }
    /**
     * Delete an entry from the cache
     * @returns true when an entry existed for the (normalized) URL
     */
    delete(url) {
        return this.removeEntry(this.normalizeUrl(url));
    }
    /**
     * Remove the entry stored under an already-normalized key and drop its
     * content-hash index entry if that index entry points at this key.
     * @returns true when an entry was removed
     */
    removeEntry(normalizedUrl) {
        const entry = this.cache.get(normalizedUrl);
        if (entry === undefined) {
            return false;
        }
        if (this.contentHashIndex.get(entry.contentHash) === normalizedUrl) {
            this.contentHashIndex.delete(entry.contentHash);
        }
        this.cache.delete(normalizedUrl);
        return true;
    }
    /**
     * Evict the oldest (least recently used) entry
     */
    evictOldest() {
        // Map maintains insertion order, first entry is oldest
        const oldestKey = this.cache.keys().next().value;
        if (oldestKey === undefined) {
            return;
        }
        this.removeEntry(oldestKey);
        this.stats.evictions++;
        logger.debug("Image cache eviction", {
            url: String(oldestKey).substring(0, 50),
        });
    }
    /**
     * Remove every entry whose TTL has elapsed
     * @returns Number of entries removed
     */
    evictExpired() {
        let evicted = 0;
        const now = Date.now();
        // Deleting from a Map while iterating over it is well-defined in JavaScript.
        for (const [key, entry] of this.cache) {
            if (now - entry.createdAt > this.ttlMs) {
                this.removeEntry(key);
                evicted++;
                this.stats.expirations++;
            }
        }
        if (evicted > 0) {
            logger.debug(`Evicted ${evicted} expired image cache entries`);
        }
        return evicted;
    }
    /**
     * Clear all entries from the cache (statistics counters are kept)
     */
    clear() {
        const size = this.cache.size;
        this.cache.clear();
        this.contentHashIndex.clear();
        logger.debug(`Image cache cleared (${size} entries removed)`);
    }
    /**
     * Get cache statistics: the running counters plus current entry count,
     * total cached bytes and hit rate as a percentage rounded to two decimals
     */
    getStats() {
        let totalBytes = 0;
        for (const entry of this.cache.values()) {
            totalBytes += entry.size;
        }
        const hitRate = this.stats.totalRequests > 0
            ? Number(((this.stats.hits / this.stats.totalRequests) * 100).toFixed(2))
            : 0;
        return {
            ...this.stats,
            size: this.cache.size,
            totalBytes,
            hitRate,
        };
    }
    /**
     * Check if a URL is cached and not expired (does not touch LRU order or statistics)
     */
    has(url) {
        const entry = this.cache.get(this.normalizeUrl(url));
        return entry !== null && entry !== undefined && !this.isExpired(entry);
    }
    /**
     * Get the current number of cached entries
     */
    getSize() {
        return this.cache.size;
    }
    /**
     * Get the effective cache configuration (after environment overrides and clamping)
     */
    getConfig() {
        return {
            enabled: this.enabled,
            maxSize: this.maxSize,
            ttlMs: this.ttlMs,
            maxImageSize: this.maxImageSize,
        };
    }
}
350
// Process-wide singleton; stays null until the first getImageCache() call
let globalImageCache = null;
/**
 * Return the shared ImageCache, building it on first use.
 * `config` only takes effect on the call that creates the instance.
 */
export function getImageCache(config) {
    if (globalImageCache) {
        return globalImageCache;
    }
    globalImageCache = new ImageCache(config);
    return globalImageCache;
}
362
/**
 * Empty and discard the shared cache instance, if one exists, so the next
 * getImageCache() call starts from scratch (useful for testing)
 */
export function resetImageCache() {
    if (!globalImageCache) {
        return;
    }
    globalImageCache.clear();
    globalImageCache = null;
}
371
/**
 * Statistics of the shared cache instance, or null when none has been created
 */
export function getImageCacheStats() {
    if (!globalImageCache) {
        return null;
    }
    return globalImageCache.getStats();
}
@@ -138,6 +138,7 @@ export declare const imageUtils: {
138
138
  * @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
139
139
  * @returns Promise<string> - Base64 data URI of the downloaded image
140
140
  * Rate-limited to 10 downloads per second to prevent DoS
141
+ * Uses LRU cache to avoid redundant downloads of the same URL
141
142
  */
142
143
  urlToBase64DataUri: (url: string, { timeoutMs, maxBytes, maxAttempts, }?: {
143
144
  timeoutMs?: number;
@@ -6,6 +6,7 @@ import { logger } from "./logger.js";
6
6
  import { urlDownloadRateLimiter } from "./rateLimiter.js";
7
7
  import { withRetry } from "./retryHandler.js";
8
8
  import { SYSTEM_LIMITS } from "../core/constants.js";
9
+ import { getImageCache } from "./imageCache.js";
9
10
  /**
10
11
  * Network error codes that should trigger a retry
11
12
  */
@@ -546,8 +547,16 @@ export const imageUtils = {
546
547
  * @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
547
548
  * @returns Promise<string> - Base64 data URI of the downloaded image
548
549
  * Rate-limited to 10 downloads per second to prevent DoS
550
+ * Uses LRU cache to avoid redundant downloads of the same URL
549
551
  */
550
552
  urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024, maxAttempts = 3, } = {}) => {
553
+ // Check cache first
554
+ const cache = getImageCache();
555
+ const cached = cache.get(url);
556
+ if (cached) {
557
+ logger.debug("Using cached image for URL", { url: url.substring(0, 50) });
558
+ return cached.dataUri;
559
+ }
551
560
  // Apply rate limiting before download
552
561
  await urlDownloadRateLimiter.acquire();
553
562
  // Basic protocol whitelist - fail fast, no retry needed
@@ -575,8 +584,12 @@ export const imageUtils = {
575
584
  if (buffer.byteLength > maxBytes) {
576
585
  throw new Error(`Downloaded content too large: ${buffer.byteLength} bytes`);
577
586
  }
578
- const base64 = Buffer.from(buffer).toString("base64");
579
- return `data:${contentType || "image/jpeg"};base64,${base64}`;
587
+ const imageBuffer = Buffer.from(buffer);
588
+ const base64 = imageBuffer.toString("base64");
589
+ const dataUri = `data:${contentType || "image/jpeg"};base64,${base64}`;
590
+ // Store in cache for future use
591
+ cache.set(url, dataUri, contentType || "image/jpeg", imageBuffer);
592
+ return dataUri;
580
593
  }
581
594
  finally {
582
595
  clearTimeout(t);
@@ -10,6 +10,7 @@ import { FileDetector } from "./fileDetector.js";
10
10
  import { PDFProcessor, PDFImageConverter } from "./pdfProcessor.js";
11
11
  import { urlDownloadRateLimiter } from "./rateLimiter.js";
12
12
  import { request, getGlobalDispatcher, interceptors } from "undici";
13
+ import { getImageCache } from "./imageCache.js";
13
14
  import { readFileSync, existsSync } from "fs";
14
15
  /**
15
16
  * Type guard to check if an image input has alt text
@@ -625,9 +626,17 @@ function isInternetUrl(input) {
625
626
  /**
626
627
  * Download image from URL and convert to base64 data URI
627
628
  * Rate-limited to 10 downloads per second to prevent DoS
629
+ * Uses LRU cache to avoid redundant downloads of the same URL
628
630
  */
629
631
  async function downloadImageFromUrl(url) {
630
- // Apply rate limiting before download
632
+ // Check cache first (before rate limiting)
633
+ const cache = getImageCache();
634
+ const cached = cache.get(url);
635
+ if (cached) {
636
+ logger.debug("Using cached image for URL", { url: url.substring(0, 50) });
637
+ return cached.dataUri;
638
+ }
639
+ // Apply rate limiting only if cache missed
631
640
  await urlDownloadRateLimiter.acquire();
632
641
  try {
633
642
  const response = await request(url, {
@@ -659,6 +668,8 @@ async function downloadImageFromUrl(url) {
659
668
  // Convert to base64 data URI
660
669
  const base64 = buffer.toString("base64");
661
670
  const dataUri = `data:${contentType};base64,${base64}`;
671
+ // Store in cache for future use
672
+ cache.set(url, dataUri, contentType, buffer);
662
673
  return dataUri;
663
674
  }
664
675
  catch (error) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "8.41.1",
3
+ "version": "8.42.0",
4
4
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 13 providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
5
5
  "author": {
6
6
  "name": "Juspay Technologies",