@juspay/neurolink 8.41.1 → 8.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/lib/types/utilities.d.ts +51 -0
- package/dist/lib/utils/imageCache.d.ts +116 -0
- package/dist/lib/utils/imageCache.js +377 -0
- package/dist/lib/utils/imageProcessor.d.ts +1 -0
- package/dist/lib/utils/imageProcessor.js +15 -2
- package/dist/lib/utils/messageBuilder.js +12 -1
- package/dist/types/utilities.d.ts +51 -0
- package/dist/utils/imageCache.d.ts +116 -0
- package/dist/utils/imageCache.js +376 -0
- package/dist/utils/imageProcessor.d.ts +1 -0
- package/dist/utils/imageProcessor.js +15 -2
- package/dist/utils/messageBuilder.js +12 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
## [8.42.0](https://github.com/juspay/neurolink/compare/v8.41.1...v8.42.0) (2026-02-02)
|
|
2
|
+
|
|
3
|
+
### Features
|
|
4
|
+
|
|
5
|
+
- **(cache):** Implemented LRU cache for image downloads ([6562c45](https://github.com/juspay/neurolink/commit/6562c45605a6031afe41bdfcc5de0a7687127c3f))
|
|
6
|
+
|
|
1
7
|
## [8.41.1](https://github.com/juspay/neurolink/compare/v8.41.0...v8.41.1) (2026-01-31)
|
|
2
8
|
|
|
3
9
|
### Bug Fixes
|
|
@@ -190,3 +190,54 @@ export type EnvVarValidationResult = {
|
|
|
190
190
|
invalidVars: string[];
|
|
191
191
|
warnings: string[];
|
|
192
192
|
};
|
|
193
|
+
/**
 * A single image held by the image cache, together with the bookkeeping
 * data the cache maintains for it.
 */
export type CachedImage = {
    /** The image encoded as a base64 data URI. */
    dataUri: string;
    /** Content type of the image (e.g., "image/jpeg"). */
    contentType: string;
    /** Size in bytes of the raw image buffer that was handed to the cache. */
    size: number;
    /**
     * Content fingerprint used for deduplication: the first 16 hexadecimal
     * characters of the SHA-256 digest of the image bytes (not the full digest).
     */
    contentHash: string;
    /** Timestamp (ms since epoch) when the entry was stored; TTL expiry is measured from this value. */
    createdAt: number;
    /** Timestamp (ms since epoch) of the most recent cache hit on this entry. */
    lastAccessedAt: number;
    /** Number of times this entry was accessed; a newly stored entry starts at 1. */
    accessCount: number;
};
|
|
212
|
+
/**
 * Configuration options for the image cache.
 *
 * Every field is optional. The cache constructor lets the matching
 * environment variable override the value given here and clamps the result
 * to the range noted on each field. Whether caching is enabled at all is not
 * part of this object: it is controlled solely by the
 * NEUROLINK_IMAGE_CACHE_ENABLED environment variable.
 */
export type ImageCacheConfig = {
    /**
     * Maximum number of entries in the cache
     * (default: 100, allowed range: 1-1000, env: NEUROLINK_IMAGE_CACHE_SIZE).
     */
    maxSize?: number;
    /**
     * Time-to-live in milliseconds
     * (default: 30 minutes, allowed range: 1 second - 24 hours, env: NEUROLINK_IMAGE_CACHE_TTL_MS).
     */
    ttlMs?: number;
    /**
     * Maximum size per image in bytes; larger images are not cached
     * (default: 10MB, allowed range: 1KB - 100MB, env: NEUROLINK_IMAGE_MAX_SIZE).
     */
    maxImageSize?: number;
};
|
|
223
|
+
/**
 * Snapshot of image cache counters, intended for monitoring.
 */
export type ImageCacheStats = {
    /** Number of lookups that returned a live (non-expired) entry. */
    hits: number;
    /** Number of lookups for URLs that had no entry at all. */
    misses: number;
    /** Number of entries evicted to make room for new ones (size limit). */
    evictions: number;
    /** Number of entries removed because their TTL had elapsed. */
    expirations: number;
    /** Total number of lookups performed while the cache was enabled. */
    totalRequests: number;
    /** Current number of entries in the cache. */
    size: number;
    /** Sum of the `size` of every cached image, in bytes. */
    totalBytes: number;
    /**
     * Cache hit rate as a percentage (hits / totalRequests * 100), rounded to
     * two decimal places; 0 when no lookups have been made.
     */
    hitRate: number;
};
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image Cache Utility for NeuroLink
|
|
3
|
+
*
|
|
4
|
+
* Implements an LRU cache for downloaded images to avoid redundant URL downloads.
|
|
5
|
+
* Addresses IMG-026: No Caching issue - same URL downloaded multiple times wasting bandwidth.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - LRU (Least Recently Used) eviction strategy
|
|
9
|
+
* - Configurable cache size and TTL
|
|
10
|
+
* - Cache hit/miss metrics
|
|
11
|
+
* - Content hash tracking for deduplication
|
|
12
|
+
*
|
|
13
|
+
* @module utils/imageCache
|
|
14
|
+
*/
|
|
15
|
+
import type { CachedImage, ImageCacheConfig, ImageCacheStats } from "../types/utilities.js";
|
|
16
|
+
/**
 * LRU cache for downloaded images.
 *
 * Entries are keyed by normalized URL, and a secondary index maps content
 * hashes to URLs so that identical images reached through different URLs can
 * be recognised. Entries expire `ttlMs` after they were stored.
 *
 * Caching is off unless the NEUROLINK_IMAGE_CACHE_ENABLED environment
 * variable equals "true" (case-insensitive) when the instance is
 * constructed. While off, `get` always returns null and `set` stores nothing.
 */
export declare class ImageCache {
    private cache;
    private contentHashIndex;
    private maxSize;
    private ttlMs;
    private maxImageSize;
    private enabled;
    private stats;
    /**
     * @param config - Optional limits. Matching environment variables take
     *   precedence, and every value is clamped to its allowed range.
     */
    constructor(config?: ImageCacheConfig);
    /**
     * Resolve one numeric setting from environment value, config value and
     * default, then clamp it to [min, max].
     */
    private parseConfigValue;
    /**
     * Normalize a URL for use as a cache key by removing common tracking
     * query parameters (utm_*, fbclid, gclid, _ga). Unparseable input is
     * returned unchanged.
     */
    private normalizeUrl;
    /**
     * Compute the content fingerprint of image data: the first 16 hex
     * characters of its SHA-256 digest.
     */
    private generateContentHash;
    /**
     * Check whether an entry is older than the configured TTL, measured from
     * its creation time.
     */
    private isExpired;
    /**
     * Report whether caching is enabled for this instance.
     */
    isEnabled(): boolean;
    /**
     * Look up a cached image by URL.
     *
     * Returns null when the cache is disabled, when the URL is not cached, or
     * when the entry has expired (an expired entry is removed). A hit updates
     * the entry's access metadata and marks it as most recently used. Every
     * lookup made while the cache is enabled is counted in the statistics.
     */
    get(url: string): CachedImage | null;
    /**
     * Look up a cached image by content hash.
     *
     * Resolves the hash to a URL through the content-hash index and then
     * performs a normal `get`, so the lookup is counted in the statistics like
     * any other. Returns null when the hash is unknown.
     */
    getByContentHash(contentHash: string): CachedImage | null;
    /**
     * Store an image in the cache.
     *
     * Does nothing when the cache is disabled or when `imageData` is larger
     * than the configured maximum image size.
     *
     * @param url - URL the image was downloaded from
     * @param dataUri - The image as a base64 data URI
     * @param contentType - Content type of the image
     * @param imageData - Raw image bytes; used for the size check and content hash
     */
    set(url: string, dataUri: string, contentType: string, imageData: Buffer): void;
    /**
     * Delete the entry for a URL.
     *
     * @returns true if an entry existed and was removed, false otherwise
     */
    delete(url: string): boolean;
    /**
     * Evict the least recently used entry and count it as an eviction.
     */
    private evictOldest;
    /**
     * Remove every entry whose TTL has elapsed.
     *
     * @returns the number of entries removed
     */
    evictExpired(): number;
    /**
     * Remove all entries from the cache. Hit/miss counters are left untouched.
     */
    clear(): void;
    /**
     * Get a snapshot of the cache counters plus current size, total bytes and
     * hit rate.
     */
    getStats(): ImageCacheStats;
    /**
     * Check whether a URL is cached and not expired, without touching the
     * statistics or the entry's recency.
     */
    has(url: string): boolean;
    /**
     * Get the current number of entries in the cache.
     */
    getSize(): number;
    /**
     * Get the effective configuration (after environment overrides and
     * clamping).
     */
    getConfig(): {
        enabled: boolean;
        maxSize: number;
        ttlMs: number;
        maxImageSize: number;
    };
}
|
|
104
|
+
/**
 * Get the global image cache instance, creating it on first use.
 *
 * @param config - Used only when this call creates the instance; ignored
 *   when a global instance already exists.
 * @returns the shared ImageCache instance
 */
export declare function getImageCache(config?: ImageCacheConfig): ImageCache;
|
|
109
|
+
/**
 * Reset the global image cache (useful for testing).
 *
 * Clears and discards the current global instance, if any, so the next
 * `getImageCache` call creates a fresh one.
 */
export declare function resetImageCache(): void;
|
|
113
|
+
/**
 * Get image cache statistics from the global instance.
 *
 * @returns the statistics snapshot, or null when no global instance has been
 *   created yet
 */
export declare function getImageCacheStats(): ImageCacheStats | null;
|
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image Cache Utility for NeuroLink
|
|
3
|
+
*
|
|
4
|
+
* Implements an LRU cache for downloaded images to avoid redundant URL downloads.
|
|
5
|
+
* Addresses IMG-026: No Caching issue - same URL downloaded multiple times wasting bandwidth.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - LRU (Least Recently Used) eviction strategy
|
|
9
|
+
* - Configurable cache size and TTL
|
|
10
|
+
* - Cache hit/miss metrics
|
|
11
|
+
* - Content hash tracking for deduplication
|
|
12
|
+
*
|
|
13
|
+
* @module utils/imageCache
|
|
14
|
+
*/
|
|
15
|
+
import { createHash } from "crypto";
|
|
16
|
+
import { logger } from "./logger.js";
|
|
17
|
+
/**
|
|
18
|
+
* LRU Cache for downloaded images
|
|
19
|
+
*
|
|
20
|
+
* Uses URL as primary key and tracks content hashes for deduplication.
|
|
21
|
+
* Implements LRU eviction and configurable TTL for memory management.
|
|
22
|
+
*/
|
|
23
|
+
export class ImageCache {
|
|
24
|
+
cache = new Map();
|
|
25
|
+
contentHashIndex = new Map(); // contentHash -> url
|
|
26
|
+
maxSize;
|
|
27
|
+
ttlMs;
|
|
28
|
+
maxImageSize;
|
|
29
|
+
enabled;
|
|
30
|
+
stats = {
|
|
31
|
+
hits: 0,
|
|
32
|
+
misses: 0,
|
|
33
|
+
evictions: 0,
|
|
34
|
+
expirations: 0,
|
|
35
|
+
totalRequests: 0,
|
|
36
|
+
};
|
|
37
|
+
constructor(config = {}) {
|
|
38
|
+
// Parse configuration with environment variable overrides
|
|
39
|
+
const envEnabled = process.env.NEUROLINK_IMAGE_CACHE_ENABLED;
|
|
40
|
+
const envMaxSize = process.env.NEUROLINK_IMAGE_CACHE_SIZE;
|
|
41
|
+
const envTtlMs = process.env.NEUROLINK_IMAGE_CACHE_TTL_MS;
|
|
42
|
+
const envMaxImageSize = process.env.NEUROLINK_IMAGE_MAX_SIZE;
|
|
43
|
+
// Check if cache is enabled (default: false)
|
|
44
|
+
this.enabled =
|
|
45
|
+
envEnabled !== undefined ? envEnabled.toLowerCase() === "true" : false;
|
|
46
|
+
this.maxSize = this.parseConfigValue(envMaxSize, config.maxSize, 100, 1, 1000);
|
|
47
|
+
this.ttlMs = this.parseConfigValue(envTtlMs, config.ttlMs, 30 * 60 * 1000, // 30 minutes
|
|
48
|
+
1000, // 1 second min
|
|
49
|
+
24 * 60 * 60 * 1000);
|
|
50
|
+
this.maxImageSize = this.parseConfigValue(envMaxImageSize, config.maxImageSize, 10 * 1024 * 1024, // 10MB
|
|
51
|
+
1024, // 1KB min
|
|
52
|
+
100 * 1024 * 1024);
|
|
53
|
+
logger.debug("ImageCache initialized", {
|
|
54
|
+
enabled: this.enabled,
|
|
55
|
+
maxSize: this.maxSize,
|
|
56
|
+
ttlMs: this.ttlMs,
|
|
57
|
+
maxImageSize: this.maxImageSize,
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Parse a config value with bounds checking
|
|
62
|
+
*/
|
|
63
|
+
parseConfigValue(envValue, configValue, defaultValue, min, max) {
|
|
64
|
+
let value;
|
|
65
|
+
if (envValue !== undefined) {
|
|
66
|
+
const parsed = parseInt(envValue, 10);
|
|
67
|
+
value = isNaN(parsed) ? defaultValue : parsed;
|
|
68
|
+
}
|
|
69
|
+
else if (configValue !== undefined) {
|
|
70
|
+
value = configValue;
|
|
71
|
+
}
|
|
72
|
+
else {
|
|
73
|
+
value = defaultValue;
|
|
74
|
+
}
|
|
75
|
+
// Apply bounds
|
|
76
|
+
if (value < min) {
|
|
77
|
+
logger.warn(`Cache config value ${value} below minimum ${min}, using min`);
|
|
78
|
+
return min;
|
|
79
|
+
}
|
|
80
|
+
if (value > max) {
|
|
81
|
+
logger.warn(`Cache config value ${value} above maximum ${max}, using max`);
|
|
82
|
+
return max;
|
|
83
|
+
}
|
|
84
|
+
return value;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Normalize URL for consistent cache key generation
|
|
88
|
+
* Removes tracking parameters and normalizes the URL
|
|
89
|
+
*/
|
|
90
|
+
normalizeUrl(url) {
|
|
91
|
+
try {
|
|
92
|
+
const parsed = new URL(url);
|
|
93
|
+
// Remove common tracking parameters that don't affect content
|
|
94
|
+
const trackingParams = [
|
|
95
|
+
"utm_source",
|
|
96
|
+
"utm_medium",
|
|
97
|
+
"utm_campaign",
|
|
98
|
+
"utm_term",
|
|
99
|
+
"utm_content",
|
|
100
|
+
"fbclid",
|
|
101
|
+
"gclid",
|
|
102
|
+
"_ga",
|
|
103
|
+
];
|
|
104
|
+
trackingParams.forEach((param) => parsed.searchParams.delete(param));
|
|
105
|
+
return parsed.toString();
|
|
106
|
+
}
|
|
107
|
+
catch {
|
|
108
|
+
// If URL parsing fails, use the original URL
|
|
109
|
+
return url;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* Generate content hash from image data
|
|
114
|
+
*/
|
|
115
|
+
generateContentHash(data) {
|
|
116
|
+
const buffer = typeof data === "string" ? Buffer.from(data, "base64") : data;
|
|
117
|
+
return createHash("sha256").update(buffer).digest("hex").substring(0, 16);
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Check if an entry is expired based on TTL
|
|
121
|
+
*/
|
|
122
|
+
isExpired(entry) {
|
|
123
|
+
return Date.now() - entry.createdAt > this.ttlMs;
|
|
124
|
+
}
|
|
125
|
+
/**
|
|
126
|
+
* Check if cache is enabled
|
|
127
|
+
*/
|
|
128
|
+
isEnabled() {
|
|
129
|
+
return this.enabled;
|
|
130
|
+
}
|
|
131
|
+
/**
|
|
132
|
+
* Get a cached image by URL
|
|
133
|
+
* Returns null if not found or expired
|
|
134
|
+
*/
|
|
135
|
+
get(url) {
|
|
136
|
+
// Return null immediately if cache is disabled
|
|
137
|
+
if (!this.enabled) {
|
|
138
|
+
return null;
|
|
139
|
+
}
|
|
140
|
+
this.stats.totalRequests++;
|
|
141
|
+
const normalizedUrl = this.normalizeUrl(url);
|
|
142
|
+
const entry = this.cache.get(normalizedUrl);
|
|
143
|
+
if (!entry) {
|
|
144
|
+
this.stats.misses++;
|
|
145
|
+
logger.debug("Image cache miss", { url: normalizedUrl.substring(0, 50) });
|
|
146
|
+
return null;
|
|
147
|
+
}
|
|
148
|
+
// Check TTL expiration
|
|
149
|
+
if (this.isExpired(entry)) {
|
|
150
|
+
this.stats.expirations++;
|
|
151
|
+
this.delete(normalizedUrl);
|
|
152
|
+
logger.debug("Image cache entry expired", {
|
|
153
|
+
url: normalizedUrl.substring(0, 50),
|
|
154
|
+
});
|
|
155
|
+
return null;
|
|
156
|
+
}
|
|
157
|
+
// Update LRU info
|
|
158
|
+
entry.lastAccessedAt = Date.now();
|
|
159
|
+
entry.accessCount++;
|
|
160
|
+
// Move to end (most recently used) - delete and re-add
|
|
161
|
+
this.cache.delete(normalizedUrl);
|
|
162
|
+
this.cache.set(normalizedUrl, entry);
|
|
163
|
+
this.stats.hits++;
|
|
164
|
+
logger.debug("Image cache hit", {
|
|
165
|
+
url: normalizedUrl.substring(0, 50),
|
|
166
|
+
accessCount: entry.accessCount,
|
|
167
|
+
});
|
|
168
|
+
return entry;
|
|
169
|
+
}
|
|
170
|
+
/**
|
|
171
|
+
* Get a cached image by content hash
|
|
172
|
+
* Useful for deduplication when the same image is accessed via different URLs
|
|
173
|
+
*/
|
|
174
|
+
getByContentHash(contentHash) {
|
|
175
|
+
const url = this.contentHashIndex.get(contentHash);
|
|
176
|
+
if (!url) {
|
|
177
|
+
return null;
|
|
178
|
+
}
|
|
179
|
+
return this.get(url);
|
|
180
|
+
}
|
|
181
|
+
/**
|
|
182
|
+
* Store an image in the cache
|
|
183
|
+
*/
|
|
184
|
+
set(url, dataUri, contentType, imageData) {
|
|
185
|
+
// Skip caching if disabled
|
|
186
|
+
if (!this.enabled) {
|
|
187
|
+
logger.debug("Image caching disabled, skipping cache storage");
|
|
188
|
+
return;
|
|
189
|
+
}
|
|
190
|
+
const normalizedUrl = this.normalizeUrl(url);
|
|
191
|
+
const size = imageData.length;
|
|
192
|
+
// Skip caching if image exceeds max size
|
|
193
|
+
if (size > this.maxImageSize) {
|
|
194
|
+
logger.debug("Image too large to cache", {
|
|
195
|
+
url: normalizedUrl.substring(0, 50),
|
|
196
|
+
size,
|
|
197
|
+
maxSize: this.maxImageSize,
|
|
198
|
+
});
|
|
199
|
+
return;
|
|
200
|
+
}
|
|
201
|
+
// Generate content hash
|
|
202
|
+
const contentHash = this.generateContentHash(imageData);
|
|
203
|
+
// Check if same content already exists under different URL
|
|
204
|
+
const existingUrl = this.contentHashIndex.get(contentHash);
|
|
205
|
+
if (existingUrl && existingUrl !== normalizedUrl) {
|
|
206
|
+
// Content already cached under different URL - create a shallow copy
|
|
207
|
+
const existingEntry = this.cache.get(existingUrl);
|
|
208
|
+
if (existingEntry && !this.isExpired(existingEntry)) {
|
|
209
|
+
// Create a shallow copy for the new URL to avoid shared reference issues
|
|
210
|
+
this.cache.set(normalizedUrl, { ...existingEntry });
|
|
211
|
+
// Update content hash index to point to the new URL as well
|
|
212
|
+
this.contentHashIndex.set(contentHash, normalizedUrl);
|
|
213
|
+
logger.debug("Image cache dedup hit", {
|
|
214
|
+
newUrl: normalizedUrl.substring(0, 50),
|
|
215
|
+
existingUrl: existingUrl.substring(0, 50),
|
|
216
|
+
});
|
|
217
|
+
return;
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
// Evict if at capacity
|
|
221
|
+
while (this.cache.size >= this.maxSize) {
|
|
222
|
+
this.evictOldest();
|
|
223
|
+
}
|
|
224
|
+
const now = Date.now();
|
|
225
|
+
const entry = {
|
|
226
|
+
dataUri,
|
|
227
|
+
contentType,
|
|
228
|
+
size,
|
|
229
|
+
contentHash,
|
|
230
|
+
createdAt: now,
|
|
231
|
+
lastAccessedAt: now,
|
|
232
|
+
accessCount: 1,
|
|
233
|
+
};
|
|
234
|
+
this.cache.set(normalizedUrl, entry);
|
|
235
|
+
this.contentHashIndex.set(contentHash, normalizedUrl);
|
|
236
|
+
logger.debug("Image cached", {
|
|
237
|
+
url: normalizedUrl.substring(0, 50),
|
|
238
|
+
size,
|
|
239
|
+
contentHash: contentHash.substring(0, 8),
|
|
240
|
+
cacheSize: this.cache.size,
|
|
241
|
+
});
|
|
242
|
+
}
|
|
243
|
+
/**
|
|
244
|
+
* Delete an entry from the cache
|
|
245
|
+
*/
|
|
246
|
+
delete(url) {
|
|
247
|
+
const normalizedUrl = this.normalizeUrl(url);
|
|
248
|
+
const entry = this.cache.get(normalizedUrl);
|
|
249
|
+
if (entry) {
|
|
250
|
+
// Remove from content hash index
|
|
251
|
+
if (this.contentHashIndex.get(entry.contentHash) === normalizedUrl) {
|
|
252
|
+
this.contentHashIndex.delete(entry.contentHash);
|
|
253
|
+
}
|
|
254
|
+
this.cache.delete(normalizedUrl);
|
|
255
|
+
return true;
|
|
256
|
+
}
|
|
257
|
+
return false;
|
|
258
|
+
}
|
|
259
|
+
/**
|
|
260
|
+
* Evict the oldest (least recently used) entry
|
|
261
|
+
*/
|
|
262
|
+
evictOldest() {
|
|
263
|
+
// Map maintains insertion order, first entry is oldest
|
|
264
|
+
const oldestKey = this.cache.keys().next().value;
|
|
265
|
+
if (oldestKey !== undefined) {
|
|
266
|
+
const entry = this.cache.get(oldestKey);
|
|
267
|
+
if (entry) {
|
|
268
|
+
if (this.contentHashIndex.get(entry.contentHash) === oldestKey) {
|
|
269
|
+
this.contentHashIndex.delete(entry.contentHash);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
this.cache.delete(oldestKey);
|
|
273
|
+
this.stats.evictions++;
|
|
274
|
+
logger.debug("Image cache eviction", {
|
|
275
|
+
url: String(oldestKey).substring(0, 50),
|
|
276
|
+
});
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
/**
|
|
280
|
+
* Clear all expired entries
|
|
281
|
+
*/
|
|
282
|
+
evictExpired() {
|
|
283
|
+
let evicted = 0;
|
|
284
|
+
const now = Date.now();
|
|
285
|
+
for (const [url, entry] of this.cache.entries()) {
|
|
286
|
+
if (now - entry.createdAt > this.ttlMs) {
|
|
287
|
+
this.delete(url);
|
|
288
|
+
evicted++;
|
|
289
|
+
this.stats.expirations++;
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
if (evicted > 0) {
|
|
293
|
+
logger.debug(`Evicted ${evicted} expired image cache entries`);
|
|
294
|
+
}
|
|
295
|
+
return evicted;
|
|
296
|
+
}
|
|
297
|
+
/**
|
|
298
|
+
* Clear all entries from the cache
|
|
299
|
+
*/
|
|
300
|
+
clear() {
|
|
301
|
+
const size = this.cache.size;
|
|
302
|
+
this.cache.clear();
|
|
303
|
+
this.contentHashIndex.clear();
|
|
304
|
+
logger.debug(`Image cache cleared (${size} entries removed)`);
|
|
305
|
+
}
|
|
306
|
+
/**
|
|
307
|
+
* Get cache statistics
|
|
308
|
+
*/
|
|
309
|
+
getStats() {
|
|
310
|
+
let totalBytes = 0;
|
|
311
|
+
for (const entry of this.cache.values()) {
|
|
312
|
+
totalBytes += entry.size;
|
|
313
|
+
}
|
|
314
|
+
const hitRate = this.stats.totalRequests > 0
|
|
315
|
+
? Number(((this.stats.hits / this.stats.totalRequests) * 100).toFixed(2))
|
|
316
|
+
: 0;
|
|
317
|
+
return {
|
|
318
|
+
...this.stats,
|
|
319
|
+
size: this.cache.size,
|
|
320
|
+
totalBytes,
|
|
321
|
+
hitRate,
|
|
322
|
+
};
|
|
323
|
+
}
|
|
324
|
+
/**
|
|
325
|
+
* Check if a URL is cached and not expired
|
|
326
|
+
*/
|
|
327
|
+
has(url) {
|
|
328
|
+
const normalizedUrl = this.normalizeUrl(url);
|
|
329
|
+
const entry = this.cache.get(normalizedUrl);
|
|
330
|
+
return entry !== null && entry !== undefined && !this.isExpired(entry);
|
|
331
|
+
}
|
|
332
|
+
/**
|
|
333
|
+
* Get the current cache size
|
|
334
|
+
*/
|
|
335
|
+
getSize() {
|
|
336
|
+
return this.cache.size;
|
|
337
|
+
}
|
|
338
|
+
/**
|
|
339
|
+
* Get cache configuration
|
|
340
|
+
*/
|
|
341
|
+
getConfig() {
|
|
342
|
+
return {
|
|
343
|
+
enabled: this.enabled,
|
|
344
|
+
maxSize: this.maxSize,
|
|
345
|
+
ttlMs: this.ttlMs,
|
|
346
|
+
maxImageSize: this.maxImageSize,
|
|
347
|
+
};
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
// Module-wide cache instance; stays null until the first getImageCache() call
let globalImageCache = null;
/**
 * Return the shared image cache, constructing it on first use.
 *
 * @param config - Passed to the ImageCache constructor only when this call
 *   creates the instance; ignored once an instance exists.
 * @returns the shared ImageCache instance
 */
export function getImageCache(config) {
    if (globalImageCache !== null) {
        return globalImageCache;
    }
    globalImageCache = new ImageCache(config);
    return globalImageCache;
}
|
|
362
|
+
/**
 * Discard the shared image cache (useful for testing).
 *
 * When an instance exists it is emptied and then dropped, so the next
 * getImageCache() call builds a new one. Does nothing otherwise.
 */
export function resetImageCache() {
    if (globalImageCache === null) {
        return;
    }
    globalImageCache.clear();
    globalImageCache = null;
}
|
|
371
|
+
/**
 * Report statistics of the shared image cache.
 *
 * @returns the result of getStats() on the shared instance, or null when no
 *   shared instance has been created
 */
export function getImageCacheStats() {
    if (globalImageCache === null) {
        return null;
    }
    return globalImageCache.getStats();
}
|
|
377
|
+
//# sourceMappingURL=imageCache.js.map
|
|
@@ -138,6 +138,7 @@ export declare const imageUtils: {
|
|
|
138
138
|
* @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
|
|
139
139
|
* @returns Promise<string> - Base64 data URI of the downloaded image
|
|
140
140
|
* Rate-limited to 10 downloads per second to prevent DoS
|
|
141
|
+
* Uses LRU cache to avoid redundant downloads of the same URL
|
|
141
142
|
*/
|
|
142
143
|
urlToBase64DataUri: (url: string, { timeoutMs, maxBytes, maxAttempts, }?: {
|
|
143
144
|
timeoutMs?: number;
|
|
@@ -6,6 +6,7 @@ import { logger } from "./logger.js";
|
|
|
6
6
|
import { urlDownloadRateLimiter } from "./rateLimiter.js";
|
|
7
7
|
import { withRetry } from "./retryHandler.js";
|
|
8
8
|
import { SYSTEM_LIMITS } from "../core/constants.js";
|
|
9
|
+
import { getImageCache } from "./imageCache.js";
|
|
9
10
|
/**
|
|
10
11
|
* Network error codes that should trigger a retry
|
|
11
12
|
*/
|
|
@@ -546,8 +547,16 @@ export const imageUtils = {
|
|
|
546
547
|
* @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
|
|
547
548
|
* @returns Promise<string> - Base64 data URI of the downloaded image
|
|
548
549
|
* Rate-limited to 10 downloads per second to prevent DoS
|
|
550
|
+
* Uses LRU cache to avoid redundant downloads of the same URL
|
|
549
551
|
*/
|
|
550
552
|
urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024, maxAttempts = 3, } = {}) => {
|
|
553
|
+
// Check cache first
|
|
554
|
+
const cache = getImageCache();
|
|
555
|
+
const cached = cache.get(url);
|
|
556
|
+
if (cached) {
|
|
557
|
+
logger.debug("Using cached image for URL", { url: url.substring(0, 50) });
|
|
558
|
+
return cached.dataUri;
|
|
559
|
+
}
|
|
551
560
|
// Apply rate limiting before download
|
|
552
561
|
await urlDownloadRateLimiter.acquire();
|
|
553
562
|
// Basic protocol whitelist - fail fast, no retry needed
|
|
@@ -575,8 +584,12 @@ export const imageUtils = {
|
|
|
575
584
|
if (buffer.byteLength > maxBytes) {
|
|
576
585
|
throw new Error(`Downloaded content too large: ${buffer.byteLength} bytes`);
|
|
577
586
|
}
|
|
578
|
-
const
|
|
579
|
-
|
|
587
|
+
const imageBuffer = Buffer.from(buffer);
|
|
588
|
+
const base64 = imageBuffer.toString("base64");
|
|
589
|
+
const dataUri = `data:${contentType || "image/jpeg"};base64,${base64}`;
|
|
590
|
+
// Store in cache for future use
|
|
591
|
+
cache.set(url, dataUri, contentType || "image/jpeg", imageBuffer);
|
|
592
|
+
return dataUri;
|
|
580
593
|
}
|
|
581
594
|
finally {
|
|
582
595
|
clearTimeout(t);
|
|
@@ -10,6 +10,7 @@ import { FileDetector } from "./fileDetector.js";
|
|
|
10
10
|
import { PDFProcessor, PDFImageConverter } from "./pdfProcessor.js";
|
|
11
11
|
import { urlDownloadRateLimiter } from "./rateLimiter.js";
|
|
12
12
|
import { request, getGlobalDispatcher, interceptors } from "undici";
|
|
13
|
+
import { getImageCache } from "./imageCache.js";
|
|
13
14
|
import { readFileSync, existsSync } from "fs";
|
|
14
15
|
/**
|
|
15
16
|
* Type guard to check if an image input has alt text
|
|
@@ -625,9 +626,17 @@ function isInternetUrl(input) {
|
|
|
625
626
|
/**
|
|
626
627
|
* Download image from URL and convert to base64 data URI
|
|
627
628
|
* Rate-limited to 10 downloads per second to prevent DoS
|
|
629
|
+
* Uses LRU cache to avoid redundant downloads of the same URL
|
|
628
630
|
*/
|
|
629
631
|
async function downloadImageFromUrl(url) {
|
|
630
|
-
//
|
|
632
|
+
// Check cache first (before rate limiting)
|
|
633
|
+
const cache = getImageCache();
|
|
634
|
+
const cached = cache.get(url);
|
|
635
|
+
if (cached) {
|
|
636
|
+
logger.debug("Using cached image for URL", { url: url.substring(0, 50) });
|
|
637
|
+
return cached.dataUri;
|
|
638
|
+
}
|
|
639
|
+
// Apply rate limiting only if cache missed
|
|
631
640
|
await urlDownloadRateLimiter.acquire();
|
|
632
641
|
try {
|
|
633
642
|
const response = await request(url, {
|
|
@@ -659,6 +668,8 @@ async function downloadImageFromUrl(url) {
|
|
|
659
668
|
// Convert to base64 data URI
|
|
660
669
|
const base64 = buffer.toString("base64");
|
|
661
670
|
const dataUri = `data:${contentType};base64,${base64}`;
|
|
671
|
+
// Store in cache for future use
|
|
672
|
+
cache.set(url, dataUri, contentType, buffer);
|
|
662
673
|
return dataUri;
|
|
663
674
|
}
|
|
664
675
|
catch (error) {
|
|
@@ -190,3 +190,54 @@ export type EnvVarValidationResult = {
|
|
|
190
190
|
invalidVars: string[];
|
|
191
191
|
warnings: string[];
|
|
192
192
|
};
|
|
193
|
+
/**
 * A single image held by the image cache, together with the bookkeeping
 * data the cache maintains for it.
 */
export type CachedImage = {
    /** The image encoded as a base64 data URI. */
    dataUri: string;
    /** Content type of the image (e.g., "image/jpeg"). */
    contentType: string;
    /** Size in bytes of the raw image buffer that was handed to the cache. */
    size: number;
    /**
     * Content fingerprint used for deduplication: the first 16 hexadecimal
     * characters of the SHA-256 digest of the image bytes (not the full digest).
     */
    contentHash: string;
    /** Timestamp (ms since epoch) when the entry was stored; TTL expiry is measured from this value. */
    createdAt: number;
    /** Timestamp (ms since epoch) of the most recent cache hit on this entry. */
    lastAccessedAt: number;
    /** Number of times this entry was accessed; a newly stored entry starts at 1. */
    accessCount: number;
};
|
|
212
|
+
/**
 * Configuration options for the image cache.
 *
 * Every field is optional. The cache constructor lets the matching
 * environment variable override the value given here and clamps the result
 * to the range noted on each field. Whether caching is enabled at all is not
 * part of this object: it is controlled solely by the
 * NEUROLINK_IMAGE_CACHE_ENABLED environment variable.
 */
export type ImageCacheConfig = {
    /**
     * Maximum number of entries in the cache
     * (default: 100, allowed range: 1-1000, env: NEUROLINK_IMAGE_CACHE_SIZE).
     */
    maxSize?: number;
    /**
     * Time-to-live in milliseconds
     * (default: 30 minutes, allowed range: 1 second - 24 hours, env: NEUROLINK_IMAGE_CACHE_TTL_MS).
     */
    ttlMs?: number;
    /**
     * Maximum size per image in bytes; larger images are not cached
     * (default: 10MB, allowed range: 1KB - 100MB, env: NEUROLINK_IMAGE_MAX_SIZE).
     */
    maxImageSize?: number;
};
|
|
223
|
+
/**
 * Snapshot of image cache counters, intended for monitoring.
 */
export type ImageCacheStats = {
    /** Number of lookups that returned a live (non-expired) entry. */
    hits: number;
    /** Number of lookups for URLs that had no entry at all. */
    misses: number;
    /** Number of entries evicted to make room for new ones (size limit). */
    evictions: number;
    /** Number of entries removed because their TTL had elapsed. */
    expirations: number;
    /** Total number of lookups performed while the cache was enabled. */
    totalRequests: number;
    /** Current number of entries in the cache. */
    size: number;
    /** Sum of the `size` of every cached image, in bytes. */
    totalBytes: number;
    /**
     * Cache hit rate as a percentage (hits / totalRequests * 100), rounded to
     * two decimal places; 0 when no lookups have been made.
     */
    hitRate: number;
};
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image Cache Utility for NeuroLink
|
|
3
|
+
*
|
|
4
|
+
* Implements an LRU cache for downloaded images to avoid redundant URL downloads.
|
|
5
|
+
* Addresses IMG-026: No Caching issue - same URL downloaded multiple times wasting bandwidth.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - LRU (Least Recently Used) eviction strategy
|
|
9
|
+
* - Configurable cache size and TTL
|
|
10
|
+
* - Cache hit/miss metrics
|
|
11
|
+
* - Content hash tracking for deduplication
|
|
12
|
+
*
|
|
13
|
+
* @module utils/imageCache
|
|
14
|
+
*/
|
|
15
|
+
import type { CachedImage, ImageCacheConfig, ImageCacheStats } from "../types/utilities.js";
|
|
16
|
+
/**
 * LRU cache for downloaded images.
 *
 * Entries are keyed by normalized URL, and a secondary index maps content
 * hashes to URLs so that identical images reached through different URLs can
 * be recognised. Entries expire `ttlMs` after they were stored.
 *
 * Caching is off unless the NEUROLINK_IMAGE_CACHE_ENABLED environment
 * variable equals "true" (case-insensitive) when the instance is
 * constructed. While off, `get` always returns null and `set` stores nothing.
 */
export declare class ImageCache {
    private cache;
    private contentHashIndex;
    private maxSize;
    private ttlMs;
    private maxImageSize;
    private enabled;
    private stats;
    /**
     * @param config - Optional limits. Matching environment variables take
     *   precedence, and every value is clamped to its allowed range.
     */
    constructor(config?: ImageCacheConfig);
    /**
     * Resolve one numeric setting from environment value, config value and
     * default, then clamp it to [min, max].
     */
    private parseConfigValue;
    /**
     * Normalize a URL for use as a cache key by removing common tracking
     * query parameters (utm_*, fbclid, gclid, _ga). Unparseable input is
     * returned unchanged.
     */
    private normalizeUrl;
    /**
     * Compute the content fingerprint of image data: the first 16 hex
     * characters of its SHA-256 digest.
     */
    private generateContentHash;
    /**
     * Check whether an entry is older than the configured TTL, measured from
     * its creation time.
     */
    private isExpired;
    /**
     * Report whether caching is enabled for this instance.
     */
    isEnabled(): boolean;
    /**
     * Look up a cached image by URL.
     *
     * Returns null when the cache is disabled, when the URL is not cached, or
     * when the entry has expired (an expired entry is removed). A hit updates
     * the entry's access metadata and marks it as most recently used. Every
     * lookup made while the cache is enabled is counted in the statistics.
     */
    get(url: string): CachedImage | null;
    /**
     * Look up a cached image by content hash.
     *
     * Resolves the hash to a URL through the content-hash index and then
     * performs a normal `get`, so the lookup is counted in the statistics like
     * any other. Returns null when the hash is unknown.
     */
    getByContentHash(contentHash: string): CachedImage | null;
    /**
     * Store an image in the cache.
     *
     * Does nothing when the cache is disabled or when `imageData` is larger
     * than the configured maximum image size.
     *
     * @param url - URL the image was downloaded from
     * @param dataUri - The image as a base64 data URI
     * @param contentType - Content type of the image
     * @param imageData - Raw image bytes; used for the size check and content hash
     */
    set(url: string, dataUri: string, contentType: string, imageData: Buffer): void;
    /**
     * Delete the entry for a URL.
     *
     * @returns true if an entry existed and was removed, false otherwise
     */
    delete(url: string): boolean;
    /**
     * Evict the least recently used entry and count it as an eviction.
     */
    private evictOldest;
    /**
     * Remove every entry whose TTL has elapsed.
     *
     * @returns the number of entries removed
     */
    evictExpired(): number;
    /**
     * Remove all entries from the cache. Hit/miss counters are left untouched.
     */
    clear(): void;
    /**
     * Get a snapshot of the cache counters plus current size, total bytes and
     * hit rate.
     */
    getStats(): ImageCacheStats;
    /**
     * Check whether a URL is cached and not expired, without touching the
     * statistics or the entry's recency.
     */
    has(url: string): boolean;
    /**
     * Get the current number of entries in the cache.
     */
    getSize(): number;
    /**
     * Get the effective configuration (after environment overrides and
     * clamping).
     */
    getConfig(): {
        enabled: boolean;
        maxSize: number;
        ttlMs: number;
        maxImageSize: number;
    };
}
|
|
104
|
+
/**
 * Get the global image cache instance, creating it on first use.
 *
 * @param config - Used only when this call creates the instance; ignored
 *   when a global instance already exists.
 * @returns the shared ImageCache instance
 */
export declare function getImageCache(config?: ImageCacheConfig): ImageCache;
|
|
109
|
+
/**
 * Reset the global image cache (useful for testing).
 *
 * Clears and discards the current global instance, if any, so the next
 * `getImageCache` call creates a fresh one.
 */
export declare function resetImageCache(): void;
|
|
113
|
+
/**
 * Get image cache statistics from the global instance.
 *
 * @returns the statistics snapshot, or null when no global instance has been
 *   created yet
 */
export declare function getImageCacheStats(): ImageCacheStats | null;
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Image Cache Utility for NeuroLink
|
|
3
|
+
*
|
|
4
|
+
* Implements an LRU cache for downloaded images to avoid redundant URL downloads.
|
|
5
|
+
* Addresses IMG-026: No Caching issue - same URL downloaded multiple times wasting bandwidth.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - LRU (Least Recently Used) eviction strategy
|
|
9
|
+
* - Configurable cache size and TTL
|
|
10
|
+
* - Cache hit/miss metrics
|
|
11
|
+
* - Content hash tracking for deduplication
|
|
12
|
+
*
|
|
13
|
+
* @module utils/imageCache
|
|
14
|
+
*/
|
|
15
|
+
import { createHash } from "crypto";
|
|
16
|
+
import { logger } from "./logger.js";
|
|
17
|
+
/**
 * LRU cache for downloaded images.
 *
 * Entries are keyed by normalized URL. A secondary index maps a content hash
 * to one URL holding that content, so identical images reached through
 * different URLs can share a single data URI string.
 *
 * The cache is bounded by entry count (`maxSize`), by per-image byte size
 * (`maxImageSize`) and by age (`ttlMs`). It is only active when the
 * NEUROLINK_IMAGE_CACHE_ENABLED environment variable equals "true"
 * (case-insensitive); otherwise `get` always misses and `set` is a no-op.
 */
export class ImageCache {
    // Map iteration order is insertion order, so the first key is always the
    // least recently used entry and the last key the most recently used one.
    cache = new Map();
    contentHashIndex = new Map(); // contentHash -> normalized url
    maxSize;
    ttlMs;
    maxImageSize;
    enabled;
    stats = {
        hits: 0,
        misses: 0,
        evictions: 0,
        expirations: 0,
        totalRequests: 0,
    };
    /**
     * @param config Optional `{ maxSize, ttlMs, maxImageSize }`. Valid
     *   environment variables take precedence over these values. `undefined`
     *   and `null` are both treated as "no config".
     */
    constructor(config) {
        const cfg = config ?? {};
        // Parse configuration with environment variable overrides
        const envEnabled = process.env.NEUROLINK_IMAGE_CACHE_ENABLED;
        const envMaxSize = process.env.NEUROLINK_IMAGE_CACHE_SIZE;
        const envTtlMs = process.env.NEUROLINK_IMAGE_CACHE_TTL_MS;
        const envMaxImageSize = process.env.NEUROLINK_IMAGE_MAX_SIZE;
        // Check if cache is enabled (default: false)
        this.enabled =
            envEnabled !== undefined ? envEnabled.toLowerCase() === "true" : false;
        this.maxSize = this.parseConfigValue(envMaxSize, cfg.maxSize, 100, 1, 1000);
        this.ttlMs = this.parseConfigValue(envTtlMs, cfg.ttlMs, 30 * 60 * 1000, // 30 minutes
        1000, // 1 second min
        24 * 60 * 60 * 1000);
        this.maxImageSize = this.parseConfigValue(envMaxImageSize, cfg.maxImageSize, 10 * 1024 * 1024, // 10MB
        1024, // 1KB min
        100 * 1024 * 1024);
        logger.debug("ImageCache initialized", {
            enabled: this.enabled,
            maxSize: this.maxSize,
            ttlMs: this.ttlMs,
            maxImageSize: this.maxImageSize,
        });
    }
    /**
     * Resolve one numeric setting and clamp it to [min, max].
     *
     * Precedence: a parseable environment value, then a finite numeric config
     * value, then the default. Non-finite values (NaN, Infinity) are never
     * returned: a NaN would pass both bounds checks and, for `maxSize`,
     * silently disable eviction.
     */
    parseConfigValue(envValue, configValue, defaultValue, min, max) {
        let value = defaultValue;
        const fromEnv = envValue !== undefined ? parseInt(envValue, 10) : NaN;
        if (Number.isFinite(fromEnv)) {
            value = fromEnv;
        }
        else {
            if (envValue !== undefined) {
                logger.warn(`Ignoring non-numeric cache config value "${envValue}" from environment`);
            }
            if (typeof configValue === "number" && Number.isFinite(configValue)) {
                value = configValue;
            }
        }
        // Apply bounds
        if (value < min) {
            logger.warn(`Cache config value ${value} below minimum ${min}, using min`);
            return min;
        }
        if (value > max) {
            logger.warn(`Cache config value ${value} above maximum ${max}, using max`);
            return max;
        }
        return value;
    }
    /**
     * Build the cache key for a URL: parse it, drop common tracking query
     * parameters, and serialize it again. Strings that do not parse as URLs
     * are used as-is.
     */
    normalizeUrl(url) {
        try {
            const parsed = new URL(url);
            // Remove common tracking parameters that don't affect content
            const trackingParams = [
                "utm_source",
                "utm_medium",
                "utm_campaign",
                "utm_term",
                "utm_content",
                "fbclid",
                "gclid",
                "_ga",
            ];
            for (const param of trackingParams) {
                parsed.searchParams.delete(param);
            }
            return parsed.toString();
        }
        catch {
            // If URL parsing fails, use the original URL
            return url;
        }
    }
    /**
     * Hash image bytes (a Buffer, or a base64 string that is decoded first)
     * with SHA-256 and return the first 16 hex characters.
     */
    generateContentHash(data) {
        const buffer = typeof data === "string" ? Buffer.from(data, "base64") : data;
        return createHash("sha256").update(buffer).digest("hex").substring(0, 16);
    }
    /**
     * Check if an entry is older than the configured TTL
     */
    isExpired(entry) {
        return Date.now() - entry.createdAt > this.ttlMs;
    }
    /**
     * Check if cache is enabled
     */
    isEnabled() {
        return this.enabled;
    }
    /**
     * Get a cached image by URL.
     * Returns null when the cache is disabled, the URL is unknown, or the
     * entry has expired (expired entries are removed on the spot).
     * A hit marks the entry as most recently used.
     */
    get(url) {
        // Return null immediately if cache is disabled
        if (!this.enabled) {
            return null;
        }
        this.stats.totalRequests++;
        const key = this.normalizeUrl(url);
        const entry = this.cache.get(key);
        if (!entry) {
            this.stats.misses++;
            logger.debug("Image cache miss", { url: key.substring(0, 50) });
            return null;
        }
        // Check TTL expiration
        if (this.isExpired(entry)) {
            this.stats.expirations++;
            this.removeKey(key);
            logger.debug("Image cache entry expired", {
                url: key.substring(0, 50),
            });
            return null;
        }
        // Update LRU info
        entry.lastAccessedAt = Date.now();
        entry.accessCount++;
        // Move to end (most recently used) - delete and re-add
        this.cache.delete(key);
        this.cache.set(key, entry);
        this.stats.hits++;
        logger.debug("Image cache hit", {
            url: key.substring(0, 50),
            accessCount: entry.accessCount,
        });
        return entry;
    }
    /**
     * Get a cached image by content hash.
     * Useful for deduplication when the same image is accessed via different
     * URLs. Delegates to `get`, so it updates stats and LRU order the same way.
     */
    getByContentHash(contentHash) {
        const url = this.contentHashIndex.get(contentHash);
        if (url === undefined) {
            return null;
        }
        return this.get(url);
    }
    /**
     * Store an image in the cache.
     *
     * No-op when the cache is disabled or the image exceeds `maxImageSize`.
     * Any previous entry for the same URL is replaced (not merely
     * overwritten), and the entry-count limit is enforced on every insert,
     * including inserts that reuse already-cached content.
     */
    set(url, dataUri, contentType, imageData) {
        // Skip caching if disabled
        if (!this.enabled) {
            logger.debug("Image caching disabled, skipping cache storage");
            return;
        }
        const key = this.normalizeUrl(url);
        const size = imageData.length;
        // Skip caching if image exceeds max size
        if (size > this.maxImageSize) {
            logger.debug("Image too large to cache", {
                url: key.substring(0, 50),
                size,
                maxSize: this.maxImageSize,
            });
            return;
        }
        const contentHash = this.generateContentHash(imageData);
        // Look for live identical content under another URL *before* evicting,
        // because eviction below may remove that very entry.
        const donorUrl = this.contentHashIndex.get(contentHash);
        const donor = donorUrl !== undefined && donorUrl !== key
            ? this.cache.get(donorUrl)
            : undefined;
        const reusable = donor !== undefined && !this.isExpired(donor) ? donor : undefined;
        // Drop any previous entry for this URL first. This clears its (possibly
        // different) content hash from the index, keeps an in-place update from
        // evicting an unrelated entry, and lets the re-insert land at the
        // most-recently-used end of the Map.
        this.removeKey(key);
        // Evict if at capacity
        while (this.cache.size >= this.maxSize) {
            this.evictOldest();
        }
        const now = Date.now();
        const entry = {
            // Reuse the donor's string so duplicate content is held once in memory.
            dataUri: reusable ? reusable.dataUri : dataUri,
            contentType: reusable ? reusable.contentType : contentType,
            size,
            contentHash,
            // The content was just downloaded, so its TTL starts now even when
            // the bytes match an older entry.
            createdAt: now,
            lastAccessedAt: now,
            accessCount: 1,
        };
        this.cache.set(key, entry);
        this.contentHashIndex.set(contentHash, key);
        if (reusable) {
            logger.debug("Image cache dedup hit", {
                newUrl: key.substring(0, 50),
                existingUrl: donorUrl.substring(0, 50),
            });
            return;
        }
        logger.debug("Image cached", {
            url: key.substring(0, 50),
            size,
            contentHash: contentHash.substring(0, 8),
            cacheSize: this.cache.size,
        });
    }
    /**
     * Delete an entry from the cache.
     * @returns true if an entry for the URL existed and was removed.
     */
    delete(url) {
        return this.removeKey(this.normalizeUrl(url));
    }
    /**
     * Remove the entry stored under an already-normalized key, together with
     * its content-hash index record when that record points at this key.
     * @returns true if an entry was removed.
     */
    removeKey(key) {
        const entry = this.cache.get(key);
        if (entry === undefined) {
            return false;
        }
        if (this.contentHashIndex.get(entry.contentHash) === key) {
            this.contentHashIndex.delete(entry.contentHash);
        }
        return this.cache.delete(key);
    }
    /**
     * Evict the oldest (least recently used) entry
     */
    evictOldest() {
        // Map maintains insertion order, first entry is oldest
        const oldestKey = this.cache.keys().next().value;
        if (oldestKey === undefined) {
            return;
        }
        this.removeKey(oldestKey);
        this.stats.evictions++;
        logger.debug("Image cache eviction", {
            url: String(oldestKey).substring(0, 50),
        });
    }
    /**
     * Remove every entry whose TTL has elapsed.
     * @returns the number of entries removed.
     */
    evictExpired() {
        let evicted = 0;
        // Deleting the current key while iterating a Map is well-defined.
        for (const [key, entry] of this.cache.entries()) {
            if (this.isExpired(entry)) {
                this.removeKey(key);
                evicted++;
                this.stats.expirations++;
            }
        }
        if (evicted > 0) {
            logger.debug(`Evicted ${evicted} expired image cache entries`);
        }
        return evicted;
    }
    /**
     * Clear all entries from the cache (statistics counters are kept)
     */
    clear() {
        const size = this.cache.size;
        this.cache.clear();
        this.contentHashIndex.clear();
        logger.debug(`Image cache cleared (${size} entries removed)`);
    }
    /**
     * Get cache statistics: the raw counters plus current entry count, total
     * cached bytes, and hit rate as a percentage rounded to two decimals.
     */
    getStats() {
        let totalBytes = 0;
        for (const entry of this.cache.values()) {
            totalBytes += entry.size;
        }
        const hitRate = this.stats.totalRequests > 0
            ? Number(((this.stats.hits / this.stats.totalRequests) * 100).toFixed(2))
            : 0;
        return {
            ...this.stats,
            size: this.cache.size,
            totalBytes,
            hitRate,
        };
    }
    /**
     * Check if a URL is cached and not expired.
     * Unlike `get`, this touches neither the statistics nor the LRU order.
     */
    has(url) {
        const entry = this.cache.get(this.normalizeUrl(url));
        return entry !== null && entry !== undefined && !this.isExpired(entry);
    }
    /**
     * Get the current number of cached entries
     */
    getSize() {
        return this.cache.size;
    }
    /**
     * Get the effective cache configuration
     */
    getConfig() {
        return {
            enabled: this.enabled,
            maxSize: this.maxSize,
            ttlMs: this.ttlMs,
            maxImageSize: this.maxImageSize,
        };
    }
}
|
|
350
|
+
// Global image cache instance
|
|
351
|
+
// Shared instance: assigned by getImageCache() on first use and set back to null by resetImageCache().
let globalImageCache = null;
|
|
352
|
+
/**
 * Return the shared ImageCache, constructing it on first use.
 *
 * `config` is passed to the constructor only when no instance exists yet;
 * once an instance has been created, later `config` arguments are ignored.
 */
export function getImageCache(config) {
    globalImageCache ??= new ImageCache(config);
    return globalImageCache;
}
|
|
362
|
+
/**
 * Empty and discard the shared image cache (useful for testing).
 * The next getImageCache() call builds a fresh instance.
 */
export function resetImageCache() {
    globalImageCache?.clear();
    globalImageCache = null;
}
|
|
371
|
+
/**
 * Report statistics for the shared image cache.
 * Returns null when no shared instance has been created.
 */
export function getImageCacheStats() {
    if (!globalImageCache) {
        return null;
    }
    return globalImageCache.getStats();
}
|
|
@@ -138,6 +138,7 @@ export declare const imageUtils: {
|
|
|
138
138
|
* @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
|
|
139
139
|
* @returns Promise<string> - Base64 data URI of the downloaded image
|
|
140
140
|
* Rate-limited to 10 downloads per second to prevent DoS
|
|
141
|
+
* Uses LRU cache to avoid redundant downloads of the same URL
|
|
141
142
|
*/
|
|
142
143
|
urlToBase64DataUri: (url: string, { timeoutMs, maxBytes, maxAttempts, }?: {
|
|
143
144
|
timeoutMs?: number;
|
|
@@ -6,6 +6,7 @@ import { logger } from "./logger.js";
|
|
|
6
6
|
import { urlDownloadRateLimiter } from "./rateLimiter.js";
|
|
7
7
|
import { withRetry } from "./retryHandler.js";
|
|
8
8
|
import { SYSTEM_LIMITS } from "../core/constants.js";
|
|
9
|
+
import { getImageCache } from "./imageCache.js";
|
|
9
10
|
/**
|
|
10
11
|
* Network error codes that should trigger a retry
|
|
11
12
|
*/
|
|
@@ -546,8 +547,16 @@ export const imageUtils = {
|
|
|
546
547
|
* @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
|
|
547
548
|
* @returns Promise<string> - Base64 data URI of the downloaded image
|
|
548
549
|
* Rate-limited to 10 downloads per second to prevent DoS
|
|
550
|
+
* Uses LRU cache to avoid redundant downloads of the same URL
|
|
549
551
|
*/
|
|
550
552
|
urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024, maxAttempts = 3, } = {}) => {
|
|
553
|
+
// Check cache first
|
|
554
|
+
const cache = getImageCache();
|
|
555
|
+
const cached = cache.get(url);
|
|
556
|
+
if (cached) {
|
|
557
|
+
logger.debug("Using cached image for URL", { url: url.substring(0, 50) });
|
|
558
|
+
return cached.dataUri;
|
|
559
|
+
}
|
|
551
560
|
// Apply rate limiting before download
|
|
552
561
|
await urlDownloadRateLimiter.acquire();
|
|
553
562
|
// Basic protocol whitelist - fail fast, no retry needed
|
|
@@ -575,8 +584,12 @@ export const imageUtils = {
|
|
|
575
584
|
if (buffer.byteLength > maxBytes) {
|
|
576
585
|
throw new Error(`Downloaded content too large: ${buffer.byteLength} bytes`);
|
|
577
586
|
}
|
|
578
|
-
const
|
|
579
|
-
|
|
587
|
+
const imageBuffer = Buffer.from(buffer);
|
|
588
|
+
const base64 = imageBuffer.toString("base64");
|
|
589
|
+
const dataUri = `data:${contentType || "image/jpeg"};base64,${base64}`;
|
|
590
|
+
// Store in cache for future use
|
|
591
|
+
cache.set(url, dataUri, contentType || "image/jpeg", imageBuffer);
|
|
592
|
+
return dataUri;
|
|
580
593
|
}
|
|
581
594
|
finally {
|
|
582
595
|
clearTimeout(t);
|
|
@@ -10,6 +10,7 @@ import { FileDetector } from "./fileDetector.js";
|
|
|
10
10
|
import { PDFProcessor, PDFImageConverter } from "./pdfProcessor.js";
|
|
11
11
|
import { urlDownloadRateLimiter } from "./rateLimiter.js";
|
|
12
12
|
import { request, getGlobalDispatcher, interceptors } from "undici";
|
|
13
|
+
import { getImageCache } from "./imageCache.js";
|
|
13
14
|
import { readFileSync, existsSync } from "fs";
|
|
14
15
|
/**
|
|
15
16
|
* Type guard to check if an image input has alt text
|
|
@@ -625,9 +626,17 @@ function isInternetUrl(input) {
|
|
|
625
626
|
/**
|
|
626
627
|
* Download image from URL and convert to base64 data URI
|
|
627
628
|
* Rate-limited to 10 downloads per second to prevent DoS
|
|
629
|
+
* Uses LRU cache to avoid redundant downloads of the same URL
|
|
628
630
|
*/
|
|
629
631
|
async function downloadImageFromUrl(url) {
|
|
630
|
-
//
|
|
632
|
+
// Check cache first (before rate limiting)
|
|
633
|
+
const cache = getImageCache();
|
|
634
|
+
const cached = cache.get(url);
|
|
635
|
+
if (cached) {
|
|
636
|
+
logger.debug("Using cached image for URL", { url: url.substring(0, 50) });
|
|
637
|
+
return cached.dataUri;
|
|
638
|
+
}
|
|
639
|
+
// Apply rate limiting only if cache missed
|
|
631
640
|
await urlDownloadRateLimiter.acquire();
|
|
632
641
|
try {
|
|
633
642
|
const response = await request(url, {
|
|
@@ -659,6 +668,8 @@ async function downloadImageFromUrl(url) {
|
|
|
659
668
|
// Convert to base64 data URI
|
|
660
669
|
const base64 = buffer.toString("base64");
|
|
661
670
|
const dataUri = `data:${contentType};base64,${base64}`;
|
|
671
|
+
// Store in cache for future use
|
|
672
|
+
cache.set(url, dataUri, contentType, buffer);
|
|
662
673
|
return dataUri;
|
|
663
674
|
}
|
|
664
675
|
catch (error) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@juspay/neurolink",
|
|
3
|
-
"version": "8.
|
|
3
|
+
"version": "8.42.0",
|
|
4
4
|
"description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 13 providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Juspay Technologies",
|