crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,1037 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SnapshotManager - Snapshot Storage and Management
|
|
3
|
+
* Handles snapshot storage with compression, delta storage for efficiency,
|
|
4
|
+
* retention policies, cleanup, and change history querying
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { promises as fs } from 'fs';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import { createHash } from 'crypto';
|
|
10
|
+
import { gzip, gunzip } from 'zlib';
|
|
11
|
+
import { promisify } from 'util';
|
|
12
|
+
import { z } from 'zod';
|
|
13
|
+
import { EventEmitter } from 'events';
|
|
14
|
+
|
|
15
|
+
// Promise-returning wrappers around zlib's callback-style gzip / gunzip,
// so compression can be awaited like the rest of the file's I/O.
const [gzipAsync, gunzipAsync] = [gzip, gunzip].map((zlibFn) => promisify(zlibFn));
|
|
17
|
+
|
|
18
|
+
// Validation schema for a snapshot record: identity, source URL, content,
// capture metadata, and optional compression / delta descriptors.
// The nested sub-schemas are built inside an IIFE so they stay private to
// this declaration.
const SnapshotSchema = (() => {
  // Details recorded about the captured content and the response it came from.
  const metadataShape = z.object({
    timestamp: z.number(),
    contentHash: z.string(),
    contentLength: z.number(),
    contentType: z.string().optional(),
    userAgent: z.string().optional(),
    statusCode: z.number().optional(),
    headers: z.record(z.string()).optional(),
    extractionOptions: z.object({}).optional()
  });

  // How the stored payload was compressed, if at all.
  const compressionShape = z.object({
    enabled: z.boolean().default(true),
    algorithm: z.enum(['gzip', 'none']).default('gzip'),
    originalSize: z.number().optional(),
    compressedSize: z.number().optional(),
    compressionRatio: z.number().optional()
  });

  // Delta-storage descriptor: which snapshot the delta is relative to.
  const deltaShape = z.object({
    enabled: z.boolean().default(false),
    baseSnapshotId: z.string().optional(),
    deltaData: z.string().optional(),
    deltaSize: z.number().optional()
  });

  return z.object({
    id: z.string(),
    url: z.string().url(),
    content: z.string(),
    metadata: metadataShape,
    compression: compressionShape.optional(),
    delta: deltaShape.optional()
  });
})();
|
|
46
|
+
|
|
47
|
+
// Retention policy schema: limits on count, age and total size, plus the
// compression / delta thresholds and the automatic cleanup schedule.
// Unit constants are scoped to the IIFE so no extra module-level names appear.
const RetentionPolicySchema = (() => {
  const MINUTE_MS = 60 * 1000;
  const HOUR_MS = 60 * MINUTE_MS;
  const DAY_MS = 24 * HOUR_MS;
  const KIB = 1024;
  const MIB = 1024 * KIB;
  const GIB = 1024 * MIB;

  return z.object({
    maxSnapshots: z.number().min(1).default(100),
    // At least one hour; defaults to 30 days.
    maxAge: z.number().min(HOUR_MS).default(30 * DAY_MS),
    // At least 10MB; defaults to 1GB.
    maxStorageSize: z.number().min(10 * MIB).default(1 * GIB),
    // At least 1KB; defaults to 10KB.
    compressionThreshold: z.number().min(KIB).default(10 * KIB),
    enableDeltaStorage: z.boolean().default(true),
    // Similarity threshold (0..1) for delta storage.
    deltaThreshold: z.number().min(0).max(1).default(0.8),
    autoCleanup: z.boolean().default(true),
    // At least one minute; defaults to 24 hours.
    cleanupInterval: z.number().min(MINUTE_MS).default(DAY_MS)
  });
})();
|
|
57
|
+
|
|
58
|
+
// Schema for snapshot queries: URL / time-window selection, pagination,
// sorting, and optional per-snapshot filters.
const QuerySchema = (() => {
  // Optional filters evaluated against each snapshot's recorded metadata.
  const filterShape = z.object({
    minSize: z.number().optional(),
    maxSize: z.number().optional(),
    contentType: z.string().optional(),
    hasChanges: z.boolean().optional()
  });

  const sortKeys = ['timestamp', 'size', 'similarity'];
  const sortDirections = ['asc', 'desc'];

  return z.object({
    url: z.string().url().optional(),
    startTime: z.number().optional(),
    endTime: z.number().optional(),
    limit: z.number().min(1).max(1000).default(50),
    offset: z.number().min(0).default(0),
    includeDelta: z.boolean().default(true),
    includeContent: z.boolean().default(false),
    sortBy: z.enum(sortKeys).default('timestamp'),
    sortOrder: z.enum(sortDirections).default('desc'),
    filters: filterShape.optional()
  });
})();
|
|
75
|
+
|
|
76
|
+
export class SnapshotManager extends EventEmitter {
|
|
77
|
+
constructor(options = {}) {
|
|
78
|
+
super();
|
|
79
|
+
|
|
80
|
+
this.options = {
|
|
81
|
+
storageDir: options.storageDir || './snapshots',
|
|
82
|
+
metadataDir: options.metadataDir || './snapshots/metadata',
|
|
83
|
+
tempDir: options.tempDir || './snapshots/temp',
|
|
84
|
+
enableCompression: options.enableCompression !== false,
|
|
85
|
+
enableDeltaStorage: options.enableDeltaStorage !== false,
|
|
86
|
+
enableEncryption: options.enableEncryption || false,
|
|
87
|
+
encryptionKey: options.encryptionKey,
|
|
88
|
+
maxConcurrentOperations: options.maxConcurrentOperations || 10,
|
|
89
|
+
cacheEnabled: options.cacheEnabled !== false,
|
|
90
|
+
cacheSize: options.cacheSize || 100,
|
|
91
|
+
...options
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
this.retentionPolicy = RetentionPolicySchema.parse(options.retentionPolicy || {});
|
|
95
|
+
|
|
96
|
+
// In-memory cache for frequently accessed snapshots
|
|
97
|
+
this.snapshotCache = new Map();
|
|
98
|
+
this.metadataCache = new Map();
|
|
99
|
+
|
|
100
|
+
// Storage statistics
|
|
101
|
+
this.stats = {
|
|
102
|
+
totalSnapshots: 0,
|
|
103
|
+
totalStorageSize: 0,
|
|
104
|
+
compressedSnapshots: 0,
|
|
105
|
+
deltaSnapshots: 0,
|
|
106
|
+
averageCompressionRatio: 0,
|
|
107
|
+
averageDeltaSize: 0,
|
|
108
|
+
cacheHits: 0,
|
|
109
|
+
cacheMisses: 0,
|
|
110
|
+
cleanupOperations: 0,
|
|
111
|
+
lastCleanup: null,
|
|
112
|
+
operationCounts: {
|
|
113
|
+
store: 0,
|
|
114
|
+
retrieve: 0,
|
|
115
|
+
delete: 0,
|
|
116
|
+
query: 0
|
|
117
|
+
}
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
// Active operations tracking
|
|
121
|
+
this.activeOperations = new Map();
|
|
122
|
+
this.operationQueue = [];
|
|
123
|
+
|
|
124
|
+
// Cleanup timer
|
|
125
|
+
this.cleanupTimer = null;
|
|
126
|
+
|
|
127
|
+
this.initialize();
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
async initialize() {
|
|
131
|
+
try {
|
|
132
|
+
// Create storage directories
|
|
133
|
+
await this.createDirectories();
|
|
134
|
+
|
|
135
|
+
// Load existing snapshot metadata
|
|
136
|
+
await this.loadMetadata();
|
|
137
|
+
|
|
138
|
+
// Start cleanup timer if enabled
|
|
139
|
+
if (this.retentionPolicy.autoCleanup) {
|
|
140
|
+
this.startCleanupTimer();
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Initialize cache
|
|
144
|
+
if (this.options.cacheEnabled) {
|
|
145
|
+
await this.initializeCache();
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
this.emit('initialized', {
|
|
149
|
+
totalSnapshots: this.stats.totalSnapshots,
|
|
150
|
+
storageSize: this.stats.totalStorageSize
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
} catch (error) {
|
|
154
|
+
this.emit('error', { operation: 'initialize', error: error.message });
|
|
155
|
+
throw error;
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* Store a new snapshot
|
|
161
|
+
* @param {string} url - URL of the snapshot
|
|
162
|
+
* @param {string} content - Content to store
|
|
163
|
+
* @param {Object} metadata - Additional metadata
|
|
164
|
+
* @param {Object} options - Storage options
|
|
165
|
+
* @returns {Object} - Stored snapshot information
|
|
166
|
+
*/
|
|
167
|
+
async storeSnapshot(url, content, metadata = {}, options = {}) {
|
|
168
|
+
const operationId = this.generateOperationId();
|
|
169
|
+
|
|
170
|
+
try {
|
|
171
|
+
this.activeOperations.set(operationId, { type: 'store', url, startTime: Date.now() });
|
|
172
|
+
|
|
173
|
+
const snapshotId = this.generateSnapshotId(url, metadata.timestamp || Date.now());
|
|
174
|
+
const contentHash = this.hashContent(content);
|
|
175
|
+
|
|
176
|
+
// Check for similar existing snapshots for delta storage
|
|
177
|
+
let deltaInfo = null;
|
|
178
|
+
if (this.retentionPolicy.enableDeltaStorage) {
|
|
179
|
+
deltaInfo = await this.findSimilarSnapshot(url, contentHash, content);
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Prepare snapshot data
|
|
183
|
+
const snapshot = {
|
|
184
|
+
id: snapshotId,
|
|
185
|
+
url,
|
|
186
|
+
content,
|
|
187
|
+
metadata: {
|
|
188
|
+
timestamp: Date.now(),
|
|
189
|
+
contentHash,
|
|
190
|
+
contentLength: content.length,
|
|
191
|
+
contentType: metadata.contentType || 'text/html',
|
|
192
|
+
userAgent: metadata.userAgent,
|
|
193
|
+
statusCode: metadata.statusCode,
|
|
194
|
+
headers: metadata.headers,
|
|
195
|
+
extractionOptions: metadata.extractionOptions,
|
|
196
|
+
...metadata
|
|
197
|
+
},
|
|
198
|
+
compression: {
|
|
199
|
+
enabled: false,
|
|
200
|
+
algorithm: 'none',
|
|
201
|
+
originalSize: content.length
|
|
202
|
+
},
|
|
203
|
+
delta: {
|
|
204
|
+
enabled: false
|
|
205
|
+
}
|
|
206
|
+
};
|
|
207
|
+
|
|
208
|
+
let finalContent = content;
|
|
209
|
+
let isCompressed = false;
|
|
210
|
+
let isDelta = false;
|
|
211
|
+
|
|
212
|
+
// Apply delta storage if similar snapshot found
|
|
213
|
+
if (deltaInfo && deltaInfo.similarity > this.retentionPolicy.deltaThreshold) {
|
|
214
|
+
const deltaData = this.createDelta(deltaInfo.content, content);
|
|
215
|
+
if (deltaData.length < content.length * 0.7) { // Only use delta if it's significantly smaller
|
|
216
|
+
finalContent = deltaData;
|
|
217
|
+
isDelta = true;
|
|
218
|
+
|
|
219
|
+
snapshot.delta = {
|
|
220
|
+
enabled: true,
|
|
221
|
+
baseSnapshotId: deltaInfo.snapshotId,
|
|
222
|
+
deltaData: deltaData,
|
|
223
|
+
deltaSize: deltaData.length
|
|
224
|
+
};
|
|
225
|
+
|
|
226
|
+
this.stats.deltaSnapshots++;
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
// Apply compression if enabled and above threshold
|
|
231
|
+
if (this.options.enableCompression &&
|
|
232
|
+
finalContent.length > this.retentionPolicy.compressionThreshold) {
|
|
233
|
+
|
|
234
|
+
const compressed = await gzipAsync(finalContent);
|
|
235
|
+
const compressionRatio = compressed.length / finalContent.length;
|
|
236
|
+
|
|
237
|
+
if (compressionRatio < 0.8) { // Only compress if it reduces size by at least 20%
|
|
238
|
+
finalContent = compressed;
|
|
239
|
+
isCompressed = true;
|
|
240
|
+
|
|
241
|
+
snapshot.compression = {
|
|
242
|
+
enabled: true,
|
|
243
|
+
algorithm: 'gzip',
|
|
244
|
+
originalSize: content.length,
|
|
245
|
+
compressedSize: compressed.length,
|
|
246
|
+
compressionRatio
|
|
247
|
+
};
|
|
248
|
+
|
|
249
|
+
this.stats.compressedSnapshots++;
|
|
250
|
+
this.updateCompressionStats(compressionRatio);
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// Store snapshot to disk
|
|
255
|
+
const filePath = await this.writeSnapshotFile(snapshotId, finalContent);
|
|
256
|
+
|
|
257
|
+
// Store metadata
|
|
258
|
+
await this.storeMetadata(snapshotId, snapshot);
|
|
259
|
+
|
|
260
|
+
// Update cache
|
|
261
|
+
if (this.options.cacheEnabled) {
|
|
262
|
+
this.updateCache(snapshotId, snapshot);
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
// Update statistics
|
|
266
|
+
this.updateStorageStats(snapshot, isDelta);
|
|
267
|
+
this.stats.operationCounts.store++;
|
|
268
|
+
|
|
269
|
+
this.activeOperations.delete(operationId);
|
|
270
|
+
|
|
271
|
+
this.emit('snapshotStored', {
|
|
272
|
+
snapshotId,
|
|
273
|
+
url,
|
|
274
|
+
size: finalContent.length,
|
|
275
|
+
originalSize: content.length,
|
|
276
|
+
compressed: isCompressed,
|
|
277
|
+
delta: isDelta,
|
|
278
|
+
filePath
|
|
279
|
+
});
|
|
280
|
+
|
|
281
|
+
return {
|
|
282
|
+
snapshotId,
|
|
283
|
+
url,
|
|
284
|
+
timestamp: snapshot.metadata.timestamp,
|
|
285
|
+
contentHash,
|
|
286
|
+
size: finalContent.length,
|
|
287
|
+
originalSize: content.length,
|
|
288
|
+
compressed: isCompressed,
|
|
289
|
+
compressionRatio: snapshot.compression.compressionRatio,
|
|
290
|
+
delta: isDelta,
|
|
291
|
+
deltaSize: snapshot.delta.deltaSize
|
|
292
|
+
};
|
|
293
|
+
|
|
294
|
+
} catch (error) {
|
|
295
|
+
this.activeOperations.delete(operationId);
|
|
296
|
+
this.emit('error', { operation: 'storeSnapshot', url, error: error.message });
|
|
297
|
+
throw new Error(`Failed to store snapshot: ${error.message}`);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
/**
|
|
302
|
+
* Retrieve a snapshot by ID
|
|
303
|
+
* @param {string} snapshotId - Snapshot ID
|
|
304
|
+
* @param {Object} options - Retrieval options
|
|
305
|
+
* @returns {Object} - Retrieved snapshot
|
|
306
|
+
*/
|
|
307
|
+
async retrieveSnapshot(snapshotId, options = {}) {
|
|
308
|
+
const operationId = this.generateOperationId();
|
|
309
|
+
|
|
310
|
+
try {
|
|
311
|
+
this.activeOperations.set(operationId, {
|
|
312
|
+
type: 'retrieve',
|
|
313
|
+
snapshotId,
|
|
314
|
+
startTime: Date.now()
|
|
315
|
+
});
|
|
316
|
+
|
|
317
|
+
// Check cache first
|
|
318
|
+
if (this.options.cacheEnabled && this.snapshotCache.has(snapshotId)) {
|
|
319
|
+
this.stats.cacheHits++;
|
|
320
|
+
const cached = this.snapshotCache.get(snapshotId);
|
|
321
|
+
this.activeOperations.delete(operationId);
|
|
322
|
+
return cached;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
this.stats.cacheMisses++;
|
|
326
|
+
|
|
327
|
+
// Load metadata
|
|
328
|
+
const metadata = await this.loadSnapshotMetadata(snapshotId);
|
|
329
|
+
if (!metadata) {
|
|
330
|
+
throw new Error(`Snapshot not found: ${snapshotId}`);
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
// Read snapshot file
|
|
334
|
+
let content = await this.readSnapshotFile(snapshotId);
|
|
335
|
+
|
|
336
|
+
// Decompress if needed
|
|
337
|
+
if (metadata.compression && metadata.compression.enabled) {
|
|
338
|
+
if (metadata.compression.algorithm === 'gzip') {
|
|
339
|
+
content = await gunzipAsync(content);
|
|
340
|
+
content = content.toString();
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
// Reconstruct from delta if needed
|
|
345
|
+
if (metadata.delta && metadata.delta.enabled) {
|
|
346
|
+
const baseSnapshot = await this.retrieveSnapshot(metadata.delta.baseSnapshotId, options);
|
|
347
|
+
content = this.applyDelta(baseSnapshot.content, content);
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
const snapshot = {
|
|
351
|
+
...metadata,
|
|
352
|
+
content: options.includeContent !== false ? content : undefined,
|
|
353
|
+
retrievedAt: Date.now()
|
|
354
|
+
};
|
|
355
|
+
|
|
356
|
+
// Update cache
|
|
357
|
+
if (this.options.cacheEnabled) {
|
|
358
|
+
this.updateCache(snapshotId, snapshot);
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
this.stats.operationCounts.retrieve++;
|
|
362
|
+
this.activeOperations.delete(operationId);
|
|
363
|
+
|
|
364
|
+
this.emit('snapshotRetrieved', {
|
|
365
|
+
snapshotId,
|
|
366
|
+
url: metadata.url,
|
|
367
|
+
size: content.length,
|
|
368
|
+
fromCache: false
|
|
369
|
+
});
|
|
370
|
+
|
|
371
|
+
return snapshot;
|
|
372
|
+
|
|
373
|
+
} catch (error) {
|
|
374
|
+
this.activeOperations.delete(operationId);
|
|
375
|
+
this.emit('error', { operation: 'retrieveSnapshot', snapshotId, error: error.message });
|
|
376
|
+
throw new Error(`Failed to retrieve snapshot: ${error.message}`);
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
/**
|
|
381
|
+
* Query snapshots with filters
|
|
382
|
+
* @param {Object} query - Query parameters
|
|
383
|
+
* @returns {Array} - Matching snapshots
|
|
384
|
+
*/
|
|
385
|
+
async querySnapshots(query = {}) {
|
|
386
|
+
try {
|
|
387
|
+
const validated = QuerySchema.parse(query);
|
|
388
|
+
|
|
389
|
+
// Load all metadata that matches URL filter
|
|
390
|
+
let snapshots = [];
|
|
391
|
+
|
|
392
|
+
for (const [snapshotId, metadata] of this.metadataCache) {
|
|
393
|
+
if (validated.url && metadata.url !== validated.url) continue;
|
|
394
|
+
|
|
395
|
+
if (validated.startTime && metadata.metadata.timestamp < validated.startTime) continue;
|
|
396
|
+
if (validated.endTime && metadata.metadata.timestamp > validated.endTime) continue;
|
|
397
|
+
|
|
398
|
+
if (validated.filters) {
|
|
399
|
+
const filters = validated.filters;
|
|
400
|
+
|
|
401
|
+
if (filters.minSize && metadata.metadata.contentLength < filters.minSize) continue;
|
|
402
|
+
if (filters.maxSize && metadata.metadata.contentLength > filters.maxSize) continue;
|
|
403
|
+
if (filters.contentType && metadata.metadata.contentType !== filters.contentType) continue;
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
snapshots.push({
|
|
407
|
+
...metadata,
|
|
408
|
+
content: undefined // Don't include content by default
|
|
409
|
+
});
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// Sort snapshots
|
|
413
|
+
snapshots.sort((a, b) => {
|
|
414
|
+
const aValue = this.getSortValue(a, validated.sortBy);
|
|
415
|
+
const bValue = this.getSortValue(b, validated.sortBy);
|
|
416
|
+
|
|
417
|
+
if (validated.sortOrder === 'desc') {
|
|
418
|
+
return bValue - aValue;
|
|
419
|
+
} else {
|
|
420
|
+
return aValue - bValue;
|
|
421
|
+
}
|
|
422
|
+
});
|
|
423
|
+
|
|
424
|
+
// Apply pagination
|
|
425
|
+
const start = validated.offset;
|
|
426
|
+
const end = start + validated.limit;
|
|
427
|
+
snapshots = snapshots.slice(start, end);
|
|
428
|
+
|
|
429
|
+
// Include content if requested
|
|
430
|
+
if (validated.includeContent) {
|
|
431
|
+
snapshots = await Promise.all(
|
|
432
|
+
snapshots.map(async (snapshot) => {
|
|
433
|
+
const fullSnapshot = await this.retrieveSnapshot(snapshot.id, { includeContent: true });
|
|
434
|
+
return fullSnapshot;
|
|
435
|
+
})
|
|
436
|
+
);
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
this.stats.operationCounts.query++;
|
|
440
|
+
|
|
441
|
+
this.emit('snapshotsQueried', {
|
|
442
|
+
query: validated,
|
|
443
|
+
resultCount: snapshots.length,
|
|
444
|
+
totalMatching: snapshots.length + validated.offset
|
|
445
|
+
});
|
|
446
|
+
|
|
447
|
+
return {
|
|
448
|
+
snapshots,
|
|
449
|
+
totalCount: snapshots.length,
|
|
450
|
+
query: validated,
|
|
451
|
+
executedAt: Date.now()
|
|
452
|
+
};
|
|
453
|
+
|
|
454
|
+
} catch (error) {
|
|
455
|
+
this.emit('error', { operation: 'querySnapshots', query, error: error.message });
|
|
456
|
+
throw new Error(`Failed to query snapshots: ${error.message}`);
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
/**
|
|
461
|
+
* Get change history for a URL
|
|
462
|
+
* @param {string} url - URL to get history for
|
|
463
|
+
* @param {Object} options - History options
|
|
464
|
+
* @returns {Array} - Change history
|
|
465
|
+
*/
|
|
466
|
+
async getChangeHistory(url, options = {}) {
|
|
467
|
+
try {
|
|
468
|
+
const snapshots = await this.querySnapshots({
|
|
469
|
+
url,
|
|
470
|
+
limit: options.limit || 100,
|
|
471
|
+
sortBy: 'timestamp',
|
|
472
|
+
sortOrder: 'desc',
|
|
473
|
+
includeContent: false
|
|
474
|
+
});
|
|
475
|
+
|
|
476
|
+
const history = [];
|
|
477
|
+
const snapshotList = snapshots.snapshots;
|
|
478
|
+
|
|
479
|
+
for (let i = 0; i < snapshotList.length - 1; i++) {
|
|
480
|
+
const current = snapshotList[i];
|
|
481
|
+
const previous = snapshotList[i + 1];
|
|
482
|
+
|
|
483
|
+
// Calculate change metrics
|
|
484
|
+
const changeMetrics = await this.calculateChangeMetrics(previous, current);
|
|
485
|
+
|
|
486
|
+
history.push({
|
|
487
|
+
timestamp: current.metadata.timestamp,
|
|
488
|
+
snapshotId: current.id,
|
|
489
|
+
previousSnapshotId: previous.id,
|
|
490
|
+
changes: changeMetrics,
|
|
491
|
+
timeDelta: current.metadata.timestamp - previous.metadata.timestamp,
|
|
492
|
+
sizeDelta: current.metadata.contentLength - previous.metadata.contentLength
|
|
493
|
+
});
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
return {
|
|
497
|
+
url,
|
|
498
|
+
history,
|
|
499
|
+
totalSnapshots: snapshotList.length,
|
|
500
|
+
timespan: snapshotList.length > 0 ?
|
|
501
|
+
snapshotList[0].metadata.timestamp - snapshotList[snapshotList.length - 1].metadata.timestamp : 0
|
|
502
|
+
};
|
|
503
|
+
|
|
504
|
+
} catch (error) {
|
|
505
|
+
this.emit('error', { operation: 'getChangeHistory', url, error: error.message });
|
|
506
|
+
throw new Error(`Failed to get change history: ${error.message}`);
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
/**
|
|
511
|
+
* Delete snapshots
|
|
512
|
+
* @param {string|Array} snapshotIds - Snapshot ID(s) to delete
|
|
513
|
+
* @returns {Object} - Deletion results
|
|
514
|
+
*/
|
|
515
|
+
async deleteSnapshots(snapshotIds) {
|
|
516
|
+
const ids = Array.isArray(snapshotIds) ? snapshotIds : [snapshotIds];
|
|
517
|
+
const results = {
|
|
518
|
+
deleted: [],
|
|
519
|
+
failed: [],
|
|
520
|
+
totalSize: 0
|
|
521
|
+
};
|
|
522
|
+
|
|
523
|
+
try {
|
|
524
|
+
for (const snapshotId of ids) {
|
|
525
|
+
try {
|
|
526
|
+
const metadata = await this.loadSnapshotMetadata(snapshotId);
|
|
527
|
+
if (!metadata) {
|
|
528
|
+
results.failed.push({ snapshotId, error: 'Snapshot not found' });
|
|
529
|
+
continue;
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
// Delete file
|
|
533
|
+
await this.deleteSnapshotFile(snapshotId);
|
|
534
|
+
|
|
535
|
+
// Delete metadata
|
|
536
|
+
await this.deleteSnapshotMetadata(snapshotId);
|
|
537
|
+
|
|
538
|
+
// Remove from cache
|
|
539
|
+
this.snapshotCache.delete(snapshotId);
|
|
540
|
+
this.metadataCache.delete(snapshotId);
|
|
541
|
+
|
|
542
|
+
// Update statistics
|
|
543
|
+
this.stats.totalSnapshots--;
|
|
544
|
+
this.stats.totalStorageSize -= metadata.metadata.contentLength;
|
|
545
|
+
|
|
546
|
+
results.deleted.push(snapshotId);
|
|
547
|
+
results.totalSize += metadata.metadata.contentLength;
|
|
548
|
+
|
|
549
|
+
this.emit('snapshotDeleted', { snapshotId, size: metadata.metadata.contentLength });
|
|
550
|
+
|
|
551
|
+
} catch (error) {
|
|
552
|
+
results.failed.push({ snapshotId, error: error.message });
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
this.stats.operationCounts.delete += results.deleted.length;
|
|
557
|
+
|
|
558
|
+
return results;
|
|
559
|
+
|
|
560
|
+
} catch (error) {
|
|
561
|
+
this.emit('error', { operation: 'deleteSnapshots', snapshotIds, error: error.message });
|
|
562
|
+
throw new Error(`Failed to delete snapshots: ${error.message}`);
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
/**
|
|
567
|
+
* Clean up old snapshots based on retention policy
|
|
568
|
+
* @returns {Object} - Cleanup results
|
|
569
|
+
*/
|
|
570
|
+
async cleanupSnapshots() {
|
|
571
|
+
const startTime = Date.now();
|
|
572
|
+
|
|
573
|
+
try {
|
|
574
|
+
const cleanupResults = {
|
|
575
|
+
deletedCount: 0,
|
|
576
|
+
freedSpace: 0,
|
|
577
|
+
errors: []
|
|
578
|
+
};
|
|
579
|
+
|
|
580
|
+
const allSnapshots = Array.from(this.metadataCache.values());
|
|
581
|
+
const now = Date.now();
|
|
582
|
+
|
|
583
|
+
// Group snapshots by URL for intelligent cleanup
|
|
584
|
+
const snapshotsByUrl = new Map();
|
|
585
|
+
allSnapshots.forEach(snapshot => {
|
|
586
|
+
const url = snapshot.url;
|
|
587
|
+
if (!snapshotsByUrl.has(url)) {
|
|
588
|
+
snapshotsByUrl.set(url, []);
|
|
589
|
+
}
|
|
590
|
+
snapshotsByUrl.get(url).push(snapshot);
|
|
591
|
+
});
|
|
592
|
+
|
|
593
|
+
// Cleanup by retention policy rules
|
|
594
|
+
for (const [url, snapshots] of snapshotsByUrl) {
|
|
595
|
+
const sortedSnapshots = snapshots.sort((a, b) =>
|
|
596
|
+
b.metadata.timestamp - a.metadata.timestamp
|
|
597
|
+
);
|
|
598
|
+
|
|
599
|
+
const toDelete = [];
|
|
600
|
+
|
|
601
|
+
// Rule 1: Respect maximum snapshot limit per URL
|
|
602
|
+
if (sortedSnapshots.length > this.retentionPolicy.maxSnapshots) {
|
|
603
|
+
toDelete.push(...sortedSnapshots.slice(this.retentionPolicy.maxSnapshots));
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
// Rule 2: Delete snapshots older than maxAge
|
|
607
|
+
const ageThreshold = now - this.retentionPolicy.maxAge;
|
|
608
|
+
sortedSnapshots.forEach(snapshot => {
|
|
609
|
+
if (snapshot.metadata.timestamp < ageThreshold && !toDelete.includes(snapshot)) {
|
|
610
|
+
toDelete.push(snapshot);
|
|
611
|
+
}
|
|
612
|
+
});
|
|
613
|
+
|
|
614
|
+
// Delete marked snapshots
|
|
615
|
+
if (toDelete.length > 0) {
|
|
616
|
+
const deleteResult = await this.deleteSnapshots(toDelete.map(s => s.id));
|
|
617
|
+
cleanupResults.deletedCount += deleteResult.deleted.length;
|
|
618
|
+
cleanupResults.freedSpace += deleteResult.totalSize;
|
|
619
|
+
cleanupResults.errors.push(...deleteResult.failed);
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
// Rule 3: Check total storage size
|
|
624
|
+
if (this.stats.totalStorageSize > this.retentionPolicy.maxStorageSize) {
|
|
625
|
+
const excess = this.stats.totalStorageSize - this.retentionPolicy.maxStorageSize;
|
|
626
|
+
const additionalCleanup = await this.cleanupBySize(excess);
|
|
627
|
+
|
|
628
|
+
cleanupResults.deletedCount += additionalCleanup.deletedCount;
|
|
629
|
+
cleanupResults.freedSpace += additionalCleanup.freedSpace;
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
const cleanupTime = Date.now() - startTime;
|
|
633
|
+
|
|
634
|
+
this.stats.cleanupOperations++;
|
|
635
|
+
this.stats.lastCleanup = Date.now();
|
|
636
|
+
|
|
637
|
+
this.emit('cleanupCompleted', {
|
|
638
|
+
...cleanupResults,
|
|
639
|
+
cleanupTime,
|
|
640
|
+
remainingSnapshots: this.stats.totalSnapshots,
|
|
641
|
+
remainingSize: this.stats.totalStorageSize
|
|
642
|
+
});
|
|
643
|
+
|
|
644
|
+
return {
|
|
645
|
+
...cleanupResults,
|
|
646
|
+
cleanupTime
|
|
647
|
+
};
|
|
648
|
+
|
|
649
|
+
} catch (error) {
|
|
650
|
+
this.emit('error', { operation: 'cleanupSnapshots', error: error.message });
|
|
651
|
+
throw new Error(`Failed to cleanup snapshots: ${error.message}`);
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
// File system operations
|
|
656
|
+
|
|
657
|
+
async createDirectories() {
|
|
658
|
+
const dirs = [
|
|
659
|
+
this.options.storageDir,
|
|
660
|
+
this.options.metadataDir,
|
|
661
|
+
this.options.tempDir
|
|
662
|
+
];
|
|
663
|
+
|
|
664
|
+
for (const dir of dirs) {
|
|
665
|
+
await fs.mkdir(dir, { recursive: true });
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
async writeSnapshotFile(snapshotId, content) {
|
|
670
|
+
const filePath = path.join(this.options.storageDir, `${snapshotId}.snap`);
|
|
671
|
+
|
|
672
|
+
if (Buffer.isBuffer(content)) {
|
|
673
|
+
await fs.writeFile(filePath, content);
|
|
674
|
+
} else {
|
|
675
|
+
await fs.writeFile(filePath, content, 'utf8');
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
return filePath;
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
async readSnapshotFile(snapshotId) {
|
|
682
|
+
const filePath = path.join(this.options.storageDir, `${snapshotId}.snap`);
|
|
683
|
+
return await fs.readFile(filePath);
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
async deleteSnapshotFile(snapshotId) {
|
|
687
|
+
const filePath = path.join(this.options.storageDir, `${snapshotId}.snap`);
|
|
688
|
+
await fs.unlink(filePath);
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
async storeMetadata(snapshotId, metadata) {
|
|
692
|
+
const filePath = path.join(this.options.metadataDir, `${snapshotId}.meta`);
|
|
693
|
+
await fs.writeFile(filePath, JSON.stringify(metadata, null, 2), 'utf8');
|
|
694
|
+
|
|
695
|
+
// Update in-memory cache
|
|
696
|
+
this.metadataCache.set(snapshotId, metadata);
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
async loadSnapshotMetadata(snapshotId) {
|
|
700
|
+
// Check cache first
|
|
701
|
+
if (this.metadataCache.has(snapshotId)) {
|
|
702
|
+
return this.metadataCache.get(snapshotId);
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
// Load from disk
|
|
706
|
+
try {
|
|
707
|
+
const filePath = path.join(this.options.metadataDir, `${snapshotId}.meta`);
|
|
708
|
+
const content = await fs.readFile(filePath, 'utf8');
|
|
709
|
+
const metadata = JSON.parse(content);
|
|
710
|
+
|
|
711
|
+
// Cache it
|
|
712
|
+
this.metadataCache.set(snapshotId, metadata);
|
|
713
|
+
|
|
714
|
+
return metadata;
|
|
715
|
+
} catch (error) {
|
|
716
|
+
return null;
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
|
|
720
|
+
async deleteSnapshotMetadata(snapshotId) {
|
|
721
|
+
const filePath = path.join(this.options.metadataDir, `${snapshotId}.meta`);
|
|
722
|
+
await fs.unlink(filePath);
|
|
723
|
+
|
|
724
|
+
this.metadataCache.delete(snapshotId);
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
async loadMetadata() {
|
|
728
|
+
try {
|
|
729
|
+
const metadataFiles = await fs.readdir(this.options.metadataDir);
|
|
730
|
+
let totalSize = 0;
|
|
731
|
+
let totalSnapshots = 0;
|
|
732
|
+
|
|
733
|
+
for (const file of metadataFiles) {
|
|
734
|
+
if (file.endsWith('.meta')) {
|
|
735
|
+
const snapshotId = file.replace('.meta', '');
|
|
736
|
+
const metadata = await this.loadSnapshotMetadata(snapshotId);
|
|
737
|
+
|
|
738
|
+
if (metadata) {
|
|
739
|
+
totalSnapshots++;
|
|
740
|
+
totalSize += metadata.metadata.contentLength || 0;
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
this.stats.totalSnapshots = totalSnapshots;
|
|
746
|
+
this.stats.totalStorageSize = totalSize;
|
|
747
|
+
|
|
748
|
+
} catch (error) {
|
|
749
|
+
// Directory doesn't exist yet, that's okay
|
|
750
|
+
this.stats.totalSnapshots = 0;
|
|
751
|
+
this.stats.totalStorageSize = 0;
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
// Utility methods
|
|
756
|
+
|
|
757
|
+
generateSnapshotId(url, timestamp) {
|
|
758
|
+
const hash = createHash('sha256');
|
|
759
|
+
hash.update(`${url}-${timestamp}-${Math.random()}`);
|
|
760
|
+
return hash.digest('hex').substring(0, 16);
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
generateOperationId() {
|
|
764
|
+
return `op-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
hashContent(content) {
|
|
768
|
+
const hash = createHash('sha256');
|
|
769
|
+
hash.update(content);
|
|
770
|
+
return hash.digest('hex');
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
async findSimilarSnapshot(url, contentHash, content) {
|
|
774
|
+
// Find recent snapshots for the same URL
|
|
775
|
+
const recentSnapshots = await this.querySnapshots({
|
|
776
|
+
url,
|
|
777
|
+
limit: 10,
|
|
778
|
+
sortBy: 'timestamp',
|
|
779
|
+
sortOrder: 'desc',
|
|
780
|
+
includeContent: false
|
|
781
|
+
});
|
|
782
|
+
|
|
783
|
+
for (const snapshot of recentSnapshots.snapshots) {
|
|
784
|
+
if (snapshot.metadata.contentHash === contentHash) {
|
|
785
|
+
// Exact match
|
|
786
|
+
return {
|
|
787
|
+
snapshotId: snapshot.id,
|
|
788
|
+
similarity: 1.0,
|
|
789
|
+
content: null
|
|
790
|
+
};
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
// Load content for similarity comparison
|
|
794
|
+
const fullSnapshot = await this.retrieveSnapshot(snapshot.id, { includeContent: true });
|
|
795
|
+
const similarity = this.calculateContentSimilarity(content, fullSnapshot.content);
|
|
796
|
+
|
|
797
|
+
if (similarity > this.retentionPolicy.deltaThreshold) {
|
|
798
|
+
return {
|
|
799
|
+
snapshotId: snapshot.id,
|
|
800
|
+
similarity,
|
|
801
|
+
content: fullSnapshot.content
|
|
802
|
+
};
|
|
803
|
+
}
|
|
804
|
+
}
|
|
805
|
+
|
|
806
|
+
return null;
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
calculateContentSimilarity(content1, content2) {
|
|
810
|
+
// Simple similarity calculation based on content length difference
|
|
811
|
+
// In production, you might want to use more sophisticated algorithms
|
|
812
|
+
const len1 = content1.length;
|
|
813
|
+
const len2 = content2.length;
|
|
814
|
+
|
|
815
|
+
if (len1 === 0 && len2 === 0) return 1.0;
|
|
816
|
+
if (len1 === 0 || len2 === 0) return 0.0;
|
|
817
|
+
|
|
818
|
+
const lengthSimilarity = 1 - Math.abs(len1 - len2) / Math.max(len1, len2);
|
|
819
|
+
|
|
820
|
+
// Additional similarity checks can be added here
|
|
821
|
+
// For example, using diff algorithms or content hashing
|
|
822
|
+
|
|
823
|
+
return lengthSimilarity;
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
createDelta(baseContent, currentContent) {
|
|
827
|
+
// Simple delta implementation - in production, consider using proper diff libraries
|
|
828
|
+
// This is a placeholder that would create a compressed diff
|
|
829
|
+
const deltaObject = {
|
|
830
|
+
type: 'diff',
|
|
831
|
+
base: baseContent.length,
|
|
832
|
+
current: currentContent.length,
|
|
833
|
+
// In a real implementation, you'd store the actual diff data
|
|
834
|
+
operations: [] // diff operations would go here
|
|
835
|
+
};
|
|
836
|
+
|
|
837
|
+
return JSON.stringify(deltaObject);
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
applyDelta(baseContent, deltaData) {
|
|
841
|
+
try {
|
|
842
|
+
const delta = JSON.parse(deltaData);
|
|
843
|
+
|
|
844
|
+
// In a real implementation, you'd apply the diff operations
|
|
845
|
+
// For now, return the base content as a fallback
|
|
846
|
+
return baseContent;
|
|
847
|
+
} catch (error) {
|
|
848
|
+
throw new Error(`Failed to apply delta: ${error.message}`);
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
|
|
852
|
+
async calculateChangeMetrics(previousSnapshot, currentSnapshot) {
|
|
853
|
+
// Calculate various change metrics between snapshots
|
|
854
|
+
const metrics = {
|
|
855
|
+
sizeDelta: currentSnapshot.metadata.contentLength - previousSnapshot.metadata.contentLength,
|
|
856
|
+
timeDelta: currentSnapshot.metadata.timestamp - previousSnapshot.metadata.timestamp,
|
|
857
|
+
hashChanged: currentSnapshot.metadata.contentHash !== previousSnapshot.metadata.contentHash,
|
|
858
|
+
contentTypeChanged: currentSnapshot.metadata.contentType !== previousSnapshot.metadata.contentType,
|
|
859
|
+
similarity: 0
|
|
860
|
+
};
|
|
861
|
+
|
|
862
|
+
// Calculate content similarity if different hashes
|
|
863
|
+
if (metrics.hashChanged) {
|
|
864
|
+
// This would require loading both snapshots' content
|
|
865
|
+
// For now, estimate based on size difference
|
|
866
|
+
metrics.similarity = 1 - Math.abs(metrics.sizeDelta) / Math.max(
|
|
867
|
+
currentSnapshot.metadata.contentLength,
|
|
868
|
+
previousSnapshot.metadata.contentLength
|
|
869
|
+
);
|
|
870
|
+
} else {
|
|
871
|
+
metrics.similarity = 1.0;
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
return metrics;
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
getSortValue(snapshot, sortBy) {
|
|
878
|
+
switch (sortBy) {
|
|
879
|
+
case 'timestamp':
|
|
880
|
+
return snapshot.metadata.timestamp;
|
|
881
|
+
case 'size':
|
|
882
|
+
return snapshot.metadata.contentLength;
|
|
883
|
+
case 'similarity':
|
|
884
|
+
return snapshot.similarity || 0;
|
|
885
|
+
default:
|
|
886
|
+
return snapshot.metadata.timestamp;
|
|
887
|
+
}
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
updateCache(snapshotId, snapshot) {
|
|
891
|
+
// Simple LRU-like cache management
|
|
892
|
+
if (this.snapshotCache.size >= this.options.cacheSize) {
|
|
893
|
+
const firstKey = this.snapshotCache.keys().next().value;
|
|
894
|
+
this.snapshotCache.delete(firstKey);
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
this.snapshotCache.set(snapshotId, snapshot);
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
async initializeCache() {
|
|
901
|
+
// Pre-load recent snapshots into cache
|
|
902
|
+
const recentSnapshots = await this.querySnapshots({
|
|
903
|
+
limit: Math.min(this.options.cacheSize, 50),
|
|
904
|
+
sortBy: 'timestamp',
|
|
905
|
+
sortOrder: 'desc',
|
|
906
|
+
includeContent: false
|
|
907
|
+
});
|
|
908
|
+
|
|
909
|
+
for (const snapshot of recentSnapshots.snapshots) {
|
|
910
|
+
this.metadataCache.set(snapshot.id, snapshot);
|
|
911
|
+
}
|
|
912
|
+
}
|
|
913
|
+
|
|
914
|
+
updateStorageStats(snapshot, isDelta) {
|
|
915
|
+
this.stats.totalSnapshots++;
|
|
916
|
+
this.stats.totalStorageSize += snapshot.metadata.contentLength;
|
|
917
|
+
|
|
918
|
+
if (isDelta) {
|
|
919
|
+
this.updateDeltaStats(snapshot.delta.deltaSize);
|
|
920
|
+
}
|
|
921
|
+
}
|
|
922
|
+
|
|
923
|
+
updateCompressionStats(ratio) {
|
|
924
|
+
const currentAvg = this.stats.averageCompressionRatio;
|
|
925
|
+
const count = this.stats.compressedSnapshots;
|
|
926
|
+
|
|
927
|
+
this.stats.averageCompressionRatio =
|
|
928
|
+
(currentAvg * (count - 1) + ratio) / count;
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
updateDeltaStats(deltaSize) {
|
|
932
|
+
const currentAvg = this.stats.averageDeltaSize;
|
|
933
|
+
const count = this.stats.deltaSnapshots;
|
|
934
|
+
|
|
935
|
+
this.stats.averageDeltaSize =
|
|
936
|
+
(currentAvg * (count - 1) + deltaSize) / count;
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
async cleanupBySize(targetReduction) {
|
|
940
|
+
// Clean up oldest snapshots to free up space
|
|
941
|
+
const allSnapshots = Array.from(this.metadataCache.values());
|
|
942
|
+
const sorted = allSnapshots.sort((a, b) =>
|
|
943
|
+
a.metadata.timestamp - b.metadata.timestamp
|
|
944
|
+
);
|
|
945
|
+
|
|
946
|
+
let freedSpace = 0;
|
|
947
|
+
const toDelete = [];
|
|
948
|
+
|
|
949
|
+
for (const snapshot of sorted) {
|
|
950
|
+
if (freedSpace >= targetReduction) break;
|
|
951
|
+
|
|
952
|
+
toDelete.push(snapshot.id);
|
|
953
|
+
freedSpace += snapshot.metadata.contentLength;
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
const deleteResult = await this.deleteSnapshots(toDelete);
|
|
957
|
+
|
|
958
|
+
return {
|
|
959
|
+
deletedCount: deleteResult.deleted.length,
|
|
960
|
+
freedSpace: deleteResult.totalSize
|
|
961
|
+
};
|
|
962
|
+
}
|
|
963
|
+
|
|
964
|
+
startCleanupTimer() {
|
|
965
|
+
if (this.cleanupTimer) {
|
|
966
|
+
clearInterval(this.cleanupTimer);
|
|
967
|
+
}
|
|
968
|
+
|
|
969
|
+
this.cleanupTimer = setInterval(async () => {
|
|
970
|
+
try {
|
|
971
|
+
await this.cleanupSnapshots();
|
|
972
|
+
} catch (error) {
|
|
973
|
+
this.emit('error', { operation: 'scheduledCleanup', error: error.message });
|
|
974
|
+
}
|
|
975
|
+
}, this.retentionPolicy.cleanupInterval);
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
stopCleanupTimer() {
|
|
979
|
+
if (this.cleanupTimer) {
|
|
980
|
+
clearInterval(this.cleanupTimer);
|
|
981
|
+
this.cleanupTimer = null;
|
|
982
|
+
}
|
|
983
|
+
}
|
|
984
|
+
|
|
985
|
+
// Public API methods
|
|
986
|
+
|
|
987
|
+
getStats() {
|
|
988
|
+
return {
|
|
989
|
+
...this.stats,
|
|
990
|
+
cacheSize: this.snapshotCache.size,
|
|
991
|
+
metadataCacheSize: this.metadataCache.size,
|
|
992
|
+
activeOperations: this.activeOperations.size,
|
|
993
|
+
averageSnapshotSize: this.stats.totalSnapshots > 0 ?
|
|
994
|
+
this.stats.totalStorageSize / this.stats.totalSnapshots : 0,
|
|
995
|
+
storageEfficiency: {
|
|
996
|
+
compressionRatio: this.stats.averageCompressionRatio,
|
|
997
|
+
deltaRatio: this.stats.averageDeltaSize,
|
|
998
|
+
compressedPercentage: this.stats.totalSnapshots > 0 ?
|
|
999
|
+
(this.stats.compressedSnapshots / this.stats.totalSnapshots) * 100 : 0,
|
|
1000
|
+
deltaPercentage: this.stats.totalSnapshots > 0 ?
|
|
1001
|
+
(this.stats.deltaSnapshots / this.stats.totalSnapshots) * 100 : 0
|
|
1002
|
+
}
|
|
1003
|
+
};
|
|
1004
|
+
}
|
|
1005
|
+
|
|
1006
|
+
getRetentionPolicy() {
|
|
1007
|
+
return { ...this.retentionPolicy };
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
updateRetentionPolicy(newPolicy) {
|
|
1011
|
+
this.retentionPolicy = RetentionPolicySchema.parse({
|
|
1012
|
+
...this.retentionPolicy,
|
|
1013
|
+
...newPolicy
|
|
1014
|
+
});
|
|
1015
|
+
|
|
1016
|
+
this.emit('retentionPolicyUpdated', this.retentionPolicy);
|
|
1017
|
+
}
|
|
1018
|
+
|
|
1019
|
+
async shutdown() {
|
|
1020
|
+
this.stopCleanupTimer();
|
|
1021
|
+
|
|
1022
|
+
// Wait for active operations to complete
|
|
1023
|
+
const maxWaitTime = 30000; // 30 seconds
|
|
1024
|
+
const startTime = Date.now();
|
|
1025
|
+
|
|
1026
|
+
while (this.activeOperations.size > 0 && (Date.now() - startTime) < maxWaitTime) {
|
|
1027
|
+
await new Promise(resolve => setTimeout(resolve, 100));
|
|
1028
|
+
}
|
|
1029
|
+
|
|
1030
|
+
this.emit('shutdown', {
|
|
1031
|
+
pendingOperations: this.activeOperations.size,
|
|
1032
|
+
shutdownTime: Date.now() - startTime
|
|
1033
|
+
});
|
|
1034
|
+
}
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
export default SnapshotManager;
|