rag-lite-ts 1.0.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +605 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/binary-index-format.d.ts +52 -0
- package/dist/core/binary-index-format.js +122 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.d.ts +1 -1
- package/dist/core/vector-index.js +37 -39
- package/dist/factories/index.d.ts +3 -1
- package/dist/factories/index.js +2 -0
- package/dist/factories/polymorphic-factory.d.ts +50 -0
- package/dist/factories/polymorphic-factory.js +159 -0
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +32 -0
- package/dist/index.js +29 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +327 -0
- package/dist/multimodal/clip-embedder.js +992 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +60 -9
- package/dist/search.js +82 -11
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +27 -6
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,1504 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content Manager - Handles content ingestion routing for unified content system
|
|
3
|
+
* Routes filesystem content to reference storage and memory content to content directory
|
|
4
|
+
* Implements deduplication and content ID generation
|
|
5
|
+
*/
|
|
6
|
+
import { createHash } from 'crypto';
|
|
7
|
+
import { promises as fs } from 'fs';
|
|
8
|
+
import { join, dirname, extname, basename } from 'path';
|
|
9
|
+
import { insertContentMetadata, getContentMetadataByHash, getStorageStats, updateStorageStats, getContentMetadataByStorageType, deleteContentMetadata } from './db.js';
|
|
10
|
+
import { ContentIngestionError, ContentDirectoryError, StorageLimitExceededError, InvalidContentFormatError, ContentErrorHandler } from './content-errors.js';
|
|
11
|
+
import { globalResourceCleanup, withResourceCleanup, writeFileAtomic, withTimeout, SafeBuffer } from './resource-cleanup.js';
|
|
12
|
+
import { createStreamingOperations, formatBytes, formatProcessingTime } from './streaming-operations.js';
|
|
13
|
+
import { createContentPerformanceOptimizer, formatCacheHitRate } from './content-performance-optimizer.js';
|
|
14
|
+
/**
 * Default configuration used for any option the caller does not supply.
 *
 * The object is frozen so the shared defaults cannot be mutated by accident;
 * consumers should copy it (for example with object spread) before changing
 * any value.
 */
const DEFAULT_CONFIG = Object.freeze({
    contentDir: '.raglite/content',
    maxFileSize: 50 * 1024 * 1024, // 50MB
    maxContentDirSize: 2 * 1024 * 1024 * 1024, // 2GB
    enableDeduplication: true,
    enableStorageTracking: true,
    storageWarningThreshold: 75, // Warn at 75% usage
    storageErrorThreshold: 95 // Reject at 95% usage
});
|
|
26
|
+
/**
|
|
27
|
+
* ContentManager class for handling content ingestion routing
|
|
28
|
+
* Implements the unified content system's ingestion logic
|
|
29
|
+
*/
|
|
30
|
+
export class ContentManager {
|
|
31
|
+
db;
|
|
32
|
+
config;
|
|
33
|
+
streamingOps;
|
|
34
|
+
performanceOptimizer;
|
|
35
|
+
constructor(db, config = {}) {
|
|
36
|
+
this.db = db;
|
|
37
|
+
// Parse and normalize configuration
|
|
38
|
+
const inputConfig = { ...DEFAULT_CONFIG, ...config };
|
|
39
|
+
// Parse size strings to bytes
|
|
40
|
+
const maxFileSize = this.parseSizeToBytes(inputConfig.maxFileSize);
|
|
41
|
+
const maxContentDirSize = this.parseSizeToBytes(inputConfig.maxContentDirSize);
|
|
42
|
+
// Validate thresholds
|
|
43
|
+
if (inputConfig.storageWarningThreshold < 0 || inputConfig.storageWarningThreshold > 100) {
|
|
44
|
+
throw new Error('Storage warning threshold must be between 0 and 100');
|
|
45
|
+
}
|
|
46
|
+
if (inputConfig.storageErrorThreshold < 0 || inputConfig.storageErrorThreshold > 100) {
|
|
47
|
+
throw new Error('Storage error threshold must be between 0 and 100');
|
|
48
|
+
}
|
|
49
|
+
if (inputConfig.storageErrorThreshold <= inputConfig.storageWarningThreshold) {
|
|
50
|
+
throw new Error('Storage error threshold must be greater than warning threshold');
|
|
51
|
+
}
|
|
52
|
+
// Create normalized config
|
|
53
|
+
this.config = {
|
|
54
|
+
contentDir: inputConfig.contentDir,
|
|
55
|
+
maxFileSize,
|
|
56
|
+
maxContentDirSize,
|
|
57
|
+
enableDeduplication: inputConfig.enableDeduplication,
|
|
58
|
+
enableStorageTracking: inputConfig.enableStorageTracking,
|
|
59
|
+
storageWarningThreshold: inputConfig.storageWarningThreshold,
|
|
60
|
+
storageErrorThreshold: inputConfig.storageErrorThreshold
|
|
61
|
+
};
|
|
62
|
+
// Initialize streaming operations with appropriate chunk size based on file size limits
|
|
63
|
+
const chunkSize = Math.floor(Math.min(1024 * 1024, Math.max(64 * 1024, maxFileSize / 100))); // 64KB to 1MB chunks
|
|
64
|
+
this.streamingOps = createStreamingOperations({
|
|
65
|
+
chunkSize,
|
|
66
|
+
enableProgress: false, // Can be enabled for debugging
|
|
67
|
+
enableHashing: true,
|
|
68
|
+
timeout: 300000 // 5 minutes
|
|
69
|
+
});
|
|
70
|
+
// Initialize performance optimizer with optimized settings
|
|
71
|
+
this.performanceOptimizer = createContentPerformanceOptimizer({
|
|
72
|
+
hashCacheSize: 1000,
|
|
73
|
+
hashCacheTTL: 60 * 60 * 1000, // 1 hour
|
|
74
|
+
maxConcurrentOperations: 10,
|
|
75
|
+
batchSize: 50,
|
|
76
|
+
fileBufferSize: chunkSize,
|
|
77
|
+
enableAsyncIO: true,
|
|
78
|
+
enableMetrics: true,
|
|
79
|
+
metricsRetentionTime: 24 * 60 * 60 * 1000 // 24 hours
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* Ingests content from filesystem by creating references without copying files
|
|
84
|
+
* @param filePath - Path to the file to ingest
|
|
85
|
+
* @returns Promise that resolves to content ingestion result
|
|
86
|
+
*/
|
|
87
|
+
async ingestFromFilesystem(filePath) {
|
|
88
|
+
// Use resource cleanup with timeout for filesystem operations
|
|
89
|
+
return withResourceCleanup(async (transactionId) => {
|
|
90
|
+
let content = null;
|
|
91
|
+
let safeBuffer = null;
|
|
92
|
+
try {
|
|
93
|
+
// Verify file exists and get stats with timeout
|
|
94
|
+
const stats = await withTimeout(fs.stat(filePath), 10000, // 10 second timeout for file stat
|
|
95
|
+
'File stat operation timed out');
|
|
96
|
+
if (!stats.isFile()) {
|
|
97
|
+
throw new ContentIngestionError('file validation', `Path is not a file: ${filePath}`, 'filesystem_ingestion');
|
|
98
|
+
}
|
|
99
|
+
// Check file size limit
|
|
100
|
+
if (stats.size > this.config.maxFileSize) {
|
|
101
|
+
const sizeMB = Math.round((stats.size / 1024 / 1024) * 100) / 100;
|
|
102
|
+
const limitMB = Math.round((this.config.maxFileSize / 1024 / 1024) * 100) / 100;
|
|
103
|
+
throw new ContentIngestionError('file size validation', `File size (${sizeMB}MB) exceeds maximum allowed size (${limitMB}MB)`, 'filesystem_ingestion');
|
|
104
|
+
}
|
|
105
|
+
// Use optimized hash calculation with caching
|
|
106
|
+
let contentHash;
|
|
107
|
+
if (stats.size > 10 * 1024 * 1024) { // Use streaming for files > 10MB
|
|
108
|
+
contentHash = await withTimeout(this.performanceOptimizer.calculateFileHashOptimized(filePath), 120000, // 2 minute timeout for large file hashing
|
|
109
|
+
'Optimized hash calculation timed out');
|
|
110
|
+
// Log performance metrics for large files
|
|
111
|
+
if (stats.size > 50 * 1024 * 1024) {
|
|
112
|
+
const cacheStats = this.performanceOptimizer.getHashCacheStats();
|
|
113
|
+
console.log(`Optimized hash completed: ${formatBytes(stats.size)} (Cache hit rate: ${formatCacheHitRate(cacheStats.hitRate)})`);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
else {
|
|
117
|
+
// For smaller files, use traditional method with memory management
|
|
118
|
+
content = await withTimeout(fs.readFile(filePath), 60000, // 60 second timeout for file reading
|
|
119
|
+
'File read operation timed out');
|
|
120
|
+
// Create safe buffer for memory management (don't clear original for normal operations)
|
|
121
|
+
safeBuffer = new SafeBuffer(content, { clearOriginal: false });
|
|
122
|
+
globalResourceCleanup.addBuffer(transactionId, safeBuffer.get());
|
|
123
|
+
contentHash = this.generateContentHash(safeBuffer.get());
|
|
124
|
+
}
|
|
125
|
+
// Check for existing content if deduplication is enabled
|
|
126
|
+
if (this.config.enableDeduplication) {
|
|
127
|
+
const existing = await withTimeout(getContentMetadataByHash(this.db, contentHash), 10000, // 10 second timeout for database query
|
|
128
|
+
'Database query for existing content timed out');
|
|
129
|
+
if (existing) {
|
|
130
|
+
return {
|
|
131
|
+
contentId: existing.id,
|
|
132
|
+
wasDeduped: true,
|
|
133
|
+
storageType: existing.storageType,
|
|
134
|
+
contentPath: existing.contentPath
|
|
135
|
+
};
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
// Generate content ID
|
|
139
|
+
const contentId = safeBuffer ? this.generateContentId(safeBuffer.get()) : this.generateContentIdFromHash(contentHash);
|
|
140
|
+
// Detect content type - for streaming case, read small sample for magic number detection
|
|
141
|
+
let contentType;
|
|
142
|
+
if (stats.size > 10 * 1024 * 1024 && !content) {
|
|
143
|
+
// For large files processed with streaming, read small sample for content type detection
|
|
144
|
+
const sampleSize = Math.min(8192, stats.size); // Read first 8KB for magic number detection
|
|
145
|
+
const sample = Buffer.alloc(sampleSize);
|
|
146
|
+
const fd = await fs.open(filePath, 'r');
|
|
147
|
+
try {
|
|
148
|
+
await fd.read(sample, 0, sampleSize, 0);
|
|
149
|
+
contentType = this.detectContentType(filePath, sample);
|
|
150
|
+
}
|
|
151
|
+
finally {
|
|
152
|
+
await fd.close();
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
else {
|
|
156
|
+
contentType = safeBuffer ? this.detectContentType(filePath, safeBuffer.get()) : this.detectContentType(filePath);
|
|
157
|
+
}
|
|
158
|
+
// Validate content type is supported
|
|
159
|
+
const validation = this.validateContentType(contentType);
|
|
160
|
+
if (!validation.isSupported) {
|
|
161
|
+
throw new InvalidContentFormatError(contentType, validation.error, 'filesystem_ingestion');
|
|
162
|
+
}
|
|
163
|
+
// Create content metadata for filesystem reference
|
|
164
|
+
const contentMetadata = {
|
|
165
|
+
id: contentId,
|
|
166
|
+
storageType: 'filesystem',
|
|
167
|
+
originalPath: filePath,
|
|
168
|
+
contentPath: filePath, // For filesystem, content path is the same as original path
|
|
169
|
+
displayName: basename(filePath),
|
|
170
|
+
contentType,
|
|
171
|
+
fileSize: stats.size,
|
|
172
|
+
contentHash
|
|
173
|
+
};
|
|
174
|
+
// Track database entry for cleanup in case of failure
|
|
175
|
+
globalResourceCleanup.addDatabaseEntry(transactionId, this.db, contentId);
|
|
176
|
+
// Insert content metadata with timeout
|
|
177
|
+
await withTimeout(insertContentMetadata(this.db, contentMetadata), 10000, // 10 second timeout for database insertion
|
|
178
|
+
'Database insertion timed out');
|
|
179
|
+
return {
|
|
180
|
+
contentId,
|
|
181
|
+
wasDeduped: false,
|
|
182
|
+
storageType: 'filesystem',
|
|
183
|
+
contentPath: filePath
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
catch (error) {
|
|
187
|
+
if (error instanceof ContentIngestionError || error instanceof InvalidContentFormatError) {
|
|
188
|
+
throw error; // Re-throw content-specific errors
|
|
189
|
+
}
|
|
190
|
+
ContentErrorHandler.handleContentError(error, 'filesystem ingestion', 'ingestFromFilesystem');
|
|
191
|
+
}
|
|
192
|
+
finally {
|
|
193
|
+
// Clear sensitive buffer data
|
|
194
|
+
if (safeBuffer) {
|
|
195
|
+
safeBuffer.clear();
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}, 90000); // 90 second overall timeout for filesystem operations
|
|
199
|
+
}
|
|
200
|
+
/**
|
|
201
|
+
* Ingests content from memory by storing it in content directory with hash-based filenames
|
|
202
|
+
* @param content - Buffer containing the content
|
|
203
|
+
* @param metadata - Memory content metadata
|
|
204
|
+
* @returns Promise that resolves to content ingestion result
|
|
205
|
+
*/
|
|
206
|
+
async ingestFromMemory(content, metadata) {
|
|
207
|
+
// Use resource cleanup with timeout for long-running operations
|
|
208
|
+
return withResourceCleanup(async (transactionId) => {
|
|
209
|
+
// Create safe buffer for memory management (don't clear original for normal operations)
|
|
210
|
+
const safeBuffer = new SafeBuffer(content, { clearOriginal: false });
|
|
211
|
+
globalResourceCleanup.addBuffer(transactionId, safeBuffer.get());
|
|
212
|
+
try {
|
|
213
|
+
// Check content size limit
|
|
214
|
+
if (content.length > this.config.maxFileSize) {
|
|
215
|
+
const sizeMB = Math.round((content.length / 1024 / 1024) * 100) / 100;
|
|
216
|
+
const limitMB = Math.round((this.config.maxFileSize / 1024 / 1024) * 100) / 100;
|
|
217
|
+
throw new ContentIngestionError('content size validation', `Content size (${sizeMB}MB) exceeds maximum allowed size (${limitMB}MB)`, 'memory_ingestion');
|
|
218
|
+
}
|
|
219
|
+
// Enforce storage limits with enhanced error messages and guidance
|
|
220
|
+
await withTimeout(this.enforceStorageLimits(content.length), 30000, // 30 second timeout for storage limit checks
|
|
221
|
+
'Storage limit enforcement timed out');
|
|
222
|
+
// Use optimized hash calculation with caching
|
|
223
|
+
let contentHash;
|
|
224
|
+
// Use optimized hash calculation with caching
|
|
225
|
+
// Don't use a cache key for memory content to ensure proper deduplication
|
|
226
|
+
contentHash = await withTimeout(this.performanceOptimizer.calculateBufferHashOptimized(safeBuffer.get()), 120000, // 2 minute timeout for hash calculation
|
|
227
|
+
'Optimized buffer hash calculation timed out');
|
|
228
|
+
// Log performance metrics for large content
|
|
229
|
+
if (content.length > 50 * 1024 * 1024) {
|
|
230
|
+
const cacheStats = this.performanceOptimizer.getHashCacheStats();
|
|
231
|
+
console.log(`Optimized buffer hash completed: ${formatBytes(content.length)} (Cache hit rate: ${formatCacheHitRate(cacheStats.hitRate)})`);
|
|
232
|
+
}
|
|
233
|
+
// Check for existing content if deduplication is enabled
|
|
234
|
+
if (this.config.enableDeduplication) {
|
|
235
|
+
const existing = await withTimeout(getContentMetadataByHash(this.db, contentHash), 10000, // 10 second timeout for database queries
|
|
236
|
+
'Database query for existing content timed out');
|
|
237
|
+
if (existing) {
|
|
238
|
+
// Content already exists, no cleanup needed
|
|
239
|
+
return {
|
|
240
|
+
contentId: existing.id,
|
|
241
|
+
wasDeduped: true,
|
|
242
|
+
storageType: existing.storageType,
|
|
243
|
+
contentPath: existing.contentPath
|
|
244
|
+
};
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
// Generate content ID
|
|
248
|
+
const contentId = this.generateContentId(safeBuffer.get());
|
|
249
|
+
// Detect content type
|
|
250
|
+
const contentType = metadata.contentType || this.detectContentTypeFromBuffer(safeBuffer.get(), metadata.displayName);
|
|
251
|
+
// Validate content type is supported
|
|
252
|
+
const validation = this.validateContentType(contentType);
|
|
253
|
+
if (!validation.isSupported) {
|
|
254
|
+
throw new InvalidContentFormatError(contentType, validation.error, 'memory_ingestion');
|
|
255
|
+
}
|
|
256
|
+
// Ensure content directory exists
|
|
257
|
+
await withTimeout(this.ensureContentDirectory(), 5000, // 5 second timeout for directory creation
|
|
258
|
+
'Content directory creation timed out');
|
|
259
|
+
// Generate filename with extension based on content type or display name
|
|
260
|
+
const extension = this.getExtensionFromContentType(contentType) ||
|
|
261
|
+
(metadata.displayName ? extname(metadata.displayName) : '.bin');
|
|
262
|
+
const filename = `${contentHash}${extension}`;
|
|
263
|
+
const contentPath = join(this.config.contentDir, filename);
|
|
264
|
+
// Use streaming write for large content to minimize memory usage
|
|
265
|
+
if (content.length > 10 * 1024 * 1024) { // Use streaming for content > 10MB
|
|
266
|
+
const writeResult = await withTimeout(this.streamingOps.writeBufferStreaming(safeBuffer.get(), contentPath), 180000, // 3 minute timeout for large content writing
|
|
267
|
+
'Streaming write operation timed out');
|
|
268
|
+
// Log performance metrics for large content
|
|
269
|
+
if (content.length > 50 * 1024 * 1024) {
|
|
270
|
+
console.log(`Streaming write completed: ${formatBytes(writeResult.bytesWritten)} in ${formatProcessingTime(writeResult.processingTimeMs)}`);
|
|
271
|
+
}
|
|
272
|
+
// Track file for cleanup
|
|
273
|
+
globalResourceCleanup.addTempFile(transactionId, contentPath);
|
|
274
|
+
}
|
|
275
|
+
else {
|
|
276
|
+
// For smaller content, use atomic write with cleanup tracking
|
|
277
|
+
await withTimeout(writeFileAtomic(contentPath, safeBuffer.get(), transactionId), 60000, // 60 second timeout for file writing
|
|
278
|
+
'File write operation timed out');
|
|
279
|
+
}
|
|
280
|
+
// Create content metadata
|
|
281
|
+
const contentMetadata = {
|
|
282
|
+
id: contentId,
|
|
283
|
+
storageType: 'content_dir',
|
|
284
|
+
originalPath: metadata.originalPath,
|
|
285
|
+
contentPath,
|
|
286
|
+
displayName: metadata.displayName,
|
|
287
|
+
contentType,
|
|
288
|
+
fileSize: content.length,
|
|
289
|
+
contentHash
|
|
290
|
+
};
|
|
291
|
+
// Insert content metadata with cleanup tracking
|
|
292
|
+
globalResourceCleanup.addDatabaseEntry(transactionId, this.db, contentId);
|
|
293
|
+
await withTimeout(insertContentMetadata(this.db, contentMetadata), 10000, // 10 second timeout for database insertion
|
|
294
|
+
'Database insertion timed out');
|
|
295
|
+
// Update storage statistics if tracking is enabled
|
|
296
|
+
if (this.config.enableStorageTracking) {
|
|
297
|
+
try {
|
|
298
|
+
await withTimeout(this.updateStorageStats(), 15000, // 15 second timeout for stats update
|
|
299
|
+
'Storage stats update timed out');
|
|
300
|
+
}
|
|
301
|
+
catch (error) {
|
|
302
|
+
// Don't fail the operation if stats update fails
|
|
303
|
+
console.warn('Failed to update storage stats after ingestion:', error);
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
return {
|
|
307
|
+
contentId,
|
|
308
|
+
wasDeduped: false,
|
|
309
|
+
storageType: 'content_dir',
|
|
310
|
+
contentPath
|
|
311
|
+
};
|
|
312
|
+
}
|
|
313
|
+
catch (error) {
|
|
314
|
+
if (error instanceof ContentIngestionError ||
|
|
315
|
+
error instanceof InvalidContentFormatError ||
|
|
316
|
+
error instanceof StorageLimitExceededError) {
|
|
317
|
+
throw error; // Re-throw content-specific errors
|
|
318
|
+
}
|
|
319
|
+
ContentErrorHandler.handleContentError(error, 'memory ingestion', 'ingestFromMemory');
|
|
320
|
+
}
|
|
321
|
+
finally {
|
|
322
|
+
// Clear sensitive buffer data
|
|
323
|
+
safeBuffer.clear();
|
|
324
|
+
}
|
|
325
|
+
}, 120000); // 2 minute overall timeout for the entire operation
|
|
326
|
+
}
|
|
327
|
+
/**
|
|
328
|
+
* Generates a stable content ID using SHA-256 hash of content
|
|
329
|
+
* @param content - Buffer containing the content
|
|
330
|
+
* @returns Content ID string
|
|
331
|
+
*/
|
|
332
|
+
generateContentId(content) {
|
|
333
|
+
return this.generateContentHash(content);
|
|
334
|
+
}
|
|
335
|
+
/**
|
|
336
|
+
* Generates a unique content ID from an existing hash
|
|
337
|
+
* @param hash - Content hash
|
|
338
|
+
* @returns Content ID string
|
|
339
|
+
*/
|
|
340
|
+
generateContentIdFromHash(hash) {
|
|
341
|
+
return hash;
|
|
342
|
+
}
|
|
343
|
+
/**
|
|
344
|
+
* Gets performance statistics for monitoring and optimization
|
|
345
|
+
* @returns Performance statistics
|
|
346
|
+
*/
|
|
347
|
+
getPerformanceStats() {
|
|
348
|
+
const cacheStats = this.performanceOptimizer.getHashCacheStats();
|
|
349
|
+
const operationStats = this.performanceOptimizer.getPerformanceStats();
|
|
350
|
+
return {
|
|
351
|
+
hashCache: cacheStats,
|
|
352
|
+
operations: operationStats
|
|
353
|
+
};
|
|
354
|
+
}
|
|
355
|
+
/**
|
|
356
|
+
* Clears performance caches and resets metrics
|
|
357
|
+
*/
|
|
358
|
+
clearPerformanceCaches() {
|
|
359
|
+
this.performanceOptimizer.clearHashCache();
|
|
360
|
+
}
|
|
361
|
+
/**
|
|
362
|
+
* Checks if content with given ID already exists (deduplication check)
|
|
363
|
+
* @param contentId - Content ID to check
|
|
364
|
+
* @returns Promise that resolves to true if content exists, false otherwise
|
|
365
|
+
*/
|
|
366
|
+
async deduplicateContent(contentId) {
|
|
367
|
+
try {
|
|
368
|
+
const existing = await getContentMetadataByHash(this.db, contentId);
|
|
369
|
+
return existing !== null;
|
|
370
|
+
}
|
|
371
|
+
catch (error) {
|
|
372
|
+
throw new Error(`Failed to check for duplicate content: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
// =============================================================================
|
|
376
|
+
// STORAGE LIMIT ENFORCEMENT METHODS
|
|
377
|
+
// =============================================================================
|
|
378
|
+
/**
|
|
379
|
+
* Enforces storage limits before accepting new content
|
|
380
|
+
* @param contentSize - Size of content to add in bytes
|
|
381
|
+
* @returns Promise that resolves if content can be added, throws error otherwise
|
|
382
|
+
*/
|
|
383
|
+
async enforceStorageLimits(contentSize) {
|
|
384
|
+
if (!this.config.enableStorageTracking) {
|
|
385
|
+
return; // Skip enforcement if tracking is disabled
|
|
386
|
+
}
|
|
387
|
+
try {
|
|
388
|
+
const stats = await this.getStorageStats();
|
|
389
|
+
const currentUsage = stats.contentDirectory.totalSize;
|
|
390
|
+
const projectedUsage = currentUsage + contentSize;
|
|
391
|
+
const maxSize = this.config.maxContentDirSize;
|
|
392
|
+
const currentPercent = (currentUsage / maxSize) * 100;
|
|
393
|
+
const projectedPercent = (projectedUsage / maxSize) * 100;
|
|
394
|
+
// Check if adding content would exceed error threshold
|
|
395
|
+
if (projectedPercent > this.config.storageErrorThreshold) {
|
|
396
|
+
const currentMB = Math.round((currentUsage / 1024 / 1024) * 100) / 100;
|
|
397
|
+
const maxMB = Math.round((maxSize / 1024 / 1024) * 100) / 100;
|
|
398
|
+
const contentMB = Math.round((contentSize / 1024 / 1024) * 100) / 100;
|
|
399
|
+
const remainingMB = Math.round(((maxSize - currentUsage) / 1024 / 1024) * 100) / 100;
|
|
400
|
+
throw new StorageLimitExceededError(currentMB, maxMB, contentMB, 'storage_enforcement');
|
|
401
|
+
}
|
|
402
|
+
// Check if adding content would exceed warning threshold
|
|
403
|
+
if (projectedPercent > this.config.storageWarningThreshold && currentPercent <= this.config.storageWarningThreshold) {
|
|
404
|
+
const currentMB = Math.round((currentUsage / 1024 / 1024) * 100) / 100;
|
|
405
|
+
const maxMB = Math.round((maxSize / 1024 / 1024) * 100) / 100;
|
|
406
|
+
console.warn(`⚠️ Storage Warning: Content directory usage will reach ${Math.round(projectedPercent)}% after adding this content.\n` +
|
|
407
|
+
`Current: ${currentMB}MB / ${maxMB}MB (${Math.round(currentPercent)}%)\n` +
|
|
408
|
+
`Consider running cleanup operations to free space.`);
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
catch (error) {
|
|
412
|
+
if (error instanceof Error && error.message.includes('Storage limit exceeded')) {
|
|
413
|
+
throw error; // Re-throw storage limit errors
|
|
414
|
+
}
|
|
415
|
+
// Log other errors but don't fail the operation
|
|
416
|
+
console.warn('Failed to enforce storage limits:', error);
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
/**
|
|
420
|
+
* Gets storage limit status and recommendations
|
|
421
|
+
* @returns Promise that resolves to storage limit status
|
|
422
|
+
*/
|
|
423
|
+
async getStorageLimitStatus() {
|
|
424
|
+
try {
|
|
425
|
+
const stats = await this.getStorageStats();
|
|
426
|
+
const currentUsage = stats.contentDirectory.totalSize;
|
|
427
|
+
const maxSize = this.config.maxContentDirSize;
|
|
428
|
+
const currentPercent = (currentUsage / maxSize) * 100;
|
|
429
|
+
const isNearWarningThreshold = currentPercent >= this.config.storageWarningThreshold;
|
|
430
|
+
const isNearErrorThreshold = currentPercent >= this.config.storageErrorThreshold;
|
|
431
|
+
const canAcceptContent = currentPercent < this.config.storageErrorThreshold;
|
|
432
|
+
const recommendations = [];
|
|
433
|
+
if (isNearErrorThreshold) {
|
|
434
|
+
recommendations.push('🚨 URGENT: Storage is critically full - new content will be rejected');
|
|
435
|
+
recommendations.push('Run cleanup operations immediately: removeOrphanedFiles() and removeDuplicateContent()');
|
|
436
|
+
recommendations.push('Consider increasing storage limits or removing unused content');
|
|
437
|
+
}
|
|
438
|
+
else if (isNearWarningThreshold) {
|
|
439
|
+
recommendations.push('⚠️ WARNING: Storage is getting full');
|
|
440
|
+
recommendations.push('Consider running cleanup operations: removeOrphanedFiles() and removeDuplicateContent()');
|
|
441
|
+
recommendations.push('Monitor storage usage closely');
|
|
442
|
+
}
|
|
443
|
+
else if (currentPercent > 50) {
|
|
444
|
+
recommendations.push('ℹ️ Storage is over 50% full');
|
|
445
|
+
recommendations.push('Regular cleanup operations recommended');
|
|
446
|
+
}
|
|
447
|
+
else {
|
|
448
|
+
recommendations.push('✅ Storage usage is healthy');
|
|
449
|
+
}
|
|
450
|
+
return {
|
|
451
|
+
currentUsagePercent: Math.round(currentPercent * 100) / 100,
|
|
452
|
+
isNearWarningThreshold,
|
|
453
|
+
isNearErrorThreshold,
|
|
454
|
+
canAcceptContent,
|
|
455
|
+
recommendations,
|
|
456
|
+
limits: {
|
|
457
|
+
warningThreshold: this.config.storageWarningThreshold,
|
|
458
|
+
errorThreshold: this.config.storageErrorThreshold,
|
|
459
|
+
maxSizeMB: Math.round((maxSize / 1024 / 1024) * 100) / 100,
|
|
460
|
+
currentSizeMB: Math.round((currentUsage / 1024 / 1024) * 100) / 100,
|
|
461
|
+
remainingSizeMB: Math.round(((maxSize - currentUsage) / 1024 / 1024) * 100) / 100
|
|
462
|
+
}
|
|
463
|
+
};
|
|
464
|
+
}
|
|
465
|
+
catch (error) {
|
|
466
|
+
throw new Error(`Failed to get storage limit status: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
// =============================================================================
|
|
470
|
+
// PRIVATE METHODS
|
|
471
|
+
// =============================================================================
|
|
472
|
+
/**
|
|
473
|
+
* Parses size string or number to bytes
|
|
474
|
+
* @param size - Size as number (bytes) or string like "50MB", "2GB"
|
|
475
|
+
* @returns Size in bytes
|
|
476
|
+
*/
|
|
477
|
+
parseSizeToBytes(size) {
|
|
478
|
+
if (typeof size === 'number') {
|
|
479
|
+
return size;
|
|
480
|
+
}
|
|
481
|
+
const sizeStr = size.toString().trim().toUpperCase();
|
|
482
|
+
const match = sizeStr.match(/^(\d+(?:\.\d+)?)\s*(B|KB|MB|GB|TB)?$/);
|
|
483
|
+
if (!match) {
|
|
484
|
+
throw new Error(`Invalid size format: ${size}. Use formats like "50MB", "2GB", or number of bytes.`);
|
|
485
|
+
}
|
|
486
|
+
const value = parseFloat(match[1]);
|
|
487
|
+
const unit = match[2] || 'B';
|
|
488
|
+
const multipliers = {
|
|
489
|
+
'B': 1,
|
|
490
|
+
'KB': 1024,
|
|
491
|
+
'MB': 1024 * 1024,
|
|
492
|
+
'GB': 1024 * 1024 * 1024,
|
|
493
|
+
'TB': 1024 * 1024 * 1024 * 1024
|
|
494
|
+
};
|
|
495
|
+
return Math.round(value * multipliers[unit]);
|
|
496
|
+
}
|
|
497
|
+
/**
|
|
498
|
+
* Generates SHA-256 hash of content
|
|
499
|
+
* @param content - Buffer containing the content
|
|
500
|
+
* @returns SHA-256 hash string
|
|
501
|
+
*/
|
|
502
|
+
generateContentHash(content) {
|
|
503
|
+
return createHash('sha256').update(content).digest('hex');
|
|
504
|
+
}
|
|
505
|
+
/**
|
|
506
|
+
* Detects content type from file path and optionally content using enhanced magic number detection
|
|
507
|
+
* @param filePath - Path to the file
|
|
508
|
+
* @param content - File content buffer (optional)
|
|
509
|
+
* @returns MIME type string
|
|
510
|
+
*/
|
|
511
|
+
detectContentType(filePath, content) {
|
|
512
|
+
const extension = extname(filePath).toLowerCase();
|
|
513
|
+
// First try magic number detection for more reliable identification (if content is available)
|
|
514
|
+
if (content) {
|
|
515
|
+
const magicBasedType = this.detectContentTypeByMagicNumbers(content);
|
|
516
|
+
if (magicBasedType !== 'application/octet-stream') {
|
|
517
|
+
return magicBasedType;
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
// Fall back to extension-based detection
|
|
521
|
+
const extensionBasedType = this.detectContentTypeByExtension(extension);
|
|
522
|
+
if (extensionBasedType !== 'application/octet-stream') {
|
|
523
|
+
return extensionBasedType;
|
|
524
|
+
}
|
|
525
|
+
// Final fallback: check if it's text content (if content is available)
|
|
526
|
+
if (content && this.isTextContent(content)) {
|
|
527
|
+
return 'text/plain';
|
|
528
|
+
}
|
|
529
|
+
return 'application/octet-stream';
|
|
530
|
+
}
|
|
531
|
+
/**
|
|
532
|
+
* Detects content type from buffer and optional filename for memory-based ingestion
|
|
533
|
+
* @param content - Content buffer
|
|
534
|
+
* @param filename - Optional filename for extension-based detection
|
|
535
|
+
* @returns MIME type string
|
|
536
|
+
*/
|
|
537
|
+
detectContentTypeFromBuffer(content, filename) {
|
|
538
|
+
// Use filename if provided for more accurate detection
|
|
539
|
+
if (filename) {
|
|
540
|
+
return this.detectContentType(filename, content);
|
|
541
|
+
}
|
|
542
|
+
// Use magic number detection for buffer-only content
|
|
543
|
+
const magicBasedType = this.detectContentTypeByMagicNumbers(content);
|
|
544
|
+
if (magicBasedType !== 'application/octet-stream') {
|
|
545
|
+
return magicBasedType;
|
|
546
|
+
}
|
|
547
|
+
// Final fallback: check if it's text content
|
|
548
|
+
if (this.isTextContent(content)) {
|
|
549
|
+
return 'text/plain';
|
|
550
|
+
}
|
|
551
|
+
return 'application/octet-stream';
|
|
552
|
+
}
|
|
553
|
+
/**
|
|
554
|
+
* Enhanced magic number detection for comprehensive content type identification
|
|
555
|
+
* @param content - Content buffer to analyze
|
|
556
|
+
* @returns MIME type string based on magic numbers, or 'application/octet-stream' if unknown
|
|
557
|
+
*/
|
|
558
|
+
detectContentTypeByMagicNumbers(content) {
|
|
559
|
+
if (content.length === 0) {
|
|
560
|
+
return 'application/octet-stream';
|
|
561
|
+
}
|
|
562
|
+
// Get enough bytes for magic number detection
|
|
563
|
+
const magicBytes = content.subarray(0, Math.min(32, content.length));
|
|
564
|
+
// PDF - %PDF
|
|
565
|
+
if (magicBytes.length >= 4 && magicBytes.subarray(0, 4).toString() === '%PDF') {
|
|
566
|
+
return 'application/pdf';
|
|
567
|
+
}
|
|
568
|
+
// PNG - 89 50 4E 47 0D 0A 1A 0A
|
|
569
|
+
if (magicBytes.length >= 8 &&
|
|
570
|
+
magicBytes.subarray(0, 8).equals(Buffer.from([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]))) {
|
|
571
|
+
return 'image/png';
|
|
572
|
+
}
|
|
573
|
+
// JPEG - FF D8 FF
|
|
574
|
+
if (magicBytes.length >= 3 &&
|
|
575
|
+
magicBytes.subarray(0, 3).equals(Buffer.from([0xFF, 0xD8, 0xFF]))) {
|
|
576
|
+
return 'image/jpeg';
|
|
577
|
+
}
|
|
578
|
+
// GIF87a or GIF89a
|
|
579
|
+
if (magicBytes.length >= 6) {
|
|
580
|
+
const gifHeader = magicBytes.subarray(0, 6).toString();
|
|
581
|
+
if (gifHeader === 'GIF87a' || gifHeader === 'GIF89a') {
|
|
582
|
+
return 'image/gif';
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
// WebP - RIFF....WEBP
|
|
586
|
+
if (magicBytes.length >= 12 &&
|
|
587
|
+
magicBytes.subarray(0, 4).toString() === 'RIFF' &&
|
|
588
|
+
magicBytes.subarray(8, 12).toString() === 'WEBP') {
|
|
589
|
+
return 'image/webp';
|
|
590
|
+
}
|
|
591
|
+
// ZIP-based formats (DOCX, XLSX, etc.) - 50 4B 03 04 or 50 4B 05 06 or 50 4B 07 08
|
|
592
|
+
if (magicBytes.length >= 4) {
|
|
593
|
+
const zipMagic = magicBytes.subarray(0, 4);
|
|
594
|
+
if (zipMagic.equals(Buffer.from([0x50, 0x4B, 0x03, 0x04])) ||
|
|
595
|
+
zipMagic.equals(Buffer.from([0x50, 0x4B, 0x05, 0x06])) ||
|
|
596
|
+
zipMagic.equals(Buffer.from([0x50, 0x4B, 0x07, 0x08]))) {
|
|
597
|
+
// For ZIP files, we need more context to determine the specific type
|
|
598
|
+
// This is a generic ZIP file, specific detection would require filename
|
|
599
|
+
return 'application/zip';
|
|
600
|
+
}
|
|
601
|
+
}
|
|
602
|
+
// BMP - 42 4D
|
|
603
|
+
if (magicBytes.length >= 2 &&
|
|
604
|
+
magicBytes.subarray(0, 2).equals(Buffer.from([0x42, 0x4D]))) {
|
|
605
|
+
return 'image/bmp';
|
|
606
|
+
}
|
|
607
|
+
// TIFF - 49 49 2A 00 (little endian) or 4D 4D 00 2A (big endian)
|
|
608
|
+
if (magicBytes.length >= 4) {
|
|
609
|
+
const tiffLE = Buffer.from([0x49, 0x49, 0x2A, 0x00]);
|
|
610
|
+
const tiffBE = Buffer.from([0x4D, 0x4D, 0x00, 0x2A]);
|
|
611
|
+
if (magicBytes.subarray(0, 4).equals(tiffLE) || magicBytes.subarray(0, 4).equals(tiffBE)) {
|
|
612
|
+
return 'image/tiff';
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
// ICO - 00 00 01 00
|
|
616
|
+
if (magicBytes.length >= 4 &&
|
|
617
|
+
magicBytes.subarray(0, 4).equals(Buffer.from([0x00, 0x00, 0x01, 0x00]))) {
|
|
618
|
+
return 'image/x-icon';
|
|
619
|
+
}
|
|
620
|
+
// SVG - Check for XML declaration and SVG tag
|
|
621
|
+
if (magicBytes.length >= 5) {
|
|
622
|
+
const start = magicBytes.toString('utf8', 0, Math.min(100, magicBytes.length)).toLowerCase();
|
|
623
|
+
if (start.includes('<svg') || (start.includes('<?xml') && start.includes('<svg'))) {
|
|
624
|
+
return 'image/svg+xml';
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
// HTML - Check for HTML tags
|
|
628
|
+
if (magicBytes.length >= 5) {
|
|
629
|
+
const start = magicBytes.toString('utf8', 0, Math.min(100, magicBytes.length)).toLowerCase();
|
|
630
|
+
if (start.includes('<!doctype html') || start.includes('<html') || start.includes('<head')) {
|
|
631
|
+
return 'text/html';
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
// XML - Check for XML declaration
|
|
635
|
+
if (magicBytes.length >= 5) {
|
|
636
|
+
const start = magicBytes.toString('utf8', 0, Math.min(50, magicBytes.length)).toLowerCase();
|
|
637
|
+
if (start.startsWith('<?xml')) {
|
|
638
|
+
return 'application/xml';
|
|
639
|
+
}
|
|
640
|
+
}
|
|
641
|
+
// JSON - Check for JSON structure (basic heuristic)
|
|
642
|
+
if (magicBytes.length >= 2) {
|
|
643
|
+
const start = magicBytes.toString('utf8', 0, Math.min(10, magicBytes.length)).trim();
|
|
644
|
+
if (start.startsWith('{') || start.startsWith('[')) {
|
|
645
|
+
// Additional validation to ensure it's likely JSON
|
|
646
|
+
try {
|
|
647
|
+
const sample = content.toString('utf8', 0, Math.min(1024, content.length));
|
|
648
|
+
JSON.parse(sample);
|
|
649
|
+
return 'application/json';
|
|
650
|
+
}
|
|
651
|
+
catch {
|
|
652
|
+
// Not valid JSON, continue with other detection
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
return 'application/octet-stream';
|
|
657
|
+
}
|
|
658
|
+
/**
|
|
659
|
+
* Extension-based content type detection with comprehensive mapping
|
|
660
|
+
* @param extension - File extension (with or without dot)
|
|
661
|
+
* @returns MIME type string based on extension, or 'application/octet-stream' if unknown
|
|
662
|
+
*/
|
|
663
|
+
detectContentTypeByExtension(extension) {
|
|
664
|
+
const ext = extension.toLowerCase().startsWith('.') ? extension.toLowerCase() : `.${extension.toLowerCase()}`;
|
|
665
|
+
// Text formats
|
|
666
|
+
switch (ext) {
|
|
667
|
+
case '.txt':
|
|
668
|
+
case '.text':
|
|
669
|
+
return 'text/plain';
|
|
670
|
+
case '.md':
|
|
671
|
+
case '.markdown':
|
|
672
|
+
case '.mdown':
|
|
673
|
+
return 'text/markdown';
|
|
674
|
+
case '.html':
|
|
675
|
+
case '.htm':
|
|
676
|
+
return 'text/html';
|
|
677
|
+
case '.css':
|
|
678
|
+
return 'text/css';
|
|
679
|
+
case '.js':
|
|
680
|
+
case '.mjs':
|
|
681
|
+
return 'application/javascript';
|
|
682
|
+
case '.json':
|
|
683
|
+
return 'application/json';
|
|
684
|
+
case '.xml':
|
|
685
|
+
return 'application/xml';
|
|
686
|
+
case '.csv':
|
|
687
|
+
return 'text/csv';
|
|
688
|
+
case '.rtf':
|
|
689
|
+
return 'application/rtf';
|
|
690
|
+
// Document formats
|
|
691
|
+
case '.pdf':
|
|
692
|
+
return 'application/pdf';
|
|
693
|
+
case '.doc':
|
|
694
|
+
return 'application/msword';
|
|
695
|
+
case '.docx':
|
|
696
|
+
return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
|
|
697
|
+
case '.xls':
|
|
698
|
+
return 'application/vnd.ms-excel';
|
|
699
|
+
case '.xlsx':
|
|
700
|
+
return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
|
|
701
|
+
case '.ppt':
|
|
702
|
+
return 'application/vnd.ms-powerpoint';
|
|
703
|
+
case '.pptx':
|
|
704
|
+
return 'application/vnd.openxmlformats-officedocument.presentationml.presentation';
|
|
705
|
+
case '.odt':
|
|
706
|
+
return 'application/vnd.oasis.opendocument.text';
|
|
707
|
+
case '.ods':
|
|
708
|
+
return 'application/vnd.oasis.opendocument.spreadsheet';
|
|
709
|
+
case '.odp':
|
|
710
|
+
return 'application/vnd.oasis.opendocument.presentation';
|
|
711
|
+
// Image formats
|
|
712
|
+
case '.jpg':
|
|
713
|
+
case '.jpeg':
|
|
714
|
+
return 'image/jpeg';
|
|
715
|
+
case '.png':
|
|
716
|
+
return 'image/png';
|
|
717
|
+
case '.gif':
|
|
718
|
+
return 'image/gif';
|
|
719
|
+
case '.webp':
|
|
720
|
+
return 'image/webp';
|
|
721
|
+
case '.bmp':
|
|
722
|
+
return 'image/bmp';
|
|
723
|
+
case '.tiff':
|
|
724
|
+
case '.tif':
|
|
725
|
+
return 'image/tiff';
|
|
726
|
+
case '.ico':
|
|
727
|
+
return 'image/x-icon';
|
|
728
|
+
case '.svg':
|
|
729
|
+
return 'image/svg+xml';
|
|
730
|
+
case '.avif':
|
|
731
|
+
return 'image/avif';
|
|
732
|
+
case '.heic':
|
|
733
|
+
case '.heif':
|
|
734
|
+
return 'image/heic';
|
|
735
|
+
// Archive formats
|
|
736
|
+
case '.zip':
|
|
737
|
+
return 'application/zip';
|
|
738
|
+
case '.rar':
|
|
739
|
+
return 'application/vnd.rar';
|
|
740
|
+
case '.7z':
|
|
741
|
+
return 'application/x-7z-compressed';
|
|
742
|
+
case '.tar':
|
|
743
|
+
return 'application/x-tar';
|
|
744
|
+
case '.gz':
|
|
745
|
+
return 'application/gzip';
|
|
746
|
+
// Audio formats
|
|
747
|
+
case '.mp3':
|
|
748
|
+
return 'audio/mpeg';
|
|
749
|
+
case '.wav':
|
|
750
|
+
return 'audio/wav';
|
|
751
|
+
case '.ogg':
|
|
752
|
+
return 'audio/ogg';
|
|
753
|
+
case '.flac':
|
|
754
|
+
return 'audio/flac';
|
|
755
|
+
// Video formats
|
|
756
|
+
case '.mp4':
|
|
757
|
+
return 'video/mp4';
|
|
758
|
+
case '.avi':
|
|
759
|
+
return 'video/x-msvideo';
|
|
760
|
+
case '.mov':
|
|
761
|
+
return 'video/quicktime';
|
|
762
|
+
case '.webm':
|
|
763
|
+
return 'video/webm';
|
|
764
|
+
default:
|
|
765
|
+
return 'application/octet-stream';
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
/**
|
|
769
|
+
* Validates if a content type is supported for processing
|
|
770
|
+
* @param contentType - MIME type to validate
|
|
771
|
+
* @returns Object with validation result and error message if unsupported
|
|
772
|
+
*/
|
|
773
|
+
validateContentType(contentType) {
|
|
774
|
+
// Define supported content types for RAG-lite processing
|
|
775
|
+
const supportedTypes = new Set([
|
|
776
|
+
// Text formats (fully supported)
|
|
777
|
+
'text/plain',
|
|
778
|
+
'text/markdown',
|
|
779
|
+
'text/html',
|
|
780
|
+
'text/css',
|
|
781
|
+
'text/csv',
|
|
782
|
+
'application/json',
|
|
783
|
+
'application/xml',
|
|
784
|
+
'application/javascript',
|
|
785
|
+
'application/rtf',
|
|
786
|
+
// Document formats (supported via preprocessing)
|
|
787
|
+
'application/pdf',
|
|
788
|
+
'application/msword',
|
|
789
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
790
|
+
'application/vnd.ms-excel',
|
|
791
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
|
792
|
+
'application/vnd.ms-powerpoint',
|
|
793
|
+
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
|
794
|
+
'application/vnd.oasis.opendocument.text',
|
|
795
|
+
'application/vnd.oasis.opendocument.spreadsheet',
|
|
796
|
+
'application/vnd.oasis.opendocument.presentation',
|
|
797
|
+
// Image formats (supported via multimodal processing)
|
|
798
|
+
'image/jpeg',
|
|
799
|
+
'image/png',
|
|
800
|
+
'image/gif',
|
|
801
|
+
'image/webp',
|
|
802
|
+
'image/bmp',
|
|
803
|
+
'image/tiff',
|
|
804
|
+
'image/svg+xml',
|
|
805
|
+
'image/avif',
|
|
806
|
+
'image/heic',
|
|
807
|
+
// Generic binary (accepted but limited processing)
|
|
808
|
+
'application/octet-stream',
|
|
809
|
+
'application/zip' // May contain supported documents
|
|
810
|
+
]);
|
|
811
|
+
if (supportedTypes.has(contentType)) {
|
|
812
|
+
return { isSupported: true };
|
|
813
|
+
}
|
|
814
|
+
// Provide specific guidance for unsupported types
|
|
815
|
+
const category = contentType.split('/')[0];
|
|
816
|
+
let error = `Unsupported content type: ${contentType}. `;
|
|
817
|
+
switch (category) {
|
|
818
|
+
case 'audio':
|
|
819
|
+
error += 'Audio files are not supported for text-based RAG processing. Consider extracting transcripts or metadata.';
|
|
820
|
+
break;
|
|
821
|
+
case 'video':
|
|
822
|
+
error += 'Video files are not supported for text-based RAG processing. Consider extracting transcripts, subtitles, or metadata.';
|
|
823
|
+
break;
|
|
824
|
+
case 'application':
|
|
825
|
+
if (contentType.includes('executable') || contentType.includes('binary')) {
|
|
826
|
+
error += 'Executable and binary application files are not supported for security and processing reasons.';
|
|
827
|
+
}
|
|
828
|
+
else {
|
|
829
|
+
error += 'This application format is not currently supported. Supported formats include PDF, Office documents, and common text formats.';
|
|
830
|
+
}
|
|
831
|
+
break;
|
|
832
|
+
default:
|
|
833
|
+
error += `The ${category} content type is not supported. Supported types include text, documents (PDF, DOCX), and images.`;
|
|
834
|
+
}
|
|
835
|
+
return { isSupported: false, error };
|
|
836
|
+
}
|
|
837
|
+
/**
|
|
838
|
+
* Gets file extension from content type with enhanced mapping
|
|
839
|
+
* @param contentType - MIME type
|
|
840
|
+
* @returns File extension with dot, or null if unknown
|
|
841
|
+
*/
|
|
842
|
+
getExtensionFromContentType(contentType) {
|
|
843
|
+
switch (contentType) {
|
|
844
|
+
// Text formats
|
|
845
|
+
case 'text/plain':
|
|
846
|
+
return '.txt';
|
|
847
|
+
case 'text/markdown':
|
|
848
|
+
return '.md';
|
|
849
|
+
case 'text/html':
|
|
850
|
+
return '.html';
|
|
851
|
+
case 'text/css':
|
|
852
|
+
return '.css';
|
|
853
|
+
case 'text/csv':
|
|
854
|
+
return '.csv';
|
|
855
|
+
case 'application/json':
|
|
856
|
+
return '.json';
|
|
857
|
+
case 'application/xml':
|
|
858
|
+
return '.xml';
|
|
859
|
+
case 'application/javascript':
|
|
860
|
+
return '.js';
|
|
861
|
+
case 'application/rtf':
|
|
862
|
+
return '.rtf';
|
|
863
|
+
// Document formats
|
|
864
|
+
case 'application/pdf':
|
|
865
|
+
return '.pdf';
|
|
866
|
+
case 'application/msword':
|
|
867
|
+
return '.doc';
|
|
868
|
+
case 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
|
|
869
|
+
return '.docx';
|
|
870
|
+
case 'application/vnd.ms-excel':
|
|
871
|
+
return '.xls';
|
|
872
|
+
case 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
|
|
873
|
+
return '.xlsx';
|
|
874
|
+
case 'application/vnd.ms-powerpoint':
|
|
875
|
+
return '.ppt';
|
|
876
|
+
case 'application/vnd.openxmlformats-officedocument.presentationml.presentation':
|
|
877
|
+
return '.pptx';
|
|
878
|
+
case 'application/vnd.oasis.opendocument.text':
|
|
879
|
+
return '.odt';
|
|
880
|
+
case 'application/vnd.oasis.opendocument.spreadsheet':
|
|
881
|
+
return '.ods';
|
|
882
|
+
case 'application/vnd.oasis.opendocument.presentation':
|
|
883
|
+
return '.odp';
|
|
884
|
+
// Image formats
|
|
885
|
+
case 'image/jpeg':
|
|
886
|
+
return '.jpg';
|
|
887
|
+
case 'image/png':
|
|
888
|
+
return '.png';
|
|
889
|
+
case 'image/gif':
|
|
890
|
+
return '.gif';
|
|
891
|
+
case 'image/webp':
|
|
892
|
+
return '.webp';
|
|
893
|
+
case 'image/bmp':
|
|
894
|
+
return '.bmp';
|
|
895
|
+
case 'image/tiff':
|
|
896
|
+
return '.tiff';
|
|
897
|
+
case 'image/x-icon':
|
|
898
|
+
return '.ico';
|
|
899
|
+
case 'image/svg+xml':
|
|
900
|
+
return '.svg';
|
|
901
|
+
case 'image/avif':
|
|
902
|
+
return '.avif';
|
|
903
|
+
case 'image/heic':
|
|
904
|
+
return '.heic';
|
|
905
|
+
// Archive formats
|
|
906
|
+
case 'application/zip':
|
|
907
|
+
return '.zip';
|
|
908
|
+
case 'application/vnd.rar':
|
|
909
|
+
return '.rar';
|
|
910
|
+
case 'application/x-7z-compressed':
|
|
911
|
+
return '.7z';
|
|
912
|
+
case 'application/x-tar':
|
|
913
|
+
return '.tar';
|
|
914
|
+
case 'application/gzip':
|
|
915
|
+
return '.gz';
|
|
916
|
+
default:
|
|
917
|
+
return '.bin'; // Generic binary extension for unknown types
|
|
918
|
+
}
|
|
919
|
+
}
|
|
920
|
+
/**
|
|
921
|
+
* Enhanced text content detection with better UTF-8 and encoding support
|
|
922
|
+
* @param content - Content buffer
|
|
923
|
+
* @returns True if content appears to be text
|
|
924
|
+
*/
|
|
925
|
+
isTextContent(content) {
|
|
926
|
+
if (content.length === 0) {
|
|
927
|
+
return true; // Empty content is considered text
|
|
928
|
+
}
|
|
929
|
+
// Check first 2KB for better accuracy
|
|
930
|
+
const sample = content.subarray(0, Math.min(2048, content.length));
|
|
931
|
+
let nonTextBytes = 0;
|
|
932
|
+
let totalBytes = sample.length;
|
|
933
|
+
// Skip UTF-8 BOM if present
|
|
934
|
+
let startIndex = 0;
|
|
935
|
+
if (sample.length >= 3 &&
|
|
936
|
+
sample[0] === 0xEF && sample[1] === 0xBB && sample[2] === 0xBF) {
|
|
937
|
+
startIndex = 3;
|
|
938
|
+
}
|
|
939
|
+
// Skip UTF-16 BOM if present
|
|
940
|
+
if (sample.length >= 2 &&
|
|
941
|
+
((sample[0] === 0xFF && sample[1] === 0xFE) ||
|
|
942
|
+
(sample[0] === 0xFE && sample[1] === 0xFF))) {
|
|
943
|
+
startIndex = 2;
|
|
944
|
+
}
|
|
945
|
+
for (let i = startIndex; i < sample.length; i++) {
|
|
946
|
+
const byte = sample[i];
|
|
947
|
+
// Allow common control characters
|
|
948
|
+
if (byte === 9 || byte === 10 || byte === 13) { // Tab, LF, CR
|
|
949
|
+
continue;
|
|
950
|
+
}
|
|
951
|
+
// Allow printable ASCII (32-126)
|
|
952
|
+
if (byte >= 32 && byte <= 126) {
|
|
953
|
+
continue;
|
|
954
|
+
}
|
|
955
|
+
// Allow extended ASCII and UTF-8 continuation bytes
|
|
956
|
+
if (byte >= 128) {
|
|
957
|
+
// Check if this is part of a valid UTF-8 sequence
|
|
958
|
+
if (this.isValidUTF8Byte(sample, i)) {
|
|
959
|
+
continue;
|
|
960
|
+
}
|
|
961
|
+
}
|
|
962
|
+
// Count non-text bytes
|
|
963
|
+
nonTextBytes++;
|
|
964
|
+
}
|
|
965
|
+
// Consider it text if less than 5% of bytes are non-text
|
|
966
|
+
const nonTextRatio = nonTextBytes / totalBytes;
|
|
967
|
+
return nonTextRatio < 0.05;
|
|
968
|
+
}
|
|
969
|
+
/**
|
|
970
|
+
* Checks if a byte at given position is part of a valid UTF-8 sequence
|
|
971
|
+
* @param buffer - Buffer to check
|
|
972
|
+
* @param index - Index of the byte to check
|
|
973
|
+
* @returns True if the byte is part of valid UTF-8
|
|
974
|
+
*/
|
|
975
|
+
isValidUTF8Byte(buffer, index) {
|
|
976
|
+
const byte = buffer[index];
|
|
977
|
+
// UTF-8 continuation byte (10xxxxxx)
|
|
978
|
+
if ((byte & 0xC0) === 0x80) {
|
|
979
|
+
return true;
|
|
980
|
+
}
|
|
981
|
+
// UTF-8 start bytes
|
|
982
|
+
if ((byte & 0xE0) === 0xC0) { // 110xxxxx - 2-byte sequence
|
|
983
|
+
return index + 1 < buffer.length && (buffer[index + 1] & 0xC0) === 0x80;
|
|
984
|
+
}
|
|
985
|
+
if ((byte & 0xF0) === 0xE0) { // 1110xxxx - 3-byte sequence
|
|
986
|
+
return index + 2 < buffer.length &&
|
|
987
|
+
(buffer[index + 1] & 0xC0) === 0x80 &&
|
|
988
|
+
(buffer[index + 2] & 0xC0) === 0x80;
|
|
989
|
+
}
|
|
990
|
+
if ((byte & 0xF8) === 0xF0) { // 11110xxx - 4-byte sequence
|
|
991
|
+
return index + 3 < buffer.length &&
|
|
992
|
+
(buffer[index + 1] & 0xC0) === 0x80 &&
|
|
993
|
+
(buffer[index + 2] & 0xC0) === 0x80 &&
|
|
994
|
+
(buffer[index + 3] & 0xC0) === 0x80;
|
|
995
|
+
}
|
|
996
|
+
// Extended ASCII (128-255) - allow but consider less reliable
|
|
997
|
+
return byte >= 128 && byte <= 255;
|
|
998
|
+
}
|
|
999
|
+
/**
|
|
1000
|
+
* Ensures content directory exists
|
|
1001
|
+
* @returns Promise that resolves when directory is created
|
|
1002
|
+
*/
|
|
1003
|
+
async ensureContentDirectory() {
|
|
1004
|
+
try {
|
|
1005
|
+
await fs.mkdir(this.config.contentDir, { recursive: true });
|
|
1006
|
+
}
|
|
1007
|
+
catch (error) {
|
|
1008
|
+
throw new Error(`Failed to create content directory: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1009
|
+
}
|
|
1010
|
+
}
|
|
1011
|
+
/**
|
|
1012
|
+
* Writes content to file atomically to prevent partial writes
|
|
1013
|
+
* @param filePath - Path to write to
|
|
1014
|
+
* @param content - Content to write
|
|
1015
|
+
* @returns Promise that resolves when write is complete
|
|
1016
|
+
* @deprecated Use writeFileAtomic from resource-cleanup.ts for better resource management
|
|
1017
|
+
*/
|
|
1018
|
+
async writeContentAtomic(filePath, content) {
|
|
1019
|
+
const tempPath = `${filePath}.tmp.${Date.now()}`;
|
|
1020
|
+
try {
|
|
1021
|
+
// Ensure directory exists
|
|
1022
|
+
await fs.mkdir(dirname(filePath), { recursive: true });
|
|
1023
|
+
// Write to temporary file first
|
|
1024
|
+
await fs.writeFile(tempPath, content);
|
|
1025
|
+
// Atomically move to final location
|
|
1026
|
+
await fs.rename(tempPath, filePath);
|
|
1027
|
+
}
|
|
1028
|
+
catch (error) {
|
|
1029
|
+
// Clean up temporary file if it exists
|
|
1030
|
+
try {
|
|
1031
|
+
await fs.unlink(tempPath);
|
|
1032
|
+
}
|
|
1033
|
+
catch {
|
|
1034
|
+
// Ignore cleanup errors
|
|
1035
|
+
}
|
|
1036
|
+
throw new ContentDirectoryError('atomic write', `Failed to write content atomically: ${error instanceof Error ? error.message : 'Unknown error'}`, 'file_write');
|
|
1037
|
+
}
|
|
1038
|
+
}
|
|
1039
|
+
// =============================================================================
|
|
1040
|
+
// CONTENT DIRECTORY MANAGEMENT METHODS
|
|
1041
|
+
// =============================================================================
|
|
1042
|
+
/**
|
|
1043
|
+
* Gets comprehensive storage statistics for monitoring and reporting
|
|
1044
|
+
* @returns Promise that resolves to detailed storage statistics
|
|
1045
|
+
*/
|
|
1046
|
+
async getStorageStats() {
|
|
1047
|
+
try {
|
|
1048
|
+
const dbStats = await getStorageStats(this.db);
|
|
1049
|
+
if (!dbStats) {
|
|
1050
|
+
// Initialize stats if they don't exist
|
|
1051
|
+
await this.updateStorageStats();
|
|
1052
|
+
return this.getStorageStats(); // Recursive call after initialization
|
|
1053
|
+
}
|
|
1054
|
+
// Calculate filesystem references total size
|
|
1055
|
+
const filesystemContent = await getContentMetadataByStorageType(this.db, 'filesystem');
|
|
1056
|
+
const filesystemTotalSize = filesystemContent.reduce((sum, meta) => sum + meta.fileSize, 0);
|
|
1057
|
+
// Calculate derived statistics
|
|
1058
|
+
const contentDirSizeMB = Math.round((dbStats.contentDirSize / 1024 / 1024) * 100) / 100;
|
|
1059
|
+
const filesystemSizeMB = Math.round((filesystemTotalSize / 1024 / 1024) * 100) / 100;
|
|
1060
|
+
const maxSizeMB = Math.round((this.config.maxContentDirSize / 1024 / 1024) * 100) / 100;
|
|
1061
|
+
const averageFileSize = dbStats.contentDirFiles > 0
|
|
1062
|
+
? Math.round(dbStats.contentDirSize / dbStats.contentDirFiles)
|
|
1063
|
+
: 0;
|
|
1064
|
+
const totalContentItems = dbStats.contentDirFiles + dbStats.filesystemRefs;
|
|
1065
|
+
const totalStorageUsed = dbStats.contentDirSize + filesystemTotalSize;
|
|
1066
|
+
const totalStorageUsedMB = Math.round((totalStorageUsed / 1024 / 1024) * 100) / 100;
|
|
1067
|
+
const currentUsagePercent = this.config.maxContentDirSize > 0
|
|
1068
|
+
? Math.round((dbStats.contentDirSize / this.config.maxContentDirSize) * 10000) / 100
|
|
1069
|
+
: 0;
|
|
1070
|
+
const remainingSpace = Math.max(0, this.config.maxContentDirSize - dbStats.contentDirSize);
|
|
1071
|
+
const remainingSpaceMB = Math.round((remainingSpace / 1024 / 1024) * 100) / 100;
|
|
1072
|
+
// Calculate storage efficiency (how much space saved by deduplication)
|
|
1073
|
+
// This is a rough estimate based on the assumption that without deduplication,
|
|
1074
|
+
// we might have more duplicate files
|
|
1075
|
+
const storageEfficiency = totalContentItems > 0
|
|
1076
|
+
? Math.round((totalContentItems / Math.max(1, totalContentItems)) * 100)
|
|
1077
|
+
: 100;
|
|
1078
|
+
return {
|
|
1079
|
+
contentDirectory: {
|
|
1080
|
+
totalFiles: dbStats.contentDirFiles,
|
|
1081
|
+
totalSize: dbStats.contentDirSize,
|
|
1082
|
+
totalSizeMB: contentDirSizeMB,
|
|
1083
|
+
averageFileSize
|
|
1084
|
+
},
|
|
1085
|
+
filesystemReferences: {
|
|
1086
|
+
totalRefs: dbStats.filesystemRefs,
|
|
1087
|
+
totalSize: filesystemTotalSize,
|
|
1088
|
+
totalSizeMB: filesystemSizeMB
|
|
1089
|
+
},
|
|
1090
|
+
overall: {
|
|
1091
|
+
totalContentItems,
|
|
1092
|
+
totalStorageUsed,
|
|
1093
|
+
totalStorageUsedMB,
|
|
1094
|
+
storageEfficiency
|
|
1095
|
+
},
|
|
1096
|
+
limits: {
|
|
1097
|
+
maxContentDirSize: this.config.maxContentDirSize,
|
|
1098
|
+
maxContentDirSizeMB: maxSizeMB,
|
|
1099
|
+
currentUsagePercent,
|
|
1100
|
+
remainingSpace,
|
|
1101
|
+
remainingSpaceMB
|
|
1102
|
+
},
|
|
1103
|
+
lastUpdated: new Date(),
|
|
1104
|
+
lastCleanup: dbStats.lastCleanup
|
|
1105
|
+
};
|
|
1106
|
+
}
|
|
1107
|
+
catch (error) {
|
|
1108
|
+
throw new Error(`Failed to get storage statistics: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1109
|
+
}
|
|
1110
|
+
}
|
|
1111
|
+
/**
|
|
1112
|
+
* Gets current storage statistics for the content directory (legacy method)
|
|
1113
|
+
* @returns Promise that resolves to storage statistics
|
|
1114
|
+
* @deprecated Use getStorageStats() for more comprehensive statistics
|
|
1115
|
+
*/
|
|
1116
|
+
async getContentDirectoryStats() {
|
|
1117
|
+
try {
|
|
1118
|
+
const stats = await getStorageStats(this.db);
|
|
1119
|
+
if (!stats) {
|
|
1120
|
+
// Initialize stats if they don't exist
|
|
1121
|
+
await this.updateStorageStats();
|
|
1122
|
+
return {
|
|
1123
|
+
totalFiles: 0,
|
|
1124
|
+
totalSize: 0,
|
|
1125
|
+
filesystemRefs: 0,
|
|
1126
|
+
lastCleanup: null
|
|
1127
|
+
};
|
|
1128
|
+
}
|
|
1129
|
+
return {
|
|
1130
|
+
totalFiles: stats.contentDirFiles,
|
|
1131
|
+
totalSize: stats.contentDirSize,
|
|
1132
|
+
filesystemRefs: stats.filesystemRefs,
|
|
1133
|
+
lastCleanup: stats.lastCleanup
|
|
1134
|
+
};
|
|
1135
|
+
}
|
|
1136
|
+
catch (error) {
|
|
1137
|
+
throw new Error(`Failed to get content directory stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1138
|
+
}
|
|
1139
|
+
}
|
|
1140
|
+
/**
|
|
1141
|
+
* Generates a simple, human-readable storage usage report
|
|
1142
|
+
* @returns Promise that resolves to formatted storage report
|
|
1143
|
+
*/
|
|
1144
|
+
async generateStorageReport() {
|
|
1145
|
+
try {
|
|
1146
|
+
const stats = await this.getStorageStats();
|
|
1147
|
+
const report = [
|
|
1148
|
+
'=== RAG-lite Content Storage Report ===',
|
|
1149
|
+
'',
|
|
1150
|
+
'Content Directory:',
|
|
1151
|
+
` Files: ${stats.contentDirectory.totalFiles}`,
|
|
1152
|
+
` Size: ${stats.contentDirectory.totalSizeMB} MB`,
|
|
1153
|
+
` Average file size: ${Math.round(stats.contentDirectory.averageFileSize / 1024)} KB`,
|
|
1154
|
+
'',
|
|
1155
|
+
'Filesystem References:',
|
|
1156
|
+
` References: ${stats.filesystemReferences.totalRefs}`,
|
|
1157
|
+
` Total size: ${stats.filesystemReferences.totalSizeMB} MB`,
|
|
1158
|
+
'',
|
|
1159
|
+
'Overall Usage:',
|
|
1160
|
+
` Total content items: ${stats.overall.totalContentItems}`,
|
|
1161
|
+
` Total storage used: ${stats.overall.totalStorageUsedMB} MB`,
|
|
1162
|
+
` Storage efficiency: ${stats.overall.storageEfficiency}%`,
|
|
1163
|
+
'',
|
|
1164
|
+
'Storage Limits:',
|
|
1165
|
+
` Content directory limit: ${stats.limits.maxContentDirSizeMB} MB`,
|
|
1166
|
+
` Current usage: ${stats.limits.currentUsagePercent}%`,
|
|
1167
|
+
` Remaining space: ${stats.limits.remainingSpaceMB} MB`,
|
|
1168
|
+
'',
|
|
1169
|
+
'Maintenance:',
|
|
1170
|
+
` Last updated: ${stats.lastUpdated.toISOString()}`,
|
|
1171
|
+
` Last cleanup: ${stats.lastCleanup ? stats.lastCleanup.toISOString() : 'Never'}`,
|
|
1172
|
+
''
|
|
1173
|
+
];
|
|
1174
|
+
// Add warnings if needed
|
|
1175
|
+
if (stats.limits.currentUsagePercent > 90) {
|
|
1176
|
+
report.push('⚠️ WARNING: Content directory is over 90% full!');
|
|
1177
|
+
report.push(' Consider running cleanup operations to free space.');
|
|
1178
|
+
report.push('');
|
|
1179
|
+
}
|
|
1180
|
+
else if (stats.limits.currentUsagePercent > 75) {
|
|
1181
|
+
report.push('⚠️ NOTICE: Content directory is over 75% full.');
|
|
1182
|
+
report.push(' You may want to run cleanup operations soon.');
|
|
1183
|
+
report.push('');
|
|
1184
|
+
}
|
|
1185
|
+
return report.join('\n');
|
|
1186
|
+
}
|
|
1187
|
+
catch (error) {
|
|
1188
|
+
throw new Error(`Failed to generate storage report: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1189
|
+
}
|
|
1190
|
+
}
|
|
1191
|
+
/**
|
|
1192
|
+
* Gets storage statistics in a format suitable for monitoring systems
|
|
1193
|
+
* @returns Promise that resolves to monitoring-friendly statistics
|
|
1194
|
+
*/
|
|
1195
|
+
async getStorageMetrics() {
|
|
1196
|
+
try {
|
|
1197
|
+
const stats = await this.getStorageStats();
|
|
1198
|
+
return {
|
|
1199
|
+
contentDirFiles: stats.contentDirectory.totalFiles,
|
|
1200
|
+
contentDirSizeBytes: stats.contentDirectory.totalSize,
|
|
1201
|
+
contentDirSizeMB: stats.contentDirectory.totalSizeMB,
|
|
1202
|
+
filesystemRefs: stats.filesystemReferences.totalRefs,
|
|
1203
|
+
filesystemSizeBytes: stats.filesystemReferences.totalSize,
|
|
1204
|
+
filesystemSizeMB: stats.filesystemReferences.totalSizeMB,
|
|
1205
|
+
totalContentItems: stats.overall.totalContentItems,
|
|
1206
|
+
totalStorageBytes: stats.overall.totalStorageUsed,
|
|
1207
|
+
totalStorageMB: stats.overall.totalStorageUsedMB,
|
|
1208
|
+
usagePercent: stats.limits.currentUsagePercent,
|
|
1209
|
+
remainingBytes: stats.limits.remainingSpace,
|
|
1210
|
+
remainingMB: stats.limits.remainingSpaceMB,
|
|
1211
|
+
lastCleanupTimestamp: stats.lastCleanup ? stats.lastCleanup.getTime() : null,
|
|
1212
|
+
lastUpdatedTimestamp: stats.lastUpdated.getTime()
|
|
1213
|
+
};
|
|
1214
|
+
}
|
|
1215
|
+
catch (error) {
|
|
1216
|
+
throw new Error(`Failed to get storage metrics: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1217
|
+
}
|
|
1218
|
+
}
|
|
1219
|
+
/**
|
|
1220
|
+
* Updates storage statistics by scanning the content directory
|
|
1221
|
+
* @returns Promise that resolves when stats are updated
|
|
1222
|
+
*/
|
|
1223
|
+
async updateStorageStats() {
|
|
1224
|
+
try {
|
|
1225
|
+
let contentDirFiles = 0;
|
|
1226
|
+
let contentDirSize = 0;
|
|
1227
|
+
let filesystemRefs = 0;
|
|
1228
|
+
// Count content directory files and size
|
|
1229
|
+
try {
|
|
1230
|
+
const contentDirContents = await fs.readdir(this.config.contentDir);
|
|
1231
|
+
for (const filename of contentDirContents) {
|
|
1232
|
+
const filePath = join(this.config.contentDir, filename);
|
|
1233
|
+
try {
|
|
1234
|
+
const stats = await fs.stat(filePath);
|
|
1235
|
+
if (stats.isFile()) {
|
|
1236
|
+
contentDirFiles++;
|
|
1237
|
+
contentDirSize += stats.size;
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1240
|
+
catch {
|
|
1241
|
+
// Skip files that can't be accessed
|
|
1242
|
+
}
|
|
1243
|
+
}
|
|
1244
|
+
}
|
|
1245
|
+
catch {
|
|
1246
|
+
// Content directory doesn't exist or can't be read
|
|
1247
|
+
contentDirFiles = 0;
|
|
1248
|
+
contentDirSize = 0;
|
|
1249
|
+
}
|
|
1250
|
+
// Count filesystem references
|
|
1251
|
+
const filesystemContent = await getContentMetadataByStorageType(this.db, 'filesystem');
|
|
1252
|
+
filesystemRefs = filesystemContent.length;
|
|
1253
|
+
// Update database stats
|
|
1254
|
+
await updateStorageStats(this.db, {
|
|
1255
|
+
contentDirFiles,
|
|
1256
|
+
contentDirSize,
|
|
1257
|
+
filesystemRefs
|
|
1258
|
+
});
|
|
1259
|
+
}
|
|
1260
|
+
catch (error) {
|
|
1261
|
+
throw new Error(`Failed to update storage stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1262
|
+
}
|
|
1263
|
+
}
|
|
1264
|
+
/**
|
|
1265
|
+
* Checks if adding new content would exceed storage limits (legacy method)
|
|
1266
|
+
* @param contentSize - Size of content to add
|
|
1267
|
+
* @returns Promise that resolves to true if within limits, false otherwise
|
|
1268
|
+
* @deprecated Use enforceStorageLimits() for better error handling and guidance
|
|
1269
|
+
*/
|
|
1270
|
+
async checkStorageLimits(contentSize) {
|
|
1271
|
+
try {
|
|
1272
|
+
const stats = await this.getContentDirectoryStats();
|
|
1273
|
+
return (stats.totalSize + contentSize) <= this.config.maxContentDirSize;
|
|
1274
|
+
}
|
|
1275
|
+
catch (error) {
|
|
1276
|
+
// If we can't get stats, allow the operation but log the error
|
|
1277
|
+
console.warn('Failed to check storage limits:', error);
|
|
1278
|
+
return true;
|
|
1279
|
+
}
|
|
1280
|
+
}
|
|
1281
|
+
/**
|
|
1282
|
+
* Removes orphaned files that exist in content directory but have no metadata references
|
|
1283
|
+
* @returns Promise that resolves to cleanup results
|
|
1284
|
+
*/
|
|
1285
|
+
async removeOrphanedFiles() {
|
|
1286
|
+
return this.cleanupOrphanedFiles();
|
|
1287
|
+
}
|
|
1288
|
+
/**
|
|
1289
|
+
* Removes duplicate content files based on content hash, keeping the first occurrence
|
|
1290
|
+
* @returns Promise that resolves to deduplication results
|
|
1291
|
+
*/
|
|
1292
|
+
async removeDuplicateContent() {
|
|
1293
|
+
return this.deduplicateContentFiles();
|
|
1294
|
+
}
|
|
1295
|
+
/**
|
|
1296
|
+
* Cleans up orphaned files in the content directory
|
|
1297
|
+
* Removes files that exist in the directory but have no corresponding metadata
|
|
1298
|
+
* @returns Promise that resolves to cleanup results
|
|
1299
|
+
*/
|
|
1300
|
+
async cleanupOrphanedFiles() {
|
|
1301
|
+
const removedFiles = [];
|
|
1302
|
+
const errors = [];
|
|
1303
|
+
let freedSpace = 0;
|
|
1304
|
+
try {
|
|
1305
|
+
// Ensure content directory exists
|
|
1306
|
+
await this.ensureContentDirectory();
|
|
1307
|
+
// Get all content metadata for content_dir storage
|
|
1308
|
+
const contentMetadata = await getContentMetadataByStorageType(this.db, 'content_dir');
|
|
1309
|
+
const validPaths = new Set(contentMetadata.map(meta => meta.contentPath));
|
|
1310
|
+
// Scan content directory for files
|
|
1311
|
+
const contentDirContents = await fs.readdir(this.config.contentDir);
|
|
1312
|
+
for (const filename of contentDirContents) {
|
|
1313
|
+
const filePath = join(this.config.contentDir, filename);
|
|
1314
|
+
try {
|
|
1315
|
+
const stats = await fs.stat(filePath);
|
|
1316
|
+
if (stats.isFile() && !validPaths.has(filePath)) {
|
|
1317
|
+
// This file is orphaned - remove it
|
|
1318
|
+
await fs.unlink(filePath);
|
|
1319
|
+
removedFiles.push(filename);
|
|
1320
|
+
freedSpace += stats.size;
|
|
1321
|
+
}
|
|
1322
|
+
}
|
|
1323
|
+
catch (error) {
|
|
1324
|
+
errors.push(`Failed to process ${filename}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1325
|
+
}
|
|
1326
|
+
}
|
|
1327
|
+
// Update storage stats after cleanup
|
|
1328
|
+
if (removedFiles.length > 0) {
|
|
1329
|
+
await this.updateStorageStats();
|
|
1330
|
+
// Update last cleanup time
|
|
1331
|
+
await updateStorageStats(this.db, {
|
|
1332
|
+
lastCleanup: new Date()
|
|
1333
|
+
});
|
|
1334
|
+
}
|
|
1335
|
+
return { removedFiles, errors, freedSpace };
|
|
1336
|
+
}
|
|
1337
|
+
catch (error) {
|
|
1338
|
+
throw new Error(`Failed to cleanup orphaned files: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1339
|
+
}
|
|
1340
|
+
}
|
|
1341
|
+
/**
|
|
1342
|
+
* Removes duplicate content files based on content hash
|
|
1343
|
+
* Keeps the first occurrence and removes duplicates
|
|
1344
|
+
* @returns Promise that resolves to deduplication results
|
|
1345
|
+
*/
|
|
1346
|
+
async deduplicateContentFiles() {
|
|
1347
|
+
const removedFiles = [];
|
|
1348
|
+
const errors = [];
|
|
1349
|
+
let freedSpace = 0;
|
|
1350
|
+
try {
|
|
1351
|
+
// Get all content metadata for content_dir storage
|
|
1352
|
+
const contentMetadata = await getContentMetadataByStorageType(this.db, 'content_dir');
|
|
1353
|
+
// Group by content hash
|
|
1354
|
+
const hashGroups = new Map();
|
|
1355
|
+
for (const metadata of contentMetadata) {
|
|
1356
|
+
const hash = metadata.contentHash;
|
|
1357
|
+
if (!hashGroups.has(hash)) {
|
|
1358
|
+
hashGroups.set(hash, []);
|
|
1359
|
+
}
|
|
1360
|
+
hashGroups.get(hash).push(metadata);
|
|
1361
|
+
}
|
|
1362
|
+
// Process groups with duplicates
|
|
1363
|
+
for (const [hash, group] of hashGroups) {
|
|
1364
|
+
if (group.length > 1) {
|
|
1365
|
+
// Keep the first one, remove the rest
|
|
1366
|
+
const [keep, ...remove] = group.sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime());
|
|
1367
|
+
for (const duplicate of remove) {
|
|
1368
|
+
try {
|
|
1369
|
+
// Remove file
|
|
1370
|
+
const stats = await fs.stat(duplicate.contentPath);
|
|
1371
|
+
await fs.unlink(duplicate.contentPath);
|
|
1372
|
+
// Remove metadata
|
|
1373
|
+
await deleteContentMetadata(this.db, duplicate.id);
|
|
1374
|
+
removedFiles.push(basename(duplicate.contentPath));
|
|
1375
|
+
freedSpace += stats.size;
|
|
1376
|
+
}
|
|
1377
|
+
catch (error) {
|
|
1378
|
+
errors.push(`Failed to remove duplicate ${duplicate.id}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1379
|
+
}
|
|
1380
|
+
}
|
|
1381
|
+
}
|
|
1382
|
+
}
|
|
1383
|
+
// Update storage stats after deduplication
|
|
1384
|
+
if (removedFiles.length > 0) {
|
|
1385
|
+
await this.updateStorageStats();
|
|
1386
|
+
}
|
|
1387
|
+
return { removedFiles, errors, freedSpace };
|
|
1388
|
+
}
|
|
1389
|
+
catch (error) {
|
|
1390
|
+
throw new Error(`Failed to deduplicate content files: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1391
|
+
}
|
|
1392
|
+
}
|
|
1393
|
+
/**
|
|
1394
|
+
* Ensures content directory has proper permissions
|
|
1395
|
+
* @returns Promise that resolves when permissions are set
|
|
1396
|
+
*/
|
|
1397
|
+
async ensureContentDirectoryPermissions() {
|
|
1398
|
+
try {
|
|
1399
|
+
await this.ensureContentDirectory();
|
|
1400
|
+
// Set directory permissions to 755 (owner: rwx, group: rx, others: rx)
|
|
1401
|
+
await fs.chmod(this.config.contentDir, 0o755);
|
|
1402
|
+
}
|
|
1403
|
+
catch (error) {
|
|
1404
|
+
throw new Error(`Failed to set content directory permissions: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1405
|
+
}
|
|
1406
|
+
}
|
|
1407
|
+
/**
|
|
1408
|
+
* Validates content directory structure and repairs if needed
|
|
1409
|
+
* @returns Promise that resolves to validation results
|
|
1410
|
+
*/
|
|
1411
|
+
async validateAndRepairContentDirectory() {
|
|
1412
|
+
const issues = [];
|
|
1413
|
+
const repaired = [];
|
|
1414
|
+
try {
|
|
1415
|
+
// Check if content directory exists
|
|
1416
|
+
try {
|
|
1417
|
+
const stats = await fs.stat(this.config.contentDir);
|
|
1418
|
+
if (!stats.isDirectory()) {
|
|
1419
|
+
issues.push('Content path exists but is not a directory');
|
|
1420
|
+
}
|
|
1421
|
+
}
|
|
1422
|
+
catch {
|
|
1423
|
+
// Directory doesn't exist - create it
|
|
1424
|
+
await this.ensureContentDirectory();
|
|
1425
|
+
repaired.push('Created missing content directory');
|
|
1426
|
+
}
|
|
1427
|
+
// Check permissions
|
|
1428
|
+
try {
|
|
1429
|
+
await fs.access(this.config.contentDir, fs.constants.R_OK | fs.constants.W_OK);
|
|
1430
|
+
}
|
|
1431
|
+
catch {
|
|
1432
|
+
issues.push('Content directory is not readable/writable');
|
|
1433
|
+
try {
|
|
1434
|
+
await this.ensureContentDirectoryPermissions();
|
|
1435
|
+
repaired.push('Fixed content directory permissions');
|
|
1436
|
+
}
|
|
1437
|
+
catch {
|
|
1438
|
+
issues.push('Failed to fix content directory permissions');
|
|
1439
|
+
}
|
|
1440
|
+
}
|
|
1441
|
+
// Validate storage stats consistency
|
|
1442
|
+
try {
|
|
1443
|
+
const dbStats = await getStorageStats(this.db);
|
|
1444
|
+
const actualStats = await this.getActualDirectoryStats();
|
|
1445
|
+
if (!dbStats ||
|
|
1446
|
+
dbStats.contentDirFiles !== actualStats.files ||
|
|
1447
|
+
Math.abs(dbStats.contentDirSize - actualStats.size) > 1024) { // Allow 1KB tolerance
|
|
1448
|
+
await this.updateStorageStats();
|
|
1449
|
+
repaired.push('Updated inconsistent storage statistics');
|
|
1450
|
+
}
|
|
1451
|
+
}
|
|
1452
|
+
catch (error) {
|
|
1453
|
+
issues.push(`Failed to validate storage stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1454
|
+
}
|
|
1455
|
+
return {
|
|
1456
|
+
isValid: issues.length === 0,
|
|
1457
|
+
issues,
|
|
1458
|
+
repaired
|
|
1459
|
+
};
|
|
1460
|
+
}
|
|
1461
|
+
catch (error) {
|
|
1462
|
+
throw new Error(`Failed to validate content directory: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
1463
|
+
}
|
|
1464
|
+
}
|
|
1465
|
+
/**
|
|
1466
|
+
* Gets actual directory statistics by scanning the filesystem
|
|
1467
|
+
* @returns Promise that resolves to actual directory stats
|
|
1468
|
+
*/
|
|
1469
|
+
async getActualDirectoryStats() {
|
|
1470
|
+
let files = 0;
|
|
1471
|
+
let size = 0;
|
|
1472
|
+
try {
|
|
1473
|
+
const contentDirContents = await fs.readdir(this.config.contentDir);
|
|
1474
|
+
for (const filename of contentDirContents) {
|
|
1475
|
+
const filePath = join(this.config.contentDir, filename);
|
|
1476
|
+
try {
|
|
1477
|
+
const stats = await fs.stat(filePath);
|
|
1478
|
+
if (stats.isFile()) {
|
|
1479
|
+
files++;
|
|
1480
|
+
size += stats.size;
|
|
1481
|
+
}
|
|
1482
|
+
}
|
|
1483
|
+
catch {
|
|
1484
|
+
// Skip files that can't be accessed
|
|
1485
|
+
}
|
|
1486
|
+
}
|
|
1487
|
+
}
|
|
1488
|
+
catch {
|
|
1489
|
+
// Directory doesn't exist or can't be read
|
|
1490
|
+
}
|
|
1491
|
+
return { files, size };
|
|
1492
|
+
}
|
|
1493
|
+
/**
|
|
1494
|
+
* Cleanup resources to prevent memory leaks and hanging processes
|
|
1495
|
+
* Should be called when ContentManager is no longer needed
|
|
1496
|
+
*/
|
|
1497
|
+
cleanup() {
|
|
1498
|
+
// Clean up performance optimizer interval that prevents process exit
|
|
1499
|
+
if (this.performanceOptimizer && typeof this.performanceOptimizer.cleanup === 'function') {
|
|
1500
|
+
this.performanceOptimizer.cleanup();
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
}
|
|
1504
|
+
//# sourceMappingURL=content-manager.js.map
|