@soulcraft/brainy 4.10.2 → 4.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainy.js +50 -14
- package/dist/import/ImportCoordinator.js +243 -173
- package/dist/storage/adapters/azureBlobStorage.d.ts +15 -1
- package/dist/storage/adapters/azureBlobStorage.js +25 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +13 -0
- package/dist/storage/adapters/baseStorageAdapter.js +26 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +17 -1
- package/dist/storage/adapters/fileSystemStorage.js +111 -16
- package/dist/storage/adapters/gcsStorage.d.ts +16 -1
- package/dist/storage/adapters/gcsStorage.js +26 -0
- package/dist/storage/adapters/memoryStorage.d.ts +14 -1
- package/dist/storage/adapters/memoryStorage.js +24 -0
- package/dist/storage/adapters/opfsStorage.d.ts +14 -1
- package/dist/storage/adapters/opfsStorage.js +24 -0
- package/dist/storage/adapters/r2Storage.d.ts +18 -1
- package/dist/storage/adapters/r2Storage.js +28 -0
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +15 -1
- package/dist/storage/adapters/s3CompatibleStorage.js +25 -0
- package/dist/storage/baseStorage.d.ts +24 -0
- package/dist/utils/adaptiveBackpressure.d.ts +17 -10
- package/dist/utils/adaptiveBackpressure.js +98 -48
- package/package.json +1 -1
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* Provides common functionality for all storage adapters, including statistics tracking
|
|
4
4
|
*/
|
|
5
5
|
import { StatisticsData, StorageAdapter, HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, NounMetadata, VerbMetadata } from '../../coreTypes.js';
|
|
6
|
+
import { StorageBatchConfig } from '../baseStorage.js';
|
|
6
7
|
/**
|
|
7
8
|
* Base class for storage adapters that implements statistics tracking
|
|
8
9
|
*/
|
|
@@ -50,6 +51,18 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
|
|
|
50
51
|
quota: number | null;
|
|
51
52
|
details?: Record<string, any>;
|
|
52
53
|
}>;
|
|
54
|
+
/**
|
|
55
|
+
* Get optimal batch configuration for this storage adapter
|
|
56
|
+
* Override in subclasses to provide storage-specific optimization
|
|
57
|
+
*
|
|
58
|
+
* This method allows each storage adapter to declare its optimal batch behavior
|
|
59
|
+
* for rate limiting and performance. The configuration is used by addMany(),
|
|
60
|
+
* relateMany(), and import operations to automatically adapt to storage capabilities.
|
|
61
|
+
*
|
|
62
|
+
* @returns Batch configuration optimized for this storage type
|
|
63
|
+
* @since v4.11.0
|
|
64
|
+
*/
|
|
65
|
+
getBatchConfig(): StorageBatchConfig;
|
|
53
66
|
/**
|
|
54
67
|
* Get nouns with pagination and filtering
|
|
55
68
|
* @param options Pagination and filtering options
|
|
@@ -60,6 +60,32 @@ export class BaseStorageAdapter {
|
|
|
60
60
|
this.countPersistBatchSize = 10; // Operations before forcing persist (cloud storage)
|
|
61
61
|
this.countPersistInterval = 5000; // Milliseconds before forcing persist (cloud storage)
|
|
62
62
|
}
|
|
63
|
+
/**
|
|
64
|
+
* Get optimal batch configuration for this storage adapter
|
|
65
|
+
* Override in subclasses to provide storage-specific optimization
|
|
66
|
+
*
|
|
67
|
+
* This method allows each storage adapter to declare its optimal batch behavior
|
|
68
|
+
* for rate limiting and performance. The configuration is used by addMany(),
|
|
69
|
+
* relateMany(), and import operations to automatically adapt to storage capabilities.
|
|
70
|
+
*
|
|
71
|
+
* @returns Batch configuration optimized for this storage type
|
|
72
|
+
* @since v4.11.0
|
|
73
|
+
*/
|
|
74
|
+
getBatchConfig() {
|
|
75
|
+
// Conservative defaults that work safely across all storage types
|
|
76
|
+
// Cloud storage adapters should override with higher throughput values
|
|
77
|
+
// Local storage adapters should override with no delays
|
|
78
|
+
return {
|
|
79
|
+
maxBatchSize: 50,
|
|
80
|
+
batchDelayMs: 100,
|
|
81
|
+
maxConcurrent: 50,
|
|
82
|
+
supportsParallelWrites: false,
|
|
83
|
+
rateLimit: {
|
|
84
|
+
operationsPerSecond: 100,
|
|
85
|
+
burstCapacity: 200
|
|
86
|
+
}
|
|
87
|
+
};
|
|
88
|
+
}
|
|
63
89
|
/**
|
|
64
90
|
* Save statistics data
|
|
65
91
|
* @param statistics The statistics data to save
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* File system storage adapter for Node.js environments
|
|
4
4
|
*/
|
|
5
5
|
import { HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../../coreTypes.js';
|
|
6
|
-
import { BaseStorage } from '../baseStorage.js';
|
|
6
|
+
import { BaseStorage, StorageBatchConfig } from '../baseStorage.js';
|
|
7
7
|
type HNSWNode = HNSWNoun;
|
|
8
8
|
type Edge = HNSWVerb;
|
|
9
9
|
/**
|
|
@@ -39,6 +39,19 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
39
39
|
compression?: boolean;
|
|
40
40
|
compressionLevel?: number;
|
|
41
41
|
});
|
|
42
|
+
/**
|
|
43
|
+
* Get FileSystem-optimized batch configuration
|
|
44
|
+
*
|
|
45
|
+
* File system storage is I/O bound but not rate limited:
|
|
46
|
+
* - Large batch sizes (500 items)
|
|
47
|
+
* - No delays needed (0ms)
|
|
48
|
+
* - Moderate concurrency (100 operations) - limited by I/O threads
|
|
49
|
+
* - Parallel processing supported
|
|
50
|
+
*
|
|
51
|
+
* @returns FileSystem-optimized batch configuration
|
|
52
|
+
* @since v4.11.0
|
|
53
|
+
*/
|
|
54
|
+
getBatchConfig(): StorageBatchConfig;
|
|
42
55
|
/**
|
|
43
56
|
* Initialize the storage adapter
|
|
44
57
|
*/
|
|
@@ -53,6 +66,7 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
53
66
|
private ensureDirectoryExists;
|
|
54
67
|
/**
|
|
55
68
|
* Save a node to storage
|
|
69
|
+
* CRITICAL FIX (v4.10.3): Added atomic write pattern to prevent file corruption during concurrent imports
|
|
56
70
|
*/
|
|
57
71
|
protected saveNode(node: HNSWNode): Promise<void>;
|
|
58
72
|
/**
|
|
@@ -78,6 +92,7 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
78
92
|
protected deleteNode(id: string): Promise<void>;
|
|
79
93
|
/**
|
|
80
94
|
* Save an edge to storage
|
|
95
|
+
* CRITICAL FIX (v4.10.3): Added atomic write pattern to prevent file corruption during concurrent imports
|
|
81
96
|
*/
|
|
82
97
|
protected saveEdge(edge: Edge): Promise<void>;
|
|
83
98
|
/**
|
|
@@ -110,6 +125,7 @@ export declare class FileSystemStorage extends BaseStorage {
|
|
|
110
125
|
* Primitive operation: Write object to path
|
|
111
126
|
* All metadata operations use this internally via base class routing
|
|
112
127
|
* v4.0.0: Supports gzip compression for 60-80% disk savings
|
|
128
|
+
* CRITICAL FIX (v4.10.3): Added atomic write pattern to prevent file corruption during concurrent imports
|
|
113
129
|
*/
|
|
114
130
|
protected writeObjectToPath(pathStr: string, data: any): Promise<void>;
|
|
115
131
|
/**
|
|
@@ -70,6 +70,30 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
70
70
|
}
|
|
71
71
|
// Defer path operations until init() when path module is guaranteed to be loaded
|
|
72
72
|
}
|
|
73
|
+
/**
|
|
74
|
+
* Get FileSystem-optimized batch configuration
|
|
75
|
+
*
|
|
76
|
+
* File system storage is I/O bound but not rate limited:
|
|
77
|
+
* - Large batch sizes (500 items)
|
|
78
|
+
* - No delays needed (0ms)
|
|
79
|
+
* - Moderate concurrency (100 operations) - limited by I/O threads
|
|
80
|
+
* - Parallel processing supported
|
|
81
|
+
*
|
|
82
|
+
* @returns FileSystem-optimized batch configuration
|
|
83
|
+
* @since v4.11.0
|
|
84
|
+
*/
|
|
85
|
+
getBatchConfig() {
|
|
86
|
+
return {
|
|
87
|
+
maxBatchSize: 500,
|
|
88
|
+
batchDelayMs: 0,
|
|
89
|
+
maxConcurrent: 100,
|
|
90
|
+
supportsParallelWrites: true, // Filesystem handles parallel I/O
|
|
91
|
+
rateLimit: {
|
|
92
|
+
operationsPerSecond: 5000, // Depends on disk speed
|
|
93
|
+
burstCapacity: 2000
|
|
94
|
+
}
|
|
95
|
+
};
|
|
96
|
+
}
|
|
73
97
|
/**
|
|
74
98
|
* Initialize the storage adapter
|
|
75
99
|
*/
|
|
@@ -180,6 +204,7 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
180
204
|
}
|
|
181
205
|
/**
|
|
182
206
|
* Save a node to storage
|
|
207
|
+
* CRITICAL FIX (v4.10.3): Added atomic write pattern to prevent file corruption during concurrent imports
|
|
183
208
|
*/
|
|
184
209
|
async saveNode(node) {
|
|
185
210
|
await this.ensureInitialized();
|
|
@@ -194,8 +219,25 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
194
219
|
// NO metadata field - saved separately for scalability
|
|
195
220
|
};
|
|
196
221
|
const filePath = this.getNodePath(node.id);
|
|
197
|
-
|
|
198
|
-
|
|
222
|
+
const tempPath = `${filePath}.tmp.${Date.now()}.${Math.random().toString(36).substring(2)}`;
|
|
223
|
+
try {
|
|
224
|
+
// ATOMIC WRITE SEQUENCE (v4.10.3):
|
|
225
|
+
// 1. Write to temp file
|
|
226
|
+
await this.ensureDirectoryExists(path.dirname(tempPath));
|
|
227
|
+
await fs.promises.writeFile(tempPath, JSON.stringify(serializableNode, null, 2));
|
|
228
|
+
// 2. Atomic rename temp → final (crash-safe, prevents truncation during concurrent writes)
|
|
229
|
+
await fs.promises.rename(tempPath, filePath);
|
|
230
|
+
}
|
|
231
|
+
catch (error) {
|
|
232
|
+
// Clean up temp file on any error
|
|
233
|
+
try {
|
|
234
|
+
await fs.promises.unlink(tempPath);
|
|
235
|
+
}
|
|
236
|
+
catch (cleanupError) {
|
|
237
|
+
// Ignore cleanup errors
|
|
238
|
+
}
|
|
239
|
+
throw error;
|
|
240
|
+
}
|
|
199
241
|
// Count tracking happens in baseStorage.saveNounMetadata_internal (v4.1.2)
|
|
200
242
|
// This fixes the race condition where metadata didn't exist yet
|
|
201
243
|
}
|
|
@@ -344,6 +386,7 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
344
386
|
}
|
|
345
387
|
/**
|
|
346
388
|
* Save an edge to storage
|
|
389
|
+
* CRITICAL FIX (v4.10.3): Added atomic write pattern to prevent file corruption during concurrent imports
|
|
347
390
|
*/
|
|
348
391
|
async saveEdge(edge) {
|
|
349
392
|
await this.ensureInitialized();
|
|
@@ -362,8 +405,25 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
362
405
|
// metadata field is saved separately via saveVerbMetadata()
|
|
363
406
|
};
|
|
364
407
|
const filePath = this.getVerbPath(edge.id);
|
|
365
|
-
|
|
366
|
-
|
|
408
|
+
const tempPath = `${filePath}.tmp.${Date.now()}.${Math.random().toString(36).substring(2)}`;
|
|
409
|
+
try {
|
|
410
|
+
// ATOMIC WRITE SEQUENCE (v4.10.3):
|
|
411
|
+
// 1. Write to temp file
|
|
412
|
+
await this.ensureDirectoryExists(path.dirname(tempPath));
|
|
413
|
+
await fs.promises.writeFile(tempPath, JSON.stringify(serializableEdge, null, 2));
|
|
414
|
+
// 2. Atomic rename temp → final (crash-safe, prevents truncation during concurrent writes)
|
|
415
|
+
await fs.promises.rename(tempPath, filePath);
|
|
416
|
+
}
|
|
417
|
+
catch (error) {
|
|
418
|
+
// Clean up temp file on any error
|
|
419
|
+
try {
|
|
420
|
+
await fs.promises.unlink(tempPath);
|
|
421
|
+
}
|
|
422
|
+
catch (cleanupError) {
|
|
423
|
+
// Ignore cleanup errors
|
|
424
|
+
}
|
|
425
|
+
throw error;
|
|
426
|
+
}
|
|
367
427
|
// Count tracking happens in baseStorage.saveVerbMetadata_internal (v4.1.2)
|
|
368
428
|
// This fixes the race condition where metadata didn't exist yet
|
|
369
429
|
}
|
|
@@ -507,24 +567,42 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
507
567
|
* Primitive operation: Write object to path
|
|
508
568
|
* All metadata operations use this internally via base class routing
|
|
509
569
|
* v4.0.0: Supports gzip compression for 60-80% disk savings
|
|
570
|
+
* CRITICAL FIX (v4.10.3): Added atomic write pattern to prevent file corruption during concurrent imports
|
|
510
571
|
*/
|
|
511
572
|
async writeObjectToPath(pathStr, data) {
|
|
512
573
|
await this.ensureInitialized();
|
|
513
574
|
const fullPath = path.join(this.rootDir, pathStr);
|
|
514
575
|
await this.ensureDirectoryExists(path.dirname(fullPath));
|
|
515
576
|
if (this.compressionEnabled) {
|
|
516
|
-
// Write compressed data with .gz extension
|
|
577
|
+
// Write compressed data with .gz extension using atomic pattern
|
|
517
578
|
const compressedPath = `${fullPath}.gz`;
|
|
518
|
-
const
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
579
|
+
const tempPath = `${compressedPath}.tmp.${Date.now()}.${Math.random().toString(36).substring(2)}`;
|
|
580
|
+
try {
|
|
581
|
+
// ATOMIC WRITE SEQUENCE (v4.10.3):
|
|
582
|
+
// 1. Compress and write to temp file
|
|
583
|
+
const jsonString = JSON.stringify(data, null, 2);
|
|
584
|
+
const compressed = await new Promise((resolve, reject) => {
|
|
585
|
+
zlib.gzip(Buffer.from(jsonString, 'utf-8'), { level: this.compressionLevel }, (err, result) => {
|
|
586
|
+
if (err)
|
|
587
|
+
reject(err);
|
|
588
|
+
else
|
|
589
|
+
resolve(result);
|
|
590
|
+
});
|
|
525
591
|
});
|
|
526
|
-
|
|
527
|
-
|
|
592
|
+
await fs.promises.writeFile(tempPath, compressed);
|
|
593
|
+
// 2. Atomic rename temp → final (crash-safe, prevents truncation during concurrent writes)
|
|
594
|
+
await fs.promises.rename(tempPath, compressedPath);
|
|
595
|
+
}
|
|
596
|
+
catch (error) {
|
|
597
|
+
// Clean up temp file on any error
|
|
598
|
+
try {
|
|
599
|
+
await fs.promises.unlink(tempPath);
|
|
600
|
+
}
|
|
601
|
+
catch (cleanupError) {
|
|
602
|
+
// Ignore cleanup errors
|
|
603
|
+
}
|
|
604
|
+
throw error;
|
|
605
|
+
}
|
|
528
606
|
// Clean up uncompressed file if it exists (migration from uncompressed)
|
|
529
607
|
try {
|
|
530
608
|
await fs.promises.unlink(fullPath);
|
|
@@ -537,8 +615,25 @@ export class FileSystemStorage extends BaseStorage {
|
|
|
537
615
|
}
|
|
538
616
|
}
|
|
539
617
|
else {
|
|
540
|
-
// Write uncompressed data
|
|
541
|
-
|
|
618
|
+
// Write uncompressed data using atomic pattern
|
|
619
|
+
const tempPath = `${fullPath}.tmp.${Date.now()}.${Math.random().toString(36).substring(2)}`;
|
|
620
|
+
try {
|
|
621
|
+
// ATOMIC WRITE SEQUENCE (v4.10.3):
|
|
622
|
+
// 1. Write to temp file
|
|
623
|
+
await fs.promises.writeFile(tempPath, JSON.stringify(data, null, 2));
|
|
624
|
+
// 2. Atomic rename temp → final (crash-safe, prevents truncation during concurrent writes)
|
|
625
|
+
await fs.promises.rename(tempPath, fullPath);
|
|
626
|
+
}
|
|
627
|
+
catch (error) {
|
|
628
|
+
// Clean up temp file on any error
|
|
629
|
+
try {
|
|
630
|
+
await fs.promises.unlink(tempPath);
|
|
631
|
+
}
|
|
632
|
+
catch (cleanupError) {
|
|
633
|
+
// Ignore cleanup errors
|
|
634
|
+
}
|
|
635
|
+
throw error;
|
|
636
|
+
}
|
|
542
637
|
}
|
|
543
638
|
}
|
|
544
639
|
/**
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* 4. HMAC Keys (fallback for backward compatibility)
|
|
10
10
|
*/
|
|
11
11
|
import { HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../../coreTypes.js';
|
|
12
|
-
import { BaseStorage } from '../baseStorage.js';
|
|
12
|
+
import { BaseStorage, StorageBatchConfig } from '../baseStorage.js';
|
|
13
13
|
type HNSWNode = HNSWNoun;
|
|
14
14
|
type Edge = HNSWVerb;
|
|
15
15
|
/**
|
|
@@ -76,6 +76,21 @@ export declare class GcsStorage extends BaseStorage {
|
|
|
76
76
|
};
|
|
77
77
|
readOnly?: boolean;
|
|
78
78
|
});
|
|
79
|
+
/**
|
|
80
|
+
* Get GCS-optimized batch configuration
|
|
81
|
+
*
|
|
82
|
+
* GCS has strict rate limits (~5000 writes/second per bucket) and benefits from:
|
|
83
|
+
* - Moderate batch sizes (50 items)
|
|
84
|
+
* - Sequential processing (not parallel)
|
|
85
|
+
* - Delays between batches (100ms)
|
|
86
|
+
*
|
|
87
|
+
* Note: Each entity write involves 2 operations (vector + metadata),
|
|
88
|
+
* so 800 ops/sec = ~400 entities/sec = ~2500 actual GCS writes/sec
|
|
89
|
+
*
|
|
90
|
+
* @returns GCS-optimized batch configuration
|
|
91
|
+
* @since v4.11.0
|
|
92
|
+
*/
|
|
93
|
+
getBatchConfig(): StorageBatchConfig;
|
|
79
94
|
/**
|
|
80
95
|
* Initialize the storage adapter
|
|
81
96
|
*/
|
|
@@ -92,6 +92,32 @@ export class GcsStorage extends BaseStorage {
|
|
|
92
92
|
prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
|
|
93
93
|
}
|
|
94
94
|
}
|
|
95
|
+
/**
|
|
96
|
+
* Get GCS-optimized batch configuration
|
|
97
|
+
*
|
|
98
|
+
* GCS has strict rate limits (~5000 writes/second per bucket) and benefits from:
|
|
99
|
+
* - Moderate batch sizes (50 items)
|
|
100
|
+
* - Sequential processing (not parallel)
|
|
101
|
+
* - Delays between batches (100ms)
|
|
102
|
+
*
|
|
103
|
+
* Note: Each entity write involves 2 operations (vector + metadata),
|
|
104
|
+
* so 800 ops/sec = ~400 entities/sec = ~2500 actual GCS writes/sec
|
|
105
|
+
*
|
|
106
|
+
* @returns GCS-optimized batch configuration
|
|
107
|
+
* @since v4.11.0
|
|
108
|
+
*/
|
|
109
|
+
getBatchConfig() {
|
|
110
|
+
return {
|
|
111
|
+
maxBatchSize: 50,
|
|
112
|
+
batchDelayMs: 100,
|
|
113
|
+
maxConcurrent: 50,
|
|
114
|
+
supportsParallelWrites: false, // Sequential is safer for GCS rate limits
|
|
115
|
+
rateLimit: {
|
|
116
|
+
operationsPerSecond: 800, // Conservative estimate for entity operations
|
|
117
|
+
burstCapacity: 200
|
|
118
|
+
}
|
|
119
|
+
};
|
|
120
|
+
}
|
|
95
121
|
/**
|
|
96
122
|
* Initialize the storage adapter
|
|
97
123
|
*/
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* In-memory storage adapter for environments where persistent storage is not available or needed
|
|
4
4
|
*/
|
|
5
5
|
import { HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../../coreTypes.js';
|
|
6
|
-
import { BaseStorage } from '../baseStorage.js';
|
|
6
|
+
import { BaseStorage, StorageBatchConfig } from '../baseStorage.js';
|
|
7
7
|
/**
|
|
8
8
|
* In-memory storage adapter
|
|
9
9
|
* Uses Maps to store data in memory
|
|
@@ -17,6 +17,19 @@ export declare class MemoryStorage extends BaseStorage {
|
|
|
17
17
|
private get nounMetadata();
|
|
18
18
|
private get verbMetadata();
|
|
19
19
|
constructor();
|
|
20
|
+
/**
|
|
21
|
+
* Get Memory-optimized batch configuration
|
|
22
|
+
*
|
|
23
|
+
* Memory storage has no rate limits and can handle very high throughput:
|
|
24
|
+
* - Large batch sizes (1000 items)
|
|
25
|
+
* - No delays needed (0ms)
|
|
26
|
+
* - High concurrency (1000 operations)
|
|
27
|
+
* - Parallel processing maximizes throughput
|
|
28
|
+
*
|
|
29
|
+
* @returns Memory-optimized batch configuration
|
|
30
|
+
* @since v4.11.0
|
|
31
|
+
*/
|
|
32
|
+
getBatchConfig(): StorageBatchConfig;
|
|
20
33
|
/**
|
|
21
34
|
* Initialize the storage adapter
|
|
22
35
|
* Nothing to initialize for in-memory storage
|
|
@@ -32,6 +32,30 @@ export class MemoryStorage extends BaseStorage {
|
|
|
32
32
|
// Even in-memory operations need serialization to prevent async race conditions
|
|
33
33
|
this.hnswLocks = new Map();
|
|
34
34
|
}
|
|
35
|
+
/**
|
|
36
|
+
* Get Memory-optimized batch configuration
|
|
37
|
+
*
|
|
38
|
+
* Memory storage has no rate limits and can handle very high throughput:
|
|
39
|
+
* - Large batch sizes (1000 items)
|
|
40
|
+
* - No delays needed (0ms)
|
|
41
|
+
* - High concurrency (1000 operations)
|
|
42
|
+
* - Parallel processing maximizes throughput
|
|
43
|
+
*
|
|
44
|
+
* @returns Memory-optimized batch configuration
|
|
45
|
+
* @since v4.11.0
|
|
46
|
+
*/
|
|
47
|
+
getBatchConfig() {
|
|
48
|
+
return {
|
|
49
|
+
maxBatchSize: 1000,
|
|
50
|
+
batchDelayMs: 0,
|
|
51
|
+
maxConcurrent: 1000,
|
|
52
|
+
supportsParallelWrites: true, // Memory loves parallel operations
|
|
53
|
+
rateLimit: {
|
|
54
|
+
operationsPerSecond: 100000, // Virtually unlimited
|
|
55
|
+
burstCapacity: 100000
|
|
56
|
+
}
|
|
57
|
+
};
|
|
58
|
+
}
|
|
35
59
|
/**
|
|
36
60
|
* Initialize the storage adapter
|
|
37
61
|
* Nothing to initialize for in-memory storage
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Provides persistent storage for the vector database using the Origin Private File System API
|
|
4
4
|
*/
|
|
5
5
|
import { HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../../coreTypes.js';
|
|
6
|
-
import { BaseStorage } from '../baseStorage.js';
|
|
6
|
+
import { BaseStorage, StorageBatchConfig } from '../baseStorage.js';
|
|
7
7
|
import '../../types/fileSystemTypes.js';
|
|
8
8
|
type HNSWNode = HNSWNoun;
|
|
9
9
|
/**
|
|
@@ -30,6 +30,19 @@ export declare class OPFSStorage extends BaseStorage {
|
|
|
30
30
|
private activeLocks;
|
|
31
31
|
private lockPrefix;
|
|
32
32
|
constructor();
|
|
33
|
+
/**
|
|
34
|
+
* Get OPFS-optimized batch configuration
|
|
35
|
+
*
|
|
36
|
+
* OPFS (Origin Private File System) is browser-based storage with moderate performance:
|
|
37
|
+
* - Moderate batch sizes (100 items)
|
|
38
|
+
* - Small delays (10ms) for browser event loop
|
|
39
|
+
* - Limited concurrency (50 operations) - browser constraints
|
|
40
|
+
* - Sequential processing preferred for stability
|
|
41
|
+
*
|
|
42
|
+
* @returns OPFS-optimized batch configuration
|
|
43
|
+
* @since v4.11.0
|
|
44
|
+
*/
|
|
45
|
+
getBatchConfig(): StorageBatchConfig;
|
|
33
46
|
/**
|
|
34
47
|
* Initialize the storage adapter
|
|
35
48
|
*/
|
|
@@ -51,6 +51,30 @@ export class OPFSStorage extends BaseStorage {
|
|
|
51
51
|
'storage' in navigator &&
|
|
52
52
|
'getDirectory' in navigator.storage;
|
|
53
53
|
}
|
|
54
|
+
/**
|
|
55
|
+
* Get OPFS-optimized batch configuration
|
|
56
|
+
*
|
|
57
|
+
* OPFS (Origin Private File System) is browser-based storage with moderate performance:
|
|
58
|
+
* - Moderate batch sizes (100 items)
|
|
59
|
+
* - Small delays (10ms) for browser event loop
|
|
60
|
+
* - Limited concurrency (50 operations) - browser constraints
|
|
61
|
+
* - Sequential processing preferred for stability
|
|
62
|
+
*
|
|
63
|
+
* @returns OPFS-optimized batch configuration
|
|
64
|
+
* @since v4.11.0
|
|
65
|
+
*/
|
|
66
|
+
getBatchConfig() {
|
|
67
|
+
return {
|
|
68
|
+
maxBatchSize: 100,
|
|
69
|
+
batchDelayMs: 10,
|
|
70
|
+
maxConcurrent: 50,
|
|
71
|
+
supportsParallelWrites: false, // Sequential safer in browser
|
|
72
|
+
rateLimit: {
|
|
73
|
+
operationsPerSecond: 1000,
|
|
74
|
+
burstCapacity: 500
|
|
75
|
+
}
|
|
76
|
+
};
|
|
77
|
+
}
|
|
54
78
|
/**
|
|
55
79
|
* Initialize the storage adapter
|
|
56
80
|
*/
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* Based on latest GCS and S3 implementations with R2-specific enhancements
|
|
13
13
|
*/
|
|
14
14
|
import { HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../../coreTypes.js';
|
|
15
|
-
import { BaseStorage } from '../baseStorage.js';
|
|
15
|
+
import { BaseStorage, StorageBatchConfig } from '../baseStorage.js';
|
|
16
16
|
type HNSWNode = HNSWNoun;
|
|
17
17
|
type Edge = HNSWVerb;
|
|
18
18
|
/**
|
|
@@ -75,6 +75,23 @@ export declare class R2Storage extends BaseStorage {
|
|
|
75
75
|
};
|
|
76
76
|
readOnly?: boolean;
|
|
77
77
|
});
|
|
78
|
+
/**
|
|
79
|
+
* Get R2-optimized batch configuration
|
|
80
|
+
*
|
|
81
|
+
* Cloudflare R2 has S3-compatible characteristics with some advantages:
|
|
82
|
+
* - Zero egress fees (can cache more aggressively)
|
|
83
|
+
* - Global edge network
|
|
84
|
+
* - Similar throughput to S3
|
|
85
|
+
*
|
|
86
|
+
* R2 benefits from the same configuration as S3:
|
|
87
|
+
* - Larger batch sizes (100 items)
|
|
88
|
+
* - Parallel processing
|
|
89
|
+
* - Short delays (50ms)
|
|
90
|
+
*
|
|
91
|
+
* @returns R2-optimized batch configuration
|
|
92
|
+
* @since v4.11.0
|
|
93
|
+
*/
|
|
94
|
+
getBatchConfig(): StorageBatchConfig;
|
|
78
95
|
/**
|
|
79
96
|
* Initialize the storage adapter
|
|
80
97
|
*/
|
|
@@ -94,6 +94,34 @@ export class R2Storage extends BaseStorage {
|
|
|
94
94
|
prodLog.info('🚀 R2: High-volume mode FORCED via environment variable');
|
|
95
95
|
}
|
|
96
96
|
}
|
|
97
|
+
/**
|
|
98
|
+
* Get R2-optimized batch configuration
|
|
99
|
+
*
|
|
100
|
+
* Cloudflare R2 has S3-compatible characteristics with some advantages:
|
|
101
|
+
* - Zero egress fees (can cache more aggressively)
|
|
102
|
+
* - Global edge network
|
|
103
|
+
* - Similar throughput to S3
|
|
104
|
+
*
|
|
105
|
+
* R2 benefits from the same configuration as S3:
|
|
106
|
+
* - Larger batch sizes (100 items)
|
|
107
|
+
* - Parallel processing
|
|
108
|
+
* - Short delays (50ms)
|
|
109
|
+
*
|
|
110
|
+
* @returns R2-optimized batch configuration
|
|
111
|
+
* @since v4.11.0
|
|
112
|
+
*/
|
|
113
|
+
getBatchConfig() {
|
|
114
|
+
return {
|
|
115
|
+
maxBatchSize: 100,
|
|
116
|
+
batchDelayMs: 50,
|
|
117
|
+
maxConcurrent: 100,
|
|
118
|
+
supportsParallelWrites: true, // R2 handles parallel writes like S3
|
|
119
|
+
rateLimit: {
|
|
120
|
+
operationsPerSecond: 3500, // Similar to S3 throughput
|
|
121
|
+
burstCapacity: 1000
|
|
122
|
+
}
|
|
123
|
+
};
|
|
124
|
+
}
|
|
97
125
|
/**
|
|
98
126
|
* Initialize the storage adapter
|
|
99
127
|
*/
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* including Amazon S3, Cloudflare R2, and Google Cloud Storage
|
|
5
5
|
*/
|
|
6
6
|
import { Change, HNSWNoun, HNSWVerb, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../../coreTypes.js';
|
|
7
|
-
import { BaseStorage } from '../baseStorage.js';
|
|
7
|
+
import { BaseStorage, StorageBatchConfig } from '../baseStorage.js';
|
|
8
8
|
import { OperationConfig } from '../../utils/operationUtils.js';
|
|
9
9
|
type HNSWNode = HNSWNoun;
|
|
10
10
|
type Edge = HNSWVerb;
|
|
@@ -96,6 +96,20 @@ export declare class S3CompatibleStorage extends BaseStorage {
|
|
|
96
96
|
};
|
|
97
97
|
readOnly?: boolean;
|
|
98
98
|
});
|
|
99
|
+
/**
|
|
100
|
+
* Get S3-optimized batch configuration
|
|
101
|
+
*
|
|
102
|
+
* S3 has higher throughput than GCS and handles parallel writes efficiently:
|
|
103
|
+
* - Larger batch sizes (100 items)
|
|
104
|
+
* - Parallel processing supported
|
|
105
|
+
* - Shorter delays between batches (50ms)
|
|
106
|
+
*
|
|
107
|
+
* S3 can handle ~3500 operations/second per bucket with good performance
|
|
108
|
+
*
|
|
109
|
+
* @returns S3-optimized batch configuration
|
|
110
|
+
* @since v4.11.0
|
|
111
|
+
*/
|
|
112
|
+
getBatchConfig(): StorageBatchConfig;
|
|
99
113
|
/**
|
|
100
114
|
* Initialize the storage adapter
|
|
101
115
|
*/
|
|
@@ -114,6 +114,31 @@ export class S3CompatibleStorage extends BaseStorage {
|
|
|
114
114
|
this.nounCacheManager = new CacheManager(options.cacheConfig);
|
|
115
115
|
this.verbCacheManager = new CacheManager(options.cacheConfig);
|
|
116
116
|
}
|
|
117
|
+
/**
|
|
118
|
+
* Get S3-optimized batch configuration
|
|
119
|
+
*
|
|
120
|
+
* S3 has higher throughput than GCS and handles parallel writes efficiently:
|
|
121
|
+
* - Larger batch sizes (100 items)
|
|
122
|
+
* - Parallel processing supported
|
|
123
|
+
* - Shorter delays between batches (50ms)
|
|
124
|
+
*
|
|
125
|
+
* S3 can handle ~3500 operations/second per bucket with good performance
|
|
126
|
+
*
|
|
127
|
+
* @returns S3-optimized batch configuration
|
|
128
|
+
* @since v4.11.0
|
|
129
|
+
*/
|
|
130
|
+
getBatchConfig() {
|
|
131
|
+
return {
|
|
132
|
+
maxBatchSize: 100,
|
|
133
|
+
batchDelayMs: 50,
|
|
134
|
+
maxConcurrent: 100,
|
|
135
|
+
supportsParallelWrites: true, // S3 handles parallel writes efficiently
|
|
136
|
+
rateLimit: {
|
|
137
|
+
operationsPerSecond: 3500, // S3 is more permissive than GCS
|
|
138
|
+
burstCapacity: 1000
|
|
139
|
+
}
|
|
140
|
+
};
|
|
141
|
+
}
|
|
117
142
|
/**
|
|
118
143
|
* Initialize the storage adapter
|
|
119
144
|
*/
|
|
@@ -5,6 +5,30 @@
|
|
|
5
5
|
import { GraphAdjacencyIndex } from '../graph/graphAdjacencyIndex.js';
|
|
6
6
|
import { GraphVerb, HNSWNoun, HNSWVerb, NounMetadata, VerbMetadata, HNSWNounWithMetadata, HNSWVerbWithMetadata, StatisticsData } from '../coreTypes.js';
|
|
7
7
|
import { BaseStorageAdapter } from './adapters/baseStorageAdapter.js';
|
|
8
|
+
/**
|
|
9
|
+
* Storage adapter batch configuration profile
|
|
10
|
+
* Each storage adapter declares its optimal batch behavior for rate limiting
|
|
11
|
+
* and performance optimization
|
|
12
|
+
*
|
|
13
|
+
* @since v4.11.0
|
|
14
|
+
*/
|
|
15
|
+
export interface StorageBatchConfig {
|
|
16
|
+
/** Maximum items per batch */
|
|
17
|
+
maxBatchSize: number;
|
|
18
|
+
/** Delay between batches in milliseconds (for rate limiting) */
|
|
19
|
+
batchDelayMs: number;
|
|
20
|
+
/** Maximum concurrent operations this storage can handle */
|
|
21
|
+
maxConcurrent: number;
|
|
22
|
+
/** Whether storage can handle parallel writes efficiently */
|
|
23
|
+
supportsParallelWrites: boolean;
|
|
24
|
+
/** Rate limit characteristics of this storage adapter */
|
|
25
|
+
rateLimit: {
|
|
26
|
+
/** Approximate operations per second this storage can handle */
|
|
27
|
+
operationsPerSecond: number;
|
|
28
|
+
/** Maximum burst capacity before throttling occurs */
|
|
29
|
+
burstCapacity: number;
|
|
30
|
+
};
|
|
31
|
+
}
|
|
8
32
|
export declare const NOUNS_METADATA_DIR = "entities/nouns/metadata";
|
|
9
33
|
export declare const VERBS_METADATA_DIR = "entities/verbs/metadata";
|
|
10
34
|
export declare const SYSTEM_DIR = "_system";
|