@soulcraft/brainy 5.11.1 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +155 -5
- package/README.md +2 -6
- package/dist/api/DataAPI.d.ts +0 -40
- package/dist/api/DataAPI.js +0 -235
- package/dist/brainy.d.ts +28 -106
- package/dist/brainy.js +53 -370
- package/dist/cli/commands/cow.d.ts +1 -9
- package/dist/cli/commands/cow.js +1 -61
- package/dist/cli/commands/data.d.ts +1 -13
- package/dist/cli/commands/data.js +1 -74
- package/dist/cli/index.js +1 -16
- package/dist/neural/embeddedTypeEmbeddings.d.ts +1 -1
- package/dist/neural/embeddedTypeEmbeddings.js +2 -2
- package/dist/storage/adapters/azureBlobStorage.d.ts +21 -7
- package/dist/storage/adapters/azureBlobStorage.js +69 -14
- package/dist/storage/adapters/fileSystemStorage.js +2 -1
- package/dist/storage/adapters/gcsStorage.d.ts +29 -15
- package/dist/storage/adapters/gcsStorage.js +82 -27
- package/dist/storage/adapters/historicalStorageAdapter.js +2 -2
- package/dist/storage/adapters/memoryStorage.d.ts +1 -1
- package/dist/storage/adapters/memoryStorage.js +9 -11
- package/dist/storage/adapters/opfsStorage.js +2 -1
- package/dist/storage/adapters/r2Storage.d.ts +21 -10
- package/dist/storage/adapters/r2Storage.js +73 -17
- package/dist/storage/adapters/s3CompatibleStorage.d.ts +20 -7
- package/dist/storage/adapters/s3CompatibleStorage.js +72 -14
- package/dist/storage/baseStorage.d.ts +153 -24
- package/dist/storage/baseStorage.js +758 -459
- package/dist/vfs/PathResolver.js +6 -2
- package/dist/vfs/VirtualFileSystem.d.ts +46 -24
- package/dist/vfs/VirtualFileSystem.js +176 -156
- package/package.json +1 -1
package/dist/neural/embeddedTypeEmbeddings.d.ts
@@ -2,7 +2,7 @@
  * 🧠 BRAINY EMBEDDED TYPE EMBEDDINGS
  *
  * AUTO-GENERATED - DO NOT EDIT
- * Generated: 2025-11-
+ * Generated: 2025-11-19T21:22:15.103Z
  * Noun Types: 42
  * Verb Types: 127
  *

package/dist/neural/embeddedTypeEmbeddings.js
@@ -15,7 +15,7 @@ export const TYPE_METADATA = {
     verbTypes: 127,
     totalTypes: 169,
     embeddingDimensions: 384,
-    generatedAt: "2025-11-
+    generatedAt: "2025-11-19T21:22:15.103Z",
     sizeBytes: {
         embeddings: 259584,
         base64: 346112

package/dist/storage/adapters/azureBlobStorage.d.ts
@@ -78,19 +78,33 @@ export declare class AzureBlobStorage extends BaseStorage {
         readOnly?: boolean;
     });
     /**
-     * Get Azure Blob-optimized batch configuration
+     * Get Azure Blob-optimized batch configuration with native batch API support
      *
-     * Azure Blob Storage has
-     * -
-     * -
-     * -
+     * Azure Blob Storage has good throughput with parallel operations:
+     * - Large batch sizes (up to 1000 blobs)
+     * - No artificial delay needed
+     * - High concurrency (100 parallel optimal)
      *
-     * Azure
+     * Azure supports ~3000 operations/second with burst up to 6000
+     * Recent Azure improvements make parallel downloads very efficient
      *
      * @returns Azure Blob-optimized batch configuration
-     * @since
+     * @since v5.12.0 - Updated for native batch API
      */
     getBatchConfig(): StorageBatchConfig;
+    /**
+     * Batch read operation using Azure's parallel blob download
+     *
+     * Uses Promise.allSettled() for maximum parallelism with BlockBlobClient.
+     * Azure Blob Storage handles concurrent downloads efficiently.
+     *
+     * Performance: ~100 concurrent requests = <600ms for 100 blobs
+     *
+     * @param paths - Array of Azure blob paths to read
+     * @returns Map of path -> parsed JSON data (only successful reads)
+     * @since v5.12.0
+     */
+    readBatch(paths: string[]): Promise<Map<string, any>>;
     /**
      * Initialize the storage adapter
      */

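For reference, the shape of the StorageBatchConfig these adapters return can be read off the object literals in this diff. The interface below is a sketch of that inferred shape, not the package's actual declaration (which lives in baseStorage.d.ts):

    // Sketch of StorageBatchConfig as inferred from the getBatchConfig()
    // return values in this diff; the real declaration may differ.
    interface StorageBatchConfig {
        maxBatchSize: number;            // e.g. 1000 for Azure/GCS/R2/S3
        batchDelayMs: number;            // 0 = no artificial delay between chunks
        maxConcurrent: number;           // parallel requests per chunk (100-150)
        supportsParallelWrites: boolean;
        rateLimit: {
            operationsPerSecond: number; // e.g. 3000 for Azure
            burstCapacity: number;       // e.g. 6000 for Azure
        };
    }
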
package/dist/storage/adapters/azureBlobStorage.js
@@ -91,30 +91,84 @@ export class AzureBlobStorage extends BaseStorage {
         }
     }
     /**
-     * Get Azure Blob-optimized batch configuration
+     * Get Azure Blob-optimized batch configuration with native batch API support
      *
-     * Azure Blob Storage has
-     * -
-     * -
-     * -
+     * Azure Blob Storage has good throughput with parallel operations:
+     * - Large batch sizes (up to 1000 blobs)
+     * - No artificial delay needed
+     * - High concurrency (100 parallel optimal)
      *
-     * Azure
+     * Azure supports ~3000 operations/second with burst up to 6000
+     * Recent Azure improvements make parallel downloads very efficient
      *
      * @returns Azure Blob-optimized batch configuration
-     * @since
+     * @since v5.12.0 - Updated for native batch API
      */
     getBatchConfig() {
         return {
-            maxBatchSize:
-            batchDelayMs:
-            maxConcurrent:
-            supportsParallelWrites: true, // Azure handles parallel
+            maxBatchSize: 1000, // Azure can handle large batches
+            batchDelayMs: 0, // No rate limiting needed
+            maxConcurrent: 100, // Optimal for Azure Blob Storage
+            supportsParallelWrites: true, // Azure handles parallel well
             rateLimit: {
-                operationsPerSecond:
-                burstCapacity:
+                operationsPerSecond: 3000, // Good throughput
+                burstCapacity: 6000
             }
         };
     }
+    /**
+     * Batch read operation using Azure's parallel blob download
+     *
+     * Uses Promise.allSettled() for maximum parallelism with BlockBlobClient.
+     * Azure Blob Storage handles concurrent downloads efficiently.
+     *
+     * Performance: ~100 concurrent requests = <600ms for 100 blobs
+     *
+     * @param paths - Array of Azure blob paths to read
+     * @returns Map of path -> parsed JSON data (only successful reads)
+     * @since v5.12.0
+     */
+    async readBatch(paths) {
+        await this.ensureInitialized();
+        const results = new Map();
+        if (paths.length === 0)
+            return results;
+        const batchConfig = this.getBatchConfig();
+        const chunkSize = batchConfig.maxConcurrent || 100;
+        this.logger.debug(`[Azure Batch] Reading ${paths.length} blobs in chunks of ${chunkSize}`);
+        // Process in chunks to respect concurrency limits
+        for (let i = 0; i < paths.length; i += chunkSize) {
+            const chunk = paths.slice(i, i + chunkSize);
+            // Parallel download for this chunk
+            const chunkResults = await Promise.allSettled(chunk.map(async (path) => {
+                try {
+                    const blockBlobClient = this.containerClient.getBlockBlobClient(path);
+                    const downloadResponse = await blockBlobClient.download(0);
+                    if (!downloadResponse.readableStreamBody) {
+                        return { path, data: null, success: false };
+                    }
+                    const downloaded = await this.streamToBuffer(downloadResponse.readableStreamBody);
+                    const data = JSON.parse(downloaded.toString());
+                    return { path, data, success: true };
+                }
+                catch (error) {
+                    // 404 and other errors are expected (not all paths may exist)
+                    if (error.statusCode !== 404 && error.code !== 'BlobNotFound') {
+                        this.logger.warn(`[Azure Batch] Failed to read ${path}: ${error.message}`);
+                    }
+                    return { path, data: null, success: false };
+                }
+            }));
+            // Collect successful results
+            for (const result of chunkResults) {
+                if (result.status === 'fulfilled' && result.value.success && result.value.data !== null) {
+                    results.set(result.value.path, result.value.data);
+                }
+            }
+        }
+        this.logger.debug(`[Azure Batch] Successfully read ${results.size}/${paths.length} blobs`);
+        return results;
+    }
     /**
      * Initialize the storage adapter
      */

@@ -184,7 +238,8 @@ export class AzureBlobStorage extends BaseStorage {
             this.nounCacheManager.clear();
             this.verbCacheManager.clear();
             prodLog.info('✅ Cache cleared - starting fresh');
-
+            // v6.0.0: Initialize GraphAdjacencyIndex and type statistics
+            await super.init();
         }
         catch (error) {
             this.logger.error('Failed to initialize Azure Blob Storage:', error);

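A minimal usage sketch of the new batch path, assuming an already-initialized AzureBlobStorage instance; the constructor options are omitted and the blob paths below are made up (the shard/vectors.json layout follows the path structure visible in the memoryStorage hunk later in this diff):

    // Minimal sketch: reading several blobs in one call. Paths are
    // hypothetical; only successful reads appear in the returned Map.
    const storage = new AzureBlobStorage({ /* container/credential options */ });
    await storage.init();

    const paths = [
        'entities/nouns/ab/id-1/vectors.json',
        'entities/nouns/cd/id-2/vectors.json'
    ];
    const found: Map<string, any> = await storage.readBatch(paths);

    for (const [path, data] of found) {
        console.log(path, data); // missing blobs are simply absent from the Map
    }
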
package/dist/storage/adapters/fileSystemStorage.js
@@ -174,7 +174,8 @@ export class FileSystemStorage extends BaseStorage {
             }
             // Always use fixed depth after migration/detection
             this.cachedShardingDepth = this.SHARDING_DEPTH;
-
+            // v6.0.0: Initialize GraphAdjacencyIndex and type statistics
+            await super.init();
         }
         catch (error) {
             console.error('Error initializing FileSystemStorage:', error);

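The same `await super.init()` call is threaded through every adapter in this release (Azure, FileSystem, GCS, Historical, Memory, OPFS, R2). A condensed sketch of the shared pattern, with a hypothetical adapter name and the abstract members a real adapter must implement omitted:

    // Condensed sketch of the v6.0.0 init pattern: each subclass finishes
    // its own setup, then delegates to BaseStorage.init() so the
    // GraphAdjacencyIndex and type statistics get initialized once, centrally.
    class ExampleStorage extends BaseStorage {
        async init(): Promise<void> {
            try {
                // ...adapter-specific setup (clients, caches, sharding)...
                await super.init(); // v6.0.0: GraphAdjacencyIndex + type stats
            }
            catch (error) {
                console.error('Error initializing ExampleStorage:', error);
                throw error;
            }
        }
    }
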
package/dist/storage/adapters/gcsStorage.d.ts
@@ -83,21 +83,6 @@ export declare class GcsStorage extends BaseStorage {
     };
     readOnly?: boolean;
     });
-    /**
-     * Get GCS-optimized batch configuration
-     *
-     * GCS has strict rate limits (~5000 writes/second per bucket) and benefits from:
-     * - Moderate batch sizes (50 items)
-     * - Sequential processing (not parallel)
-     * - Delays between batches (100ms)
-     *
-     * Note: Each entity write involves 2 operations (vector + metadata),
-     * so 800 ops/sec = ~400 entities/sec = ~2500 actual GCS writes/sec
-     *
-     * @returns GCS-optimized batch configuration
-     * @since v4.11.0
-     */
-    getBatchConfig(): StorageBatchConfig;
     /**
      * Initialize the storage adapter
      */

@@ -184,6 +169,35 @@ export declare class GcsStorage extends BaseStorage {
      * @protected
      */
     protected readObjectFromPath(path: string): Promise<any | null>;
+    /**
+     * Batch read multiple objects from GCS (v5.12.0 - Cloud Storage Optimization)
+     *
+     * **Performance**: GCS-optimized parallel downloads
+     * - Uses Promise.all() for concurrent requests
+     * - Respects GCS rate limits (100 concurrent by default)
+     * - Chunks large batches to prevent memory issues
+     *
+     * **GCS Specifics**:
+     * - No true "batch API" - uses parallel GetObject operations
+     * - Optimal concurrency: 50-100 concurrent downloads
+     * - Each download is a separate HTTPS request
+     *
+     * @param paths Array of GCS object paths to read
+     * @returns Map of path → data (only successful reads included)
+     *
+     * @public - Called by baseStorage.readBatchFromAdapter()
+     * @since v5.12.0
+     */
+    readBatch(paths: string[]): Promise<Map<string, any>>;
+    /**
+     * Get GCS-specific batch configuration (v5.12.0)
+     *
+     * GCS performs well with high concurrency due to HTTP/2 multiplexing
+     *
+     * @public - Overrides BaseStorage.getBatchConfig()
+     * @since v5.12.0
+     */
+    getBatchConfig(): StorageBatchConfig;
     /**
      * Delete an object from a specific path in GCS
      * Primitive operation required by base class

package/dist/storage/adapters/gcsStorage.js
@@ -99,32 +99,6 @@ export class GcsStorage extends BaseStorage {
             prodLog.info('🚀 High-volume mode FORCED via BRAINY_FORCE_HIGH_VOLUME environment variable');
         }
     }
-    /**
-     * Get GCS-optimized batch configuration
-     *
-     * GCS has strict rate limits (~5000 writes/second per bucket) and benefits from:
-     * - Moderate batch sizes (50 items)
-     * - Sequential processing (not parallel)
-     * - Delays between batches (100ms)
-     *
-     * Note: Each entity write involves 2 operations (vector + metadata),
-     * so 800 ops/sec = ~400 entities/sec = ~2500 actual GCS writes/sec
-     *
-     * @returns GCS-optimized batch configuration
-     * @since v4.11.0
-     */
-    getBatchConfig() {
-        return {
-            maxBatchSize: 50,
-            batchDelayMs: 100,
-            maxConcurrent: 50,
-            supportsParallelWrites: false, // Sequential is safer for GCS rate limits
-            rateLimit: {
-                operationsPerSecond: 800, // Conservative estimate for entity operations
-                burstCapacity: 200
-            }
-        };
-    }
     /**
      * Initialize the storage adapter
      */

@@ -191,7 +165,8 @@ export class GcsStorage extends BaseStorage {
             this.nounCacheManager.clear();
             this.verbCacheManager.clear();
             prodLog.info('✅ Cache cleared - starting fresh');
-
+            // v6.0.0: Initialize GraphAdjacencyIndex and type statistics
+            await super.init();
         }
         catch (error) {
             this.logger.error('Failed to initialize GCS storage:', error);

@@ -540,6 +515,86 @@ export class GcsStorage extends BaseStorage {
             throw BrainyError.fromError(error, `readObjectFromPath(${path})`);
         }
     }
+    /**
+     * Batch read multiple objects from GCS (v5.12.0 - Cloud Storage Optimization)
+     *
+     * **Performance**: GCS-optimized parallel downloads
+     * - Uses Promise.all() for concurrent requests
+     * - Respects GCS rate limits (100 concurrent by default)
+     * - Chunks large batches to prevent memory issues
+     *
+     * **GCS Specifics**:
+     * - No true "batch API" - uses parallel GetObject operations
+     * - Optimal concurrency: 50-100 concurrent downloads
+     * - Each download is a separate HTTPS request
+     *
+     * @param paths Array of GCS object paths to read
+     * @returns Map of path → data (only successful reads included)
+     *
+     * @public - Called by baseStorage.readBatchFromAdapter()
+     * @since v5.12.0
+     */
+    async readBatch(paths) {
+        await this.ensureInitialized();
+        const results = new Map();
+        if (paths.length === 0)
+            return results;
+        // Get batch configuration for optimal GCS performance
+        const batchConfig = this.getBatchConfig();
+        const chunkSize = batchConfig.maxConcurrent || 100;
+        this.logger.debug(`[GCS Batch] Reading ${paths.length} objects in chunks of ${chunkSize}`);
+        // Process in chunks to respect rate limits and prevent memory issues
+        for (let i = 0; i < paths.length; i += chunkSize) {
+            const chunk = paths.slice(i, i + chunkSize);
+            this.logger.trace(`[GCS Batch] Processing chunk ${Math.floor(i / chunkSize) + 1}/${Math.ceil(paths.length / chunkSize)}`);
+            // Parallel download for this chunk
+            const chunkResults = await Promise.allSettled(chunk.map(async (path) => {
+                try {
+                    const file = this.bucket.file(path);
+                    const [contents] = await file.download();
+                    const data = JSON.parse(contents.toString());
+                    return { path, data, success: true };
+                }
+                catch (error) {
+                    // Silently skip 404s (expected for missing entities)
+                    if (error.code === 404) {
+                        return { path, data: null, success: false };
+                    }
+                    // Log other errors but don't fail the batch
+                    this.logger.warn(`[GCS Batch] Failed to read ${path}: ${error.message}`);
+                    return { path, data: null, success: false };
+                }
+            }));
+            // Collect successful results
+            for (const result of chunkResults) {
+                if (result.status === 'fulfilled' && result.value.success && result.value.data !== null) {
+                    results.set(result.value.path, result.value.data);
+                }
+            }
+        }
+        this.logger.debug(`[GCS Batch] Successfully read ${results.size}/${paths.length} objects`);
+        return results;
+    }
+    /**
+     * Get GCS-specific batch configuration (v5.12.0)
+     *
+     * GCS performs well with high concurrency due to HTTP/2 multiplexing
+     *
+     * @public - Overrides BaseStorage.getBatchConfig()
+     * @since v5.12.0
+     */
+    getBatchConfig() {
+        return {
+            maxBatchSize: 1000, // GCS can handle large batches
+            batchDelayMs: 0, // No rate limiting needed (HTTP/2 handles it)
+            maxConcurrent: 100, // Optimal for GCS (tested up to 200)
+            supportsParallelWrites: true,
+            rateLimit: {
+                operationsPerSecond: 1000, // GCS is fast
+                burstCapacity: 5000
+            }
+        };
+    }
     /**
      * Delete an object from a specific path in GCS
      * Primitive operation required by base class

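The chunking arithmetic in these readBatch implementations is straightforward: with maxConcurrent = 100, a request for 250 paths becomes ceil(250 / 100) = 3 chunks of sizes 100, 100, and 50, and each chunk's Promise.allSettled() is awaited before the next chunk starts. A standalone sketch of that slicing pattern, generic rather than tied to any adapter:

    // Standalone sketch of the chunked-parallel pattern used by readBatch:
    // slice the input into maxConcurrent-sized chunks and settle each chunk
    // fully before starting the next, bounding in-flight requests.
    async function inChunks<T, R>(
        items: T[],
        chunkSize: number,
        worker: (item: T) => Promise<R>
    ): Promise<PromiseSettledResult<R>[]> {
        const settled: PromiseSettledResult<R>[] = [];
        for (let i = 0; i < items.length; i += chunkSize) {
            const chunk = items.slice(i, i + chunkSize);
            settled.push(...await Promise.allSettled(chunk.map(worker)));
        }
        return settled;
    }
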
package/dist/storage/adapters/historicalStorageAdapter.js
@@ -107,8 +107,8 @@ export class HistoricalStorageAdapter extends BaseStorage {
         if (!commit) {
             throw new Error(`Commit not found: ${this.commitId}`);
         }
-        //
-
+        // v6.0.0: Initialize GraphAdjacencyIndex and type statistics
+        await super.init();
     }
     // ============= Abstract Method Implementations =============
     /**

package/dist/storage/adapters/memoryStorage.d.ts
@@ -30,7 +30,7 @@ export declare class MemoryStorage extends BaseStorage {
     getBatchConfig(): StorageBatchConfig;
     /**
      * Initialize the storage adapter
-     *
+     * v6.0.0: Calls super.init() to initialize GraphAdjacencyIndex and type statistics
      */
     init(): Promise<void>;
     /**

package/dist/storage/adapters/memoryStorage.js
@@ -55,10 +55,10 @@ export class MemoryStorage extends BaseStorage {
     }
     /**
      * Initialize the storage adapter
-     *
+     * v6.0.0: Calls super.init() to initialize GraphAdjacencyIndex and type statistics
      */
     async init() {
-
+        await super.init();
     }
     // v5.4.0: Removed saveNoun_internal and getNoun_internal - using BaseStorage's type-first implementation
     /**

@@ -248,25 +248,23 @@ export class MemoryStorage extends BaseStorage {
      * Initialize counts from in-memory storage - O(1) operation (v4.0.0)
      */
     async initializeCounts() {
-        //
+        // v6.0.0: Scan objectStore paths (ID-first structure) to count entities
         this.entityCounts.clear();
         this.verbCounts.clear();
         let totalNouns = 0;
         let totalVerbs = 0;
         // Scan all paths in objectStore
         for (const path of this.objectStore.keys()) {
-            // Count nouns
-            const nounMatch = path.match(/^entities\/nouns\/
+            // Count nouns (entities/nouns/{shard}/{id}/vectors.json)
+            const nounMatch = path.match(/^entities\/nouns\/[0-9a-f]{2}\/[^/]+\/vectors\.json$/);
             if (nounMatch) {
-
-                this.entityCounts.set(type, (this.entityCounts.get(type) || 0) + 1);
+                // v6.0.0: Type is in metadata, not path - just count total
                 totalNouns++;
             }
-            // Count verbs
-            const verbMatch = path.match(/^entities\/verbs\/
+            // Count verbs (entities/verbs/{shard}/{id}/vectors.json)
+            const verbMatch = path.match(/^entities\/verbs\/[0-9a-f]{2}\/[^/]+\/vectors\.json$/);
             if (verbMatch) {
-
-                this.verbCounts.set(type, (this.verbCounts.get(type) || 0) + 1);
+                // v6.0.0: Type is in metadata, not path - just count total
                 totalVerbs++;
             }
         }

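The new counting regexes anchor on the two-hex-digit shard segment and the vectors.json filename, so other files under the same entity directory are not double-counted. A quick check of the noun regex against sample paths (the IDs are made up for illustration):

    // Quick check of the v6.0.0 noun-counting regex; the IDs are invented.
    const nounRe = /^entities\/nouns\/[0-9a-f]{2}\/[^/]+\/vectors\.json$/;

    console.log(nounRe.test('entities/nouns/ab/1234/vectors.json'));  // true
    console.log(nounRe.test('entities/nouns/ab/1234/metadata.json')); // false (not vectors.json)
    console.log(nounRe.test('entities/nouns/abc/1234/vectors.json')); // false (shard must be 2 hex chars)
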
package/dist/storage/adapters/opfsStorage.js
@@ -131,7 +131,8 @@ export class OPFSStorage extends BaseStorage {
             });
             // Initialize counts from storage
             await this.initializeCounts();
-
+            // v6.0.0: Initialize GraphAdjacencyIndex and type statistics
+            await super.init();
         }
         catch (error) {
             console.error('Failed to initialize OPFS storage:', error);

package/dist/storage/adapters/r2Storage.d.ts
@@ -83,22 +83,33 @@ export declare class R2Storage extends BaseStorage {
     readOnly?: boolean;
     });
     /**
-     * Get R2-optimized batch configuration
+     * Get R2-optimized batch configuration with native batch API support
      *
-     *
-     * -
-     * -
-     * -
+     * R2 excels at parallel operations with Cloudflare's global edge network:
+     * - Very large batch sizes (up to 1000 paths)
+     * - Zero delay (Cloudflare handles rate limiting automatically)
+     * - High concurrency (150 parallel optimal, R2 has no egress fees)
      *
-     * R2
-     *
-     * - Parallel processing
-     * - Short delays (50ms)
+     * R2 supports very high throughput (~6000+ ops/sec with burst up to 12,000)
+     * Zero egress fees enable aggressive caching and parallel downloads
      *
      * @returns R2-optimized batch configuration
-     * @since
+     * @since v5.12.0 - Updated for native batch API
      */
     getBatchConfig(): StorageBatchConfig;
+    /**
+     * Batch read operation using R2's S3-compatible parallel download
+     *
+     * Uses Promise.allSettled() for maximum parallelism with GetObjectCommand.
+     * R2's global edge network and zero egress fees make this extremely efficient.
+     *
+     * Performance: ~150 concurrent requests = <400ms for 150 objects (faster than S3)
+     *
+     * @param paths - Array of R2 object keys to read
+     * @returns Map of path -> parsed JSON data (only successful reads)
+     * @since v5.12.0
+     */
+    readBatch(paths: string[]): Promise<Map<string, any>>;
     /**
      * Initialize the storage adapter
      */

package/dist/storage/adapters/r2Storage.js
@@ -102,33 +102,88 @@ export class R2Storage extends BaseStorage {
         }
     }
     /**
-     * Get R2-optimized batch configuration
+     * Get R2-optimized batch configuration with native batch API support
      *
-     *
-     * -
-     * -
-     * -
+     * R2 excels at parallel operations with Cloudflare's global edge network:
+     * - Very large batch sizes (up to 1000 paths)
+     * - Zero delay (Cloudflare handles rate limiting automatically)
+     * - High concurrency (150 parallel optimal, R2 has no egress fees)
      *
-     * R2
-     *
-     * - Parallel processing
-     * - Short delays (50ms)
+     * R2 supports very high throughput (~6000+ ops/sec with burst up to 12,000)
+     * Zero egress fees enable aggressive caching and parallel downloads
      *
      * @returns R2-optimized batch configuration
-     * @since
+     * @since v5.12.0 - Updated for native batch API
      */
     getBatchConfig() {
         return {
-            maxBatchSize:
-            batchDelayMs:
-            maxConcurrent:
-            supportsParallelWrites: true, // R2
+            maxBatchSize: 1000, // R2 can handle very large batches
+            batchDelayMs: 0, // No artificial delay needed
+            maxConcurrent: 150, // Optimal for R2's global network
+            supportsParallelWrites: true, // R2 excels at parallel operations
             rateLimit: {
-                operationsPerSecond:
-                burstCapacity:
+                operationsPerSecond: 6000, // R2 has excellent throughput
+                burstCapacity: 12000 // High burst capacity
             }
         };
     }
+    /**
+     * Batch read operation using R2's S3-compatible parallel download
+     *
+     * Uses Promise.allSettled() for maximum parallelism with GetObjectCommand.
+     * R2's global edge network and zero egress fees make this extremely efficient.
+     *
+     * Performance: ~150 concurrent requests = <400ms for 150 objects (faster than S3)
+     *
+     * @param paths - Array of R2 object keys to read
+     * @returns Map of path -> parsed JSON data (only successful reads)
+     * @since v5.12.0
+     */
+    async readBatch(paths) {
+        await this.ensureInitialized();
+        const results = new Map();
+        if (paths.length === 0)
+            return results;
+        const batchConfig = this.getBatchConfig();
+        const chunkSize = batchConfig.maxConcurrent || 150;
+        this.logger.debug(`[R2 Batch] Reading ${paths.length} objects in chunks of ${chunkSize}`);
+        // Import GetObjectCommand (R2 uses S3-compatible API)
+        const { GetObjectCommand } = await import('@aws-sdk/client-s3');
+        // Process in chunks to respect concurrency limits
+        for (let i = 0; i < paths.length; i += chunkSize) {
+            const chunk = paths.slice(i, i + chunkSize);
+            // Parallel download for this chunk
+            const chunkResults = await Promise.allSettled(chunk.map(async (path) => {
+                try {
+                    const response = await this.s3Client.send(new GetObjectCommand({
+                        Bucket: this.bucketName,
+                        Key: path
+                    }));
+                    if (!response || !response.Body) {
+                        return { path, data: null, success: false };
+                    }
+                    const bodyContents = await response.Body.transformToString();
+                    const data = JSON.parse(bodyContents);
+                    return { path, data, success: true };
+                }
+                catch (error) {
+                    // 404 and other errors are expected (not all paths may exist)
+                    if (error.name !== 'NoSuchKey' && error.$metadata?.httpStatusCode !== 404) {
+                        this.logger.warn(`[R2 Batch] Failed to read ${path}: ${error.message}`);
+                    }
+                    return { path, data: null, success: false };
+                }
+            }));
+            // Collect successful results
+            for (const result of chunkResults) {
+                if (result.status === 'fulfilled' && result.value.success && result.value.data !== null) {
+                    results.set(result.value.path, result.value.data);
+                }
+            }
+        }
+        this.logger.debug(`[R2 Batch] Successfully read ${results.size}/${paths.length} objects`);
+        return results;
+    }
     /**
      * Initialize the storage adapter
      */

@@ -177,7 +232,8 @@ export class R2Storage extends BaseStorage {
             prodLog.info('🧹 R2: Clearing cache from previous run');
             this.nounCacheManager.clear();
             this.verbCacheManager.clear();
-
+            // v6.0.0: Initialize GraphAdjacencyIndex and type statistics
+            await super.init();
         }
         catch (error) {
             this.logger.error('Failed to initialize R2 storage:', error);

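The R2 catch block distinguishes expected misses from real failures by checking both the AWS SDK v3 error name and the HTTP status in the error metadata. A standalone sketch of that predicate (the helper name is ours, not the package's):

    // Sketch of the "expected miss" test used in the R2 catch block:
    // AWS SDK v3 surfaces a missing key as error.name === 'NoSuchKey'
    // and/or an HTTP 404 in error.$metadata.httpStatusCode.
    function isExpectedMiss(error: any): boolean {
        return error?.name === 'NoSuchKey' ||
            error?.$metadata?.httpStatusCode === 404;
    }

Only genuine failures get logged; misses fall through silently, which is why the returned Map contains only the paths that actually existed.
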
package/dist/storage/adapters/s3CompatibleStorage.d.ts
@@ -104,19 +104,32 @@ export declare class S3CompatibleStorage extends BaseStorage {
     readOnly?: boolean;
     });
     /**
-     * Get S3-optimized batch configuration
+     * Get S3-optimized batch configuration with native batch API support
      *
-     * S3 has
-     * -
-     * -
-     * -
+     * S3 has excellent throughput and handles parallel operations efficiently:
+     * - Large batch sizes (up to 1000 paths)
+     * - No artificial delay needed (S3 handles load automatically)
+     * - High concurrency (150 parallel requests optimal for most workloads)
      *
-     * S3
+     * S3 supports ~5000 operations/second with burst capacity up to 10,000
      *
      * @returns S3-optimized batch configuration
-     * @since
+     * @since v5.12.0 - Updated for native batch API
      */
     getBatchConfig(): StorageBatchConfig;
+    /**
+     * Batch read operation using S3's parallel download capabilities
+     *
+     * Uses Promise.allSettled() for maximum parallelism with GetObjectCommand.
+     * S3's HTTP/2 and connection pooling make this extremely efficient.
+     *
+     * Performance: ~150 concurrent requests = <500ms for 150 objects
+     *
+     * @param paths - Array of S3 object keys to read
+     * @returns Map of path -> parsed JSON data (only successful reads)
+     * @since v5.12.0
+     */
+    readBatch(paths: string[]): Promise<Map<string, any>>;
     /**
      * Initialize the storage adapter
      */