@soulcraft/brainy 3.23.0 → 3.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/dist/coreTypes.d.ts +6 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +1 -0
- package/dist/storage/adapters/fileSystemStorage.d.ts +65 -8
- package/dist/storage/adapters/fileSystemStorage.js +500 -220
- package/dist/storage/adapters/opfsStorage.js +1 -1
- package/dist/utils/metadataIndex.js +1 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,11 @@
 
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
 
+### [3.24.0](https://github.com/soulcraftlabs/brainy/compare/v3.23.1...v3.24.0) (2025-10-07)
+
+- feat: simplify sharding to fixed depth-1 for reliability and performance (87515b9)
+
+
 ### [3.23.0](https://github.com/soulcraftlabs/brainy/compare/v3.22.0...v3.23.0) (2025-10-04)
 
 - refactor: streamline core API surface
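The feat commit above replaces dynamic sharding with a fixed single-level scheme: every entity file lives under a two-hex-character shard directory. A minimal TypeScript sketch of the path rule (illustrative only; the shipped implementation is getShardedPath in fileSystemStorage.js further down this diff):

    // Sketch: map an entity ID to its fixed depth-1 shard path.
    import * as path from 'path'

    function shardPathFor(baseDir: string, id: string): string {
      const shard = id.substring(0, 2).toLowerCase() // 256 possible shards: 00-ff
      return path.join(baseDir, shard, `${id}.json`) // e.g. nouns/ab/<uuid>.json
    }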
package/dist/coreTypes.d.ts
CHANGED
@@ -419,6 +419,12 @@ export interface StorageAdapter {
      * @returns Promise that resolves to a Map of id -> metadata
      */
     getMetadataBatch?(ids: string[]): Promise<Map<string, any>>;
+    /**
+     * Get noun metadata from storage
+     * @param id The ID of the noun
+     * @returns Promise that resolves to the metadata or null if not found
+     */
+    getNounMetadata(id: string): Promise<any | null>;
     /**
      * Save verb metadata to storage
      * @param id The ID of the verb
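The added getNounMetadata is a new required member of the StorageAdapter interface. A minimal consumer sketch against exactly that signature (only the method name and types come from this diff; the function around it is hypothetical):

    // Hypothetical consumer: fetch one noun's metadata, tolerating absence.
    async function describeNoun(adapter: StorageAdapter, id: string): Promise<string> {
      const metadata = await adapter.getNounMetadata(id) // resolves to the metadata or null
      return metadata === null ? `${id}: not found` : `${id}: ${JSON.stringify(metadata)}`
    }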
package/dist/storage/adapters/baseStorageAdapter.d.ts
CHANGED

@@ -20,6 +20,7 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
     abstract deleteVerb(id: string): Promise<void>;
     abstract saveMetadata(id: string, metadata: any): Promise<void>;
     abstract getMetadata(id: string): Promise<any | null>;
+    abstract getNounMetadata(id: string): Promise<any | null>;
     abstract saveVerbMetadata(id: string, metadata: any): Promise<void>;
     abstract getVerbMetadata(id: string): Promise<any | null>;
     abstract clear(): Promise<void>;
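Because the new member is abstract, any third-party adapter extending BaseStorageAdapter must now implement it to compile against 3.24.0. A sketch of the one addition such an adapter needs (the class and its backing map are hypothetical; the other abstract members are elided):

    class InMemoryAdapter extends BaseStorageAdapter {
      private nounMetadata = new Map<string, any>()

      async getNounMetadata(id: string): Promise<any | null> {
        return this.nounMetadata.get(id) ?? null
      }
      // ...remaining abstract members omitted for brevity
    }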
package/dist/storage/adapters/fileSystemStorage.d.ts
CHANGED

@@ -12,9 +12,9 @@ type Edge = HNSWVerb;
  */
 export declare class FileSystemStorage extends BaseStorage {
     private countsFilePath?;
-    private readonly
-    private readonly
-    private cachedShardingDepth
+    private readonly SHARDING_DEPTH;
+    private readonly MAX_SHARDS;
+    private cachedShardingDepth;
     private rootDir;
     private nounsDir;
     private verbsDir;

@@ -270,8 +270,55 @@ export declare class FileSystemStorage extends BaseStorage {
      */
     protected persistCounts(): Promise<void>;
     /**
-     *
-     *
+     * Migrate files from one sharding depth to another
+     * Handles: 0→1 (flat to single-level), 2→1 (deep to single-level)
+     * Uses atomic file operations and comprehensive error handling
+     *
+     * @param fromDepth - Source sharding depth
+     * @param toDepth - Target sharding depth (must be 1)
+     */
+    private migrateShardingStructure;
+    /**
+     * Discover all files that need to be migrated
+     * Constructs correct oldPath based on source depth
+     */
+    private discoverFilesForMigration;
+    /**
+     * Get all files at a specific depth
+     */
+    private getAllFilesAtDepth;
+    /**
+     * Create all 256 shard directories (00-ff)
+     */
+    private createAllShardDirectories;
+    /**
+     * Migrate a single file atomically
+     */
+    private migrateFile;
+    /**
+     * Clean up empty directories after migration
+     */
+    private cleanupEmptyDirectories;
+    /**
+     * Count files in the current structure
+     */
+    private countFilesInStructure;
+    /**
+     * Detect the actual sharding depth used by existing files
+     * Examines directory structure to determine current sharding strategy
+     * Returns null if no files exist yet (new installation)
+     */
+    private detectExistingShardingDepth;
+    /**
+     * Get sharding depth
+     * Always returns 1 (single-level sharding) for optimal balance of
+     * simplicity, performance, and reliability across all dataset sizes
+     *
+     * Single-level sharding (depth=1):
+     * - 256 shard directories (00-ff)
+     * - Handles 2.5M+ entities with excellent performance
+     * - No dynamic depth changes = no path mismatch bugs
+     * - Industry standard approach (Git uses similar)
      */
     private getOptimalShardingDepth;
     /**

@@ -285,12 +332,17 @@ export declare class FileSystemStorage extends BaseStorage {
     private getVerbPath;
     /**
      * Universal sharded path generator
-     *
+     * Always uses depth=1 (single-level sharding) for consistency
+     *
+     * Format: baseDir/ab/uuid.json
+     * Where 'ab' = first 2 hex characters of UUID (lowercase)
+     *
+     * Validates UUID format and throws descriptive errors
      */
     private getShardedPath;
     /**
-     * Get all JSON files from
-     *
+     * Get all JSON files from the single-level sharded directory structure
+     * Traverses all shard subdirectories (00-ff)
      */
     private getAllShardedFiles;
     /**

@@ -302,6 +354,11 @@ export declare class FileSystemStorage extends BaseStorage {
      * Stream through sharded files without loading all names into memory
      * Production-scale implementation for millions of files
      */
+    /**
+     * Stream through files in single-level sharded structure
+     * Calls processor for each file until processor returns false
+     * Returns true if more files exist (processor stopped early), false if all processed
+     */
     private streamShardedFiles;
     /**
      * Check if a file exists (handles both sharded and non-sharded)
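The migration helpers declared above move files between the documented layouts. For one illustrative ID, the three formats relate like this (paths follow the depth rules shown later in fileSystemStorage.js; the UUID itself is made up):

    id = ab12ef34-0000-4000-8000-000000000000
    depth 0 (flat):   nouns/ab12ef34-0000-4000-8000-000000000000.json
    depth 2 (deep):   nouns/ab/12/ab12ef34-0000-4000-8000-000000000000.json
    depth 1 (target): nouns/ab/ab12ef34-0000-4000-8000-000000000000.json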
package/dist/storage/adapters/fileSystemStorage.js
CHANGED

@@ -37,9 +37,15 @@ export class FileSystemStorage extends BaseStorage {
      */
     constructor(rootDirectory) {
         super();
-        //
-
-
+        // Fixed sharding configuration for optimal balance of simplicity and performance
+        // Single-level sharding (depth=1) provides excellent performance for 1-2.5M entities
+        // Structure: nouns/ab/uuid.json where 'ab' = first 2 hex chars of UUID
+        // - 256 shard directories (00-ff)
+        // - Handles 2.5M+ entities with < 10K files per shard
+        // - Eliminates dynamic depth changes that cause path mismatch bugs
+        this.SHARDING_DEPTH = 1;
+        this.MAX_SHARDS = 256; // Hex range: 00-ff
+        this.cachedShardingDepth = this.SHARDING_DEPTH; // Always use fixed depth
         this.useDualWrite = true; // Write to both locations during migration
         this.activeLocks = new Set();
         this.lockTimers = new Map(); // Track timers for cleanup
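The "< 10K files per shard" claim in the new comment is consistent with the stated capacity: UUIDs distribute roughly uniformly over their first byte, so 2.5M entities across 256 shard directories come to about 2,500,000 / 256 ≈ 9,766 files per shard.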
@@ -100,11 +106,27 @@
             // Initialize count management
             this.countsFilePath = path.join(this.systemDir, 'counts.json');
             await this.initializeCounts();
-            //
-
-
-
-
+            // Detect existing sharding structure and migrate if needed
+            const detectedDepth = await this.detectExistingShardingDepth();
+            if (detectedDepth !== null && detectedDepth !== this.SHARDING_DEPTH) {
+                // Migration needed: existing structure doesn't match our fixed depth
+                console.log(`📦 Brainy Storage Migration`);
+                console.log(`   Current structure: depth ${detectedDepth}`);
+                console.log(`   Target structure: depth ${this.SHARDING_DEPTH}`);
+                console.log(`   Entities to migrate: ${this.totalNounCount}`);
+                await this.migrateShardingStructure(detectedDepth, this.SHARDING_DEPTH);
+                console.log(`✅ Migration complete - now using depth ${this.SHARDING_DEPTH} sharding`);
+            }
+            else if (detectedDepth === null) {
+                // New installation
+                console.log(`📁 New installation: using depth ${this.SHARDING_DEPTH} sharding (optimal for 1-2.5M entities)`);
+            }
+            else {
+                // Already using correct depth
+                console.log(`📁 Using depth ${this.SHARDING_DEPTH} sharding (${this.totalNounCount} entities)`);
+            }
+            // Always use fixed depth after migration/detection
+            this.cachedShardingDepth = this.SHARDING_DEPTH;
             this.isInitialized = true;
         }
         catch (error) {
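Initialization therefore resolves to one of three paths: migrate (detected depth 0 or 2), fresh install (null), or no-op (already depth 1). A hedged usage sketch of the first case — the constructor is from this diff, but the init() entry point is an assumption about how this compiled block is reached:

    // Assumption: initialization is invoked via a public init() method.
    const storage = new FileSystemStorage('./brainy-data')
    await storage.init()
    // On a store created at depth 2, the log starts with "📦 Brainy Storage Migration"
    // and ends with "✅ Migration complete - now using depth 1 sharding".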
@@ -464,7 +486,7 @@ export class FileSystemStorage extends BaseStorage {
             const batch = ids.slice(i, i + batchSize);
             const batchPromises = batch.map(async (id) => {
                 try {
-                    const metadata = await this.
+                    const metadata = await this.getNounMetadata(id);
                     return { id, metadata };
                 }
                 catch (error) {
@@ -1372,11 +1394,15 @@ export class FileSystemStorage extends BaseStorage {
      */
     async initializeCountsFromDisk() {
         try {
-            //
-
+            // CRITICAL: Detect existing depth before counting
+            // Can't use getAllShardedFiles() which assumes depth=1
+            const existingDepth = await this.detectExistingShardingDepth();
+            const depthToUse = existingDepth !== null ? existingDepth : this.SHARDING_DEPTH;
+            // Count nouns using detected depth
+            const validNounFiles = await this.getAllFilesAtDepth(this.nounsDir, depthToUse);
             this.totalNounCount = validNounFiles.length;
-            // Count verbs
-            const validVerbFiles = await this.
+            // Count verbs using detected depth
+            const validVerbFiles = await this.getAllFilesAtDepth(this.verbsDir, depthToUse);
             this.totalVerbCount = validVerbFiles.length;
             // Sample some files to get type distribution (don't read all)
             const sampleSize = Math.min(100, validNounFiles.length);
@@ -1384,7 +1410,22 @@
                 try {
                     const file = validNounFiles[i];
                     const id = file.replace('.json', '');
-
+                    // Construct path using detected depth (not cached depth which may be wrong)
+                    let filePath;
+                    switch (depthToUse) {
+                        case 0:
+                            filePath = path.join(this.nounsDir, `${id}.json`);
+                            break;
+                        case 1:
+                            filePath = path.join(this.nounsDir, id.substring(0, 2), `${id}.json`);
+                            break;
+                        case 2:
+                            filePath = path.join(this.nounsDir, id.substring(0, 2), id.substring(2, 4), `${id}.json`);
+                            break;
+                        default:
+                            throw new Error(`Unsupported depth: ${depthToUse}`);
+                    }
+                    const data = await fs.promises.readFile(filePath, 'utf-8');
                     const noun = JSON.parse(data);
                     const type = noun.metadata?.type || noun.metadata?.nounType || 'default';
                     this.entityCounts.set(type, (this.entityCounts.get(type) || 0) + 1);
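The depth switch above is repeated twice more in discoverFilesForMigration below (once for nouns, once for verbs). A small helper could factor out the rule — a refactoring sketch only, not part of the shipped code:

    // Hypothetical helper: single home for the depth-dependent path rule.
    import * as path from 'path'

    function pathAtDepth(baseDir: string, id: string, depth: 0 | 1 | 2): string {
      switch (depth) {
        case 0: return path.join(baseDir, `${id}.json`)
        case 1: return path.join(baseDir, id.substring(0, 2), `${id}.json`)
        case 2: return path.join(baseDir, id.substring(0, 2), id.substring(2, 4), `${id}.json`)
      }
    }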
@@ -1430,154 +1471,457 @@ export class FileSystemStorage extends BaseStorage {
     // Intelligent Directory Sharding
     // =============================================
     /**
-     *
-     *
+     * Migrate files from one sharding depth to another
+     * Handles: 0→1 (flat to single-level), 2→1 (deep to single-level)
+     * Uses atomic file operations and comprehensive error handling
+     *
+     * @param fromDepth - Source sharding depth
+     * @param toDepth - Target sharding depth (must be 1)
      */
-
-    //
-    if (
-
-    }
+    async migrateShardingStructure(fromDepth, toDepth) {
+        // Validation
+        if (fromDepth === toDepth) {
+            throw new Error(`Migration not needed: already at depth ${toDepth}`);
+        }
+        if (toDepth !== 1) {
+            throw new Error(`Migration only supports target depth 1 (got ${toDepth})`);
+        }
+        if (fromDepth !== 0 && fromDepth !== 2) {
+            throw new Error(`Migration only supports source depth 0 or 2 (got ${fromDepth})`);
+        }
+        // Create migration lock to prevent concurrent migrations
+        const lockFile = path.join(this.systemDir, '.migration-lock');
+        const lockExists = await this.fileExists(lockFile);
+        if (lockExists) {
+            // Check if lock is stale (> 1 hour old)
+            try {
+                const stats = await fs.promises.stat(lockFile);
+                const lockAge = Date.now() - stats.mtimeMs;
+                const ONE_HOUR = 60 * 60 * 1000;
+                if (lockAge < ONE_HOUR) {
+                    throw new Error('Migration already in progress. If this is incorrect, delete .migration-lock file.');
+                }
+                // Lock is stale, remove it
+                console.log('⚠️ Removing stale migration lock (> 1 hour old)');
+                await fs.promises.unlink(lockFile);
+            }
+            catch (error) {
+                if (error.code !== 'ENOENT') {
+                    throw error;
+                }
+            }
         }
-
-
+        try {
+            // Create lock file
+            await fs.promises.writeFile(lockFile, JSON.stringify({
+                startedAt: new Date().toISOString(),
+                fromDepth,
+                toDepth,
+                pid: process.pid
+            }));
+            // Discover all files to migrate
+            console.log('📊 Discovering files to migrate...');
+            const filesToMigrate = await this.discoverFilesForMigration(fromDepth);
+            if (filesToMigrate.length === 0) {
+                console.log('ℹ️ No files to migrate');
+                return;
+            }
+            console.log(`📦 Migrating ${filesToMigrate.length} files...`);
+            // Create all target shard directories upfront
+            await this.createAllShardDirectories(this.nounsDir);
+            await this.createAllShardDirectories(this.verbsDir);
+            // Migrate files with progress tracking
+            let migratedCount = 0;
+            let skippedCount = 0;
+            const errors = [];
+            for (const fileInfo of filesToMigrate) {
+                try {
+                    await this.migrateFile(fileInfo, fromDepth, toDepth);
+                    migratedCount++;
+                    // Progress update every 1000 files
+                    if (migratedCount % 1000 === 0) {
+                        const percent = ((migratedCount / filesToMigrate.length) * 100).toFixed(1);
+                        console.log(`   📊 Progress: ${migratedCount}/${filesToMigrate.length} (${percent}%)`);
+                    }
+                    // Yield to event loop every 100 files to prevent blocking
+                    if (migratedCount % 100 === 0) {
+                        await new Promise(resolve => setImmediate(resolve));
+                    }
+                }
+                catch (error) {
+                    skippedCount++;
+                    errors.push({
+                        file: fileInfo.oldPath,
+                        error: error.message
+                    });
+                    // Log first few errors
+                    if (errors.length <= 5) {
+                        console.warn(`⚠️ Skipped ${fileInfo.oldPath}: ${error.message}`);
+                    }
+                }
+            }
+            // Final summary
+            console.log(`\n✅ Migration Results:`);
+            console.log(`   Migrated: ${migratedCount} files`);
+            console.log(`   Skipped: ${skippedCount} files`);
+            if (errors.length > 0) {
+                console.warn(`\n⚠️ ${errors.length} files could not be migrated`);
+                if (errors.length > 5) {
+                    console.warn(`   (First 5 errors shown above, ${errors.length - 5} more occurred)`);
+                }
+            }
+            // Cleanup: Remove empty old directories
+            if (fromDepth === 0) {
+                // No subdirectories to clean for flat structure
+            }
+            else if (fromDepth === 2) {
+                await this.cleanupEmptyDirectories(this.nounsDir, fromDepth);
+                await this.cleanupEmptyDirectories(this.verbsDir, fromDepth);
+            }
+            // Verification: Count files in new structure
+            const verifyCount = await this.countFilesInStructure(toDepth);
+            console.log(`\n🔍 Verification: ${verifyCount} files in new structure`);
+            if (verifyCount < migratedCount) {
+                console.warn(`⚠️ Warning: Verification count (${verifyCount}) < migrated count (${migratedCount})`);
+            }
         }
-
-
+        finally {
+            // Always remove lock file
+            try {
+                await fs.promises.unlink(lockFile);
+            }
+            catch (error) {
+                // Ignore error if lock file doesn't exist
+            }
         }
     }
     /**
-     *
-     *
+     * Discover all files that need to be migrated
+     * Constructs correct oldPath based on source depth
      */
-
-
-
-
-
-
-
-
+    async discoverFilesForMigration(fromDepth) {
+        const files = [];
+        // Discover noun files
+        const nounFiles = await this.getAllFilesAtDepth(this.nounsDir, fromDepth);
+        for (const filename of nounFiles) {
+            const id = filename.replace('.json', '');
+            // Construct correct oldPath based on fromDepth
+            let oldPath;
+            switch (fromDepth) {
+                case 0:
+                    // Flat: nouns/uuid.json
+                    oldPath = path.join(this.nounsDir, `${id}.json`);
+                    break;
+                case 1:
+                    // Single-level: nouns/ab/uuid.json
+                    oldPath = path.join(this.nounsDir, id.substring(0, 2), `${id}.json`);
+                    break;
+                case 2:
+                    // Deep: nouns/ab/cd/uuid.json
+                    oldPath = path.join(this.nounsDir, id.substring(0, 2), id.substring(2, 4), `${id}.json`);
+                    break;
+                default:
+                    throw new Error(`Unsupported fromDepth: ${fromDepth}`);
+            }
+            files.push({ oldPath, id, type: 'noun' });
+        }
+        // Discover verb files
+        const verbFiles = await this.getAllFilesAtDepth(this.verbsDir, fromDepth);
+        for (const filename of verbFiles) {
+            const id = filename.replace('.json', '');
+            // Construct correct oldPath based on fromDepth
+            let oldPath;
+            switch (fromDepth) {
+                case 0:
+                    // Flat: verbs/uuid.json
+                    oldPath = path.join(this.verbsDir, `${id}.json`);
+                    break;
+                case 1:
+                    // Single-level: verbs/ab/uuid.json
+                    oldPath = path.join(this.verbsDir, id.substring(0, 2), `${id}.json`);
+                    break;
+                case 2:
+                    // Deep: verbs/ab/cd/uuid.json
+                    oldPath = path.join(this.verbsDir, id.substring(0, 2), id.substring(2, 4), `${id}.json`);
+                    break;
+                default:
+                    throw new Error(`Unsupported fromDepth: ${fromDepth}`);
+            }
+            files.push({ oldPath, id, type: 'verb' });
+        }
+        return files;
     }
     /**
-     *
-     * Consistent across all entity types
-     */
-    getShardedPath(baseDir, id) {
-        const depth = this.cachedShardingDepth ?? this.getOptimalShardingDepth();
-        switch (depth) {
-            case 0:
-                // Flat structure: /nouns/uuid.json
-                return path.join(baseDir, `${id}.json`);
-            case 1:
-                // Single-level sharding: /nouns/ab/uuid.json
-                const shard1 = id.substring(0, 2);
-                return path.join(baseDir, shard1, `${id}.json`);
-            case 2:
-            default:
-                // Deep sharding: /nouns/ab/cd/uuid.json
-                const shard1Deep = id.substring(0, 2);
-                const shard2Deep = id.substring(2, 4);
-                return path.join(baseDir, shard1Deep, shard2Deep, `${id}.json`);
-        }
-    }
-    /**
-     * Get all JSON files from a sharded directory structure
-     * Properly traverses sharded subdirectories based on current sharding depth
+     * Get all files at a specific depth
      */
-    async
+    async getAllFilesAtDepth(baseDir, depth) {
         const allFiles = [];
-        const depth = this.cachedShardingDepth ?? this.getOptimalShardingDepth();
         try {
+            const dirExists = await this.directoryExists(baseDir);
+            if (!dirExists) {
+                return [];
+            }
             switch (depth) {
                 case 0:
-                    // Flat
-                    const
-                    for (const
-                    if (
-                    allFiles.push(
+                    // Flat: files directly in baseDir
+                    const entries = await fs.promises.readdir(baseDir);
+                    for (const entry of entries) {
+                        if (entry.endsWith('.json')) {
+                            allFiles.push(entry);
                         }
                     }
                     break;
                 case 1:
-                    // Single-level
-                    allFiles.push(file);
-                    }
+                    // Single-level: baseDir/ab/uuid.json
+                    const shardDirs = await fs.promises.readdir(baseDir);
+                    for (const shard of shardDirs) {
+                        const shardPath = path.join(baseDir, shard);
+                        try {
+                            const stat = await fs.promises.stat(shardPath);
+                            if (stat.isDirectory()) {
+                                const shardFiles = await fs.promises.readdir(shardPath);
+                                for (const file of shardFiles) {
+                                    if (file.endsWith('.json')) {
+                                        allFiles.push(file);
                                     }
                                 }
                             }
-                    catch (shardError) {
-                        // Skip inaccessible shard directories
-                        continue;
-                    }
                         }
-                    // If baseDir doesn't exist, return empty array
-                    if (baseError.code === 'ENOENT') {
-                        return [];
+                        catch (error) {
+                            // Skip inaccessible directories
                         }
-                    throw baseError;
                     }
                     break;
                 case 2:
-                    const
-                    const
-                    const
-                    const
-                    const
-                    const
-                    if (file.endsWith('.json')) {
-                        allFiles.push(file);
-                    }
+                    // Deep: baseDir/ab/cd/uuid.json
+                    const level1Dirs = await fs.promises.readdir(baseDir);
+                    for (const level1 of level1Dirs) {
+                        const level1Path = path.join(baseDir, level1);
+                        try {
+                            const level1Stat = await fs.promises.stat(level1Path);
+                            if (level1Stat.isDirectory()) {
+                                const level2Dirs = await fs.promises.readdir(level1Path);
+                                for (const level2 of level2Dirs) {
+                                    const level2Path = path.join(level1Path, level2);
+                                    try {
+                                        const level2Stat = await fs.promises.stat(level2Path);
+                                        if (level2Stat.isDirectory()) {
+                                            const files = await fs.promises.readdir(level2Path);
+                                            for (const file of files) {
+                                                if (file.endsWith('.json')) {
+                                                    allFiles.push(file);
                                                 }
                                             }
                                         }
-                    }
+                                    }
+                                    catch (error) {
+                                        // Skip inaccessible directories
                                     }
                                 }
                             }
+                        }
+                        catch (error) {
+                            // Skip inaccessible directories
+                        }
+                    }
+                    break;
+            }
+        }
+        catch (error) {
+            // Directory doesn't exist or not accessible
+        }
+        return allFiles;
+    }
+    /**
+     * Create all 256 shard directories (00-ff)
+     */
+    async createAllShardDirectories(baseDir) {
+        for (let i = 0; i < this.MAX_SHARDS; i++) {
+            const shard = i.toString(16).padStart(2, '0');
+            const shardDir = path.join(baseDir, shard);
+            await this.ensureDirectoryExists(shardDir);
+        }
+    }
+    /**
+     * Migrate a single file atomically
+     */
+    async migrateFile(fileInfo, fromDepth, toDepth) {
+        const baseDir = fileInfo.type === 'noun' ? this.nounsDir : this.verbsDir;
+        // Calculate old path (already known)
+        const oldPath = fileInfo.oldPath;
+        // Calculate new path using target depth
+        const shard = fileInfo.id.substring(0, 2).toLowerCase();
+        const newPath = path.join(baseDir, shard, `${fileInfo.id}.json`);
+        // Check if file already exists at new location
+        if (await this.fileExists(newPath)) {
+            // File already migrated or duplicate - skip
+            return;
+        }
+        // Atomic rename/move
+        await fs.promises.rename(oldPath, newPath);
+    }
+    /**
+     * Clean up empty directories after migration
+     */
+    async cleanupEmptyDirectories(baseDir, depth) {
+        try {
+            if (depth === 2) {
+                // Clean up level2 and level1 directories
+                const level1Dirs = await fs.promises.readdir(baseDir);
+                for (const level1 of level1Dirs) {
+                    const level1Path = path.join(baseDir, level1);
+                    try {
+                        const level1Stat = await fs.promises.stat(level1Path);
+                        if (level1Stat.isDirectory()) {
+                            const level2Dirs = await fs.promises.readdir(level1Path);
+                            for (const level2 of level2Dirs) {
+                                const level2Path = path.join(level1Path, level2);
+                                try {
+                                    // Try to remove level2 directory (will fail if not empty)
+                                    await fs.promises.rmdir(level2Path);
+                                }
+                                catch (error) {
+                                    // Directory not empty or other error - ignore
+                                }
                             }
+                            // Try to remove level1 directory
+                            await fs.promises.rmdir(level1Path);
                         }
                     }
-    catch (
-    //
-
-
+                    catch (error) {
+                        // Directory not empty or other error - ignore
+                    }
+                }
+            }
+        }
+        catch (error) {
+            // Cleanup is best-effort, don't throw
+        }
+    }
+    /**
+     * Count files in the current structure
+     */
+    async countFilesInStructure(depth) {
+        let count = 0;
+        count += (await this.getAllFilesAtDepth(this.nounsDir, depth)).length;
+        count += (await this.getAllFilesAtDepth(this.verbsDir, depth)).length;
+        return count;
+    }
+    /**
+     * Detect the actual sharding depth used by existing files
+     * Examines directory structure to determine current sharding strategy
+     * Returns null if no files exist yet (new installation)
+     */
+    async detectExistingShardingDepth() {
+        try {
+            // Check if nouns directory exists and has content
+            const dirExists = await this.directoryExists(this.nounsDir);
+            if (!dirExists) {
+                return null; // New installation
+            }
+            const entries = await fs.promises.readdir(this.nounsDir, { withFileTypes: true });
+            // Check if there are any .json files directly in nounsDir (flat structure)
+            const hasDirectJsonFiles = entries.some((e) => e.isFile() && e.name.endsWith('.json'));
+            if (hasDirectJsonFiles) {
+                return 0; // Flat structure: nouns/uuid.json
+            }
+            // Check for subdirectories with hex names (sharding directories)
+            const subdirs = entries.filter((e) => e.isDirectory() && /^[0-9a-f]{2}$/i.test(e.name));
+            if (subdirs.length === 0) {
+                return null; // No files yet
+            }
+            // Check first subdir to see if it has files or more subdirs
+            const firstSubdir = subdirs[0].name;
+            const subdirPath = path.join(this.nounsDir, firstSubdir);
+            const subdirEntries = await fs.promises.readdir(subdirPath, { withFileTypes: true });
+            const hasJsonFiles = subdirEntries.some((e) => e.isFile() && e.name.endsWith('.json'));
+            if (hasJsonFiles) {
+                return 1; // Single-level sharding: nouns/ab/uuid.json
+            }
+            const hasSubSubdirs = subdirEntries.some((e) => e.isDirectory() && /^[0-9a-f]{2}$/i.test(e.name));
+            if (hasSubSubdirs) {
+                return 2; // Deep sharding: nouns/ab/cd/uuid.json
+            }
+            return 1; // Default to single-level if structure is unclear
+        }
+        catch (error) {
+            // If we can't read the directory, assume new installation
+            return null;
+        }
+    }
+    /**
+     * Get sharding depth
+     * Always returns 1 (single-level sharding) for optimal balance of
+     * simplicity, performance, and reliability across all dataset sizes
+     *
+     * Single-level sharding (depth=1):
+     * - 256 shard directories (00-ff)
+     * - Handles 2.5M+ entities with excellent performance
+     * - No dynamic depth changes = no path mismatch bugs
+     * - Industry standard approach (Git uses similar)
+     */
+    getOptimalShardingDepth() {
+        return this.SHARDING_DEPTH;
+    }
+    /**
+     * Get the path for a node with consistent sharding strategy
+     * Clean, predictable path generation
+     */
+    getNodePath(id) {
+        return this.getShardedPath(this.nounsDir, id);
+    }
+    /**
+     * Get the path for a verb with consistent sharding strategy
+     */
+    getVerbPath(id) {
+        return this.getShardedPath(this.verbsDir, id);
+    }
+    /**
+     * Universal sharded path generator
+     * Always uses depth=1 (single-level sharding) for consistency
+     *
+     * Format: baseDir/ab/uuid.json
+     * Where 'ab' = first 2 hex characters of UUID (lowercase)
+     *
+     * Validates UUID format and throws descriptive errors
+     */
+    getShardedPath(baseDir, id) {
+        // Extract first 2 characters for shard directory
+        const shard = id.substring(0, 2).toLowerCase();
+        // Validate shard is valid hex (00-ff)
+        if (!/^[0-9a-f]{2}$/.test(shard)) {
+            throw new Error(`Invalid entity ID format: ${id}. ` +
+                `Expected UUID starting with 2 hex characters, got '${shard}'. ` +
+                `IDs must be UUIDs or hex strings.`);
+        }
+        // Single-level sharding: baseDir/ab/uuid.json
+        return path.join(baseDir, shard, `${id}.json`);
+    }
+    /**
+     * Get all JSON files from the single-level sharded directory structure
+     * Traverses all shard subdirectories (00-ff)
+     */
+    async getAllShardedFiles(baseDir) {
+        const allFiles = [];
+        try {
+            const shardDirs = await fs.promises.readdir(baseDir);
+            for (const shardDir of shardDirs) {
+                const shardPath = path.join(baseDir, shardDir);
+                try {
+                    const stat = await fs.promises.stat(shardPath);
+                    if (stat.isDirectory()) {
+                        const shardFiles = await fs.promises.readdir(shardPath);
+                        for (const file of shardFiles) {
+                            if (file.endsWith('.json')) {
+                                allFiles.push(file);
+                            }
                         }
-                throw baseError;
                     }
-
+                }
+                catch (shardError) {
+                    // Skip inaccessible shard directories
+                    continue;
+                }
             }
             // Sort for consistent ordering
             allFiles.sort();
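For reference, the .migration-lock written by migrateShardingStructure is plain JSON, so an operator inspecting a stuck migration sees something like this (field names from the code above; values illustrative):

    {
      "startedAt": "2025-10-07T12:00:00.000Z",
      "fromDepth": 2,
      "toDepth": 1,
      "pid": 12345
    }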
@@ -1702,109 +2046,45 @@ export class FileSystemStorage extends BaseStorage {
      * Stream through sharded files without loading all names into memory
      * Production-scale implementation for millions of files
      */
+    /**
+     * Stream through files in single-level sharded structure
+     * Calls processor for each file until processor returns false
+     * Returns true if more files exist (processor stopped early), false if all processed
+     */
     async streamShardedFiles(baseDir, depth, processor) {
         let hasMore = true;
-        for (const file of sortedFiles) {
-            const shouldContinue = await processor(file, path.join(baseDir, file));
-            if (!shouldContinue) {
-                hasMore = false;
-                break;
-            }
-        }
-        }
-        catch (error) {
-            if (error.code === 'ENOENT')
-                hasMore = false;
-        }
-        break;
-        case 1:
-            // Single-level sharding: ab/
-            try {
-                const shardDirs = await fs.promises.readdir(baseDir);
-                const sortedShardDirs = shardDirs.sort();
-                for (const shardDir of sortedShardDirs) {
-                    const shardPath = path.join(baseDir, shardDir);
-                    try {
-                        const stat = await fs.promises.stat(shardPath);
-                        if (stat.isDirectory()) {
-                            const files = await fs.promises.readdir(shardPath);
-                            const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
-                            for (const file of sortedFiles) {
-                                const shouldContinue = await processor(file, path.join(shardPath, file));
-                                if (!shouldContinue) {
-                                    hasMore = false;
-                                    break;
-                                }
-                            }
-                            if (!hasMore)
-                                break;
-                        }
-                    }
-                    catch (shardError) {
-                        continue; // Skip inaccessible shard directories
-                    }
-                }
-            }
-            catch (error) {
-                if (error.code === 'ENOENT')
-                    hasMore = false;
-            }
-            break;
-        case 2:
-        default:
-            // Deep sharding: ab/cd/
+        // Single-level sharding (depth=1): baseDir/ab/uuid.json
+        try {
+            const shardDirs = await fs.promises.readdir(baseDir);
+            const sortedShardDirs = shardDirs.sort();
+            for (const shardDir of sortedShardDirs) {
+                const shardPath = path.join(baseDir, shardDir);
                 try {
-                    const
-                    const
-                    const
-                    if (
-                    for (const level2Dir of sortedLevel2Dirs) {
-                        const level2Path = path.join(level1Path, level2Dir);
-                        try {
-                            const level2Stat = await fs.promises.stat(level2Path);
-                            if (level2Stat.isDirectory()) {
-                                const files = await fs.promises.readdir(level2Path);
-                                const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
-                                for (const file of sortedFiles) {
-                                    const shouldContinue = await processor(file, path.join(level2Path, file));
-                                    if (!shouldContinue) {
-                                        hasMore = false;
-                                        break;
-                                    }
-                                }
-                                if (!hasMore)
-                                    break;
-                            }
-                        }
-                        catch (level2Error) {
-                            continue; // Skip inaccessible level2 directories
-                        }
-                    }
-                    if (!hasMore)
-                        break;
+                    const stat = await fs.promises.stat(shardPath);
+                    if (stat.isDirectory()) {
+                        const files = await fs.promises.readdir(shardPath);
+                        const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
+                        for (const file of sortedFiles) {
+                            const shouldContinue = await processor(file, path.join(shardPath, file));
+                            if (!shouldContinue) {
+                                hasMore = false;
+                                break;
                             }
                         }
-                    }
+                        if (!hasMore)
+                            break;
                     }
                 }
-                catch (
-
+                catch (shardError) {
+                    // Skip inaccessible shard directories
+                    continue;
                 }
-
+            }
+        }
+        catch (error) {
+            if (error.code === 'ENOENT') {
+                hasMore = false;
+            }
         }
         return hasMore;
     }
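The processor contract is visible in the loop above: it receives (fileName, fullPath) and returns a boolean (or a promise of one) that decides whether streaming continues. A hedged usage sketch — streamShardedFiles is private in the shipped typings, so direct access here is assumed purely for illustration:

    // Hypothetical caller: collect the first 10 noun file paths, then stop early.
    const found: string[] = []
    const hasMore = await storage.streamShardedFiles(nounsDir, 1, async (file, fullPath) => {
      found.push(fullPath)
      return found.length < 10 // returning false stops the stream
    })
    // hasMore === true means the processor stopped early and more files remain.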
package/dist/storage/adapters/opfsStorage.js
CHANGED

@@ -518,7 +518,7 @@ export class OPFSStorage extends BaseStorage {
             const batch = ids.slice(i, i + batchSize);
             const batchPromises = batch.map(async (id) => {
                 try {
-                    const metadata = await this.
+                    const metadata = await this.getNounMetadata(id);
                     return { id, metadata };
                 }
                 catch (error) {
package/dist/utils/metadataIndex.js
CHANGED

@@ -1356,7 +1356,7 @@ export class MetadataIndexManager {
             const batch = nounIds.slice(i, i + CONCURRENCY_LIMIT);
             const batchPromises = batch.map(async (id) => {
                 try {
-                    const metadata = await this.storage.
+                    const metadata = await this.storage.getNounMetadata(id);
                     return { id, metadata };
                 }
                 catch (error) {
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "3.23.0",
+  "version": "3.24.0",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",