@soulcraft/brainy 3.23.1 → 3.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@

 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.

+ ### [3.24.0](https://github.com/soulcraftlabs/brainy/compare/v3.23.1...v3.24.0) (2025-10-07)
+
+ - feat: simplify sharding to fixed depth-1 for reliability and performance (87515b9)
+
+
 ### [3.23.0](https://github.com/soulcraftlabs/brainy/compare/v3.22.0...v3.23.0) (2025-10-04)

 - refactor: streamline core API surface
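
The headline change in this release is replacing size-based dynamic sharding with a fixed single-level scheme. A minimal TypeScript sketch of the depth-1 layout the release standardizes on (the helper name is hypothetical, not part of the package API):

```ts
import * as path from 'path'

// Depth-1 layout: every entity file lives at
// <baseDir>/<first 2 hex chars of its UUID>/<uuid>.json
function shardPathDepth1(baseDir: string, id: string): string {
  const shard = id.substring(0, 2).toLowerCase()
  return path.join(baseDir, shard, `${id}.json`)
}

console.log(shardPathDepth1('nouns', 'a1b2c3d4-e5f6-7890-abcd-ef1234567890'))
// -> nouns/a1/a1b2c3d4-e5f6-7890-abcd-ef1234567890.json
```
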
@@ -12,9 +12,9 @@ type Edge = HNSWVerb;
 */
 export declare class FileSystemStorage extends BaseStorage {
 private countsFilePath?;
- private readonly shardingDepth;
- private readonly SHARDING_THRESHOLD;
- private cachedShardingDepth?;
+ private readonly SHARDING_DEPTH;
+ private readonly MAX_SHARDS;
+ private cachedShardingDepth;
 private rootDir;
 private nounsDir;
 private verbsDir;
@@ -270,8 +270,55 @@ export declare class FileSystemStorage extends BaseStorage {
 */
 protected persistCounts(): Promise<void>;
 /**
- * Determine optimal sharding depth based on dataset size
- * This is called once during initialization for consistent behavior
+ * Migrate files from one sharding depth to another
+ * Handles: 0→1 (flat to single-level), 2→1 (deep to single-level)
+ * Uses atomic file operations and comprehensive error handling
+ *
+ * @param fromDepth - Source sharding depth
+ * @param toDepth - Target sharding depth (must be 1)
+ */
+ private migrateShardingStructure;
+ /**
+ * Discover all files that need to be migrated
+ * Constructs correct oldPath based on source depth
+ */
+ private discoverFilesForMigration;
+ /**
+ * Get all files at a specific depth
+ */
+ private getAllFilesAtDepth;
+ /**
+ * Create all 256 shard directories (00-ff)
+ */
+ private createAllShardDirectories;
+ /**
+ * Migrate a single file atomically
+ */
+ private migrateFile;
+ /**
+ * Clean up empty directories after migration
+ */
+ private cleanupEmptyDirectories;
+ /**
+ * Count files in the current structure
+ */
+ private countFilesInStructure;
+ /**
+ * Detect the actual sharding depth used by existing files
+ * Examines directory structure to determine current sharding strategy
+ * Returns null if no files exist yet (new installation)
+ */
+ private detectExistingShardingDepth;
+ /**
+ * Get sharding depth
+ * Always returns 1 (single-level sharding) for optimal balance of
+ * simplicity, performance, and reliability across all dataset sizes
+ *
+ * Single-level sharding (depth=1):
+ * - 256 shard directories (00-ff)
+ * - Handles 2.5M+ entities with excellent performance
+ * - No dynamic depth changes = no path mismatch bugs
+ * - Industry standard approach (Git uses similar)
 */
 private getOptimalShardingDepth;
 /**
@@ -285,12 +332,17 @@ export declare class FileSystemStorage extends BaseStorage {
 private getVerbPath;
 /**
 * Universal sharded path generator
- * Consistent across all entity types
+ * Always uses depth=1 (single-level sharding) for consistency
+ *
+ * Format: baseDir/ab/uuid.json
+ * Where 'ab' = first 2 hex characters of UUID (lowercase)
+ *
+ * Validates UUID format and throws descriptive errors
 */
 private getShardedPath;
 /**
- * Get all JSON files from a sharded directory structure
- * Properly traverses sharded subdirectories based on current sharding depth
+ * Get all JSON files from the single-level sharded directory structure
+ * Traverses all shard subdirectories (00-ff)
 */
 private getAllShardedFiles;
 /**
@@ -302,6 +354,11 @@ export declare class FileSystemStorage extends BaseStorage {
 * Stream through sharded files without loading all names into memory
 * Production-scale implementation for millions of files
 */
+ /**
+ * Stream through files in single-level sharded structure
+ * Calls processor for each file until processor returns false
+ * Returns true if more files exist (processor stopped early), false if all processed
+ */
 private streamShardedFiles;
 /**
 * Check if a file exists (handles both sharded and non-sharded)
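
The new private declarations encode a deliberately narrow migration contract: the target depth is always 1, and the only supported sources are 0 (flat) and 2 (deep). A hypothetical standalone guard restating that contract, with the error messages taken from the shipped implementation below:

```ts
// Sketch only: the real checks live inside the private migrateShardingStructure.
function assertSupportedMigration(fromDepth: number, toDepth: number): void {
  if (fromDepth === toDepth) {
    throw new Error(`Migration not needed: already at depth ${toDepth}`)
  }
  if (toDepth !== 1) {
    throw new Error(`Migration only supports target depth 1 (got ${toDepth})`)
  }
  if (fromDepth !== 0 && fromDepth !== 2) {
    throw new Error(`Migration only supports source depth 0 or 2 (got ${fromDepth})`)
  }
}

assertSupportedMigration(2, 1) // ok: deep -> single-level
assertSupportedMigration(0, 1) // ok: flat -> single-level
// assertSupportedMigration(1, 2) would throw: depth 2 is no longer a target
```
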
@@ -37,9 +37,15 @@ export class FileSystemStorage extends BaseStorage {
 */
 constructor(rootDirectory) {
 super();
- // Intelligent sharding configuration
- this.shardingDepth = 2; // 0=flat, 1=ab/, 2=ab/cd/
- this.SHARDING_THRESHOLD = 100; // Enable deep sharding at 100 files for optimal performance
+ // Fixed sharding configuration for optimal balance of simplicity and performance
+ // Single-level sharding (depth=1) provides excellent performance for 1-2.5M entities
+ // Structure: nouns/ab/uuid.json where 'ab' = first 2 hex chars of UUID
+ // - 256 shard directories (00-ff)
+ // - Handles 2.5M+ entities with < 10K files per shard
+ // - Eliminates dynamic depth changes that cause path mismatch bugs
+ this.SHARDING_DEPTH = 1;
+ this.MAX_SHARDS = 256; // Hex range: 00-ff
+ this.cachedShardingDepth = this.SHARDING_DEPTH; // Always use fixed depth
 this.useDualWrite = true; // Write to both locations during migration
 this.activeLocks = new Set();
 this.lockTimers = new Map(); // Track timers for cleanup
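
The constructor comment claims 2.5M+ entities with under 10K files per shard; with 256 shards and uniformly distributed UUID prefixes, the arithmetic checks out. A quick back-of-envelope check (assumes uniform prefixes, which holds for random UUIDs):

```ts
const MAX_SHARDS = 256                // hex prefixes 00-ff
const FILES_PER_SHARD_TARGET = 10_000 // the "< 10K files per shard" target
console.log(MAX_SHARDS * FILES_PER_SHARD_TARGET) // 2560000, i.e. the "2.5M+" figure
```
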
@@ -100,11 +106,27 @@ export class FileSystemStorage extends BaseStorage {
 // Initialize count management
 this.countsFilePath = path.join(this.systemDir, 'counts.json');
 await this.initializeCounts();
- // Cache sharding depth for consistency during this session
- this.cachedShardingDepth = this.getOptimalShardingDepth();
- // Log sharding strategy for transparency
- const strategy = this.cachedShardingDepth === 0 ? 'flat' : this.cachedShardingDepth === 1 ? 'single-level' : 'deep';
- console.log(`📁 Using ${strategy} sharding for optimal performance (${this.totalNounCount} items)`);
+ // Detect existing sharding structure and migrate if needed
+ const detectedDepth = await this.detectExistingShardingDepth();
+ if (detectedDepth !== null && detectedDepth !== this.SHARDING_DEPTH) {
+ // Migration needed: existing structure doesn't match our fixed depth
+ console.log(`📦 Brainy Storage Migration`);
+ console.log(` Current structure: depth ${detectedDepth}`);
+ console.log(` Target structure: depth ${this.SHARDING_DEPTH}`);
+ console.log(` Entities to migrate: ${this.totalNounCount}`);
+ await this.migrateShardingStructure(detectedDepth, this.SHARDING_DEPTH);
+ console.log(`✅ Migration complete - now using depth ${this.SHARDING_DEPTH} sharding`);
+ }
+ else if (detectedDepth === null) {
+ // New installation
+ console.log(`📁 New installation: using depth ${this.SHARDING_DEPTH} sharding (optimal for 1-2.5M entities)`);
+ }
+ else {
+ // Already using correct depth
+ console.log(`📁 Using depth ${this.SHARDING_DEPTH} sharding (${this.totalNounCount} entities)`);
+ }
+ // Always use fixed depth after migration/detection
+ this.cachedShardingDepth = this.SHARDING_DEPTH;
 this.isInitialized = true;
 }
 catch (error) {
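
Initialization now branches on three outcomes of depth detection. A condensed sketch of that decision table (names are illustrative, not the package API; detection returns null for an empty store, otherwise the depth actually found on disk):

```ts
type DetectedDepth = 0 | 1 | 2 | null

function initAction(detected: DetectedDepth, fixedDepth: number = 1): 'fresh' | 'migrate' | 'noop' {
  if (detected === null) return 'fresh'         // new installation, nothing to move
  if (detected !== fixedDepth) return 'migrate' // depth 0 or 2 -> rewrite into depth 1
  return 'noop'                                 // already at the fixed depth
}
```
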
@@ -1372,11 +1394,15 @@ export class FileSystemStorage extends BaseStorage {
 */
 async initializeCountsFromDisk() {
 try {
- // Count nouns (handles sharding properly)
- const validNounFiles = await this.getAllShardedFiles(this.nounsDir);
+ // CRITICAL: Detect existing depth before counting
+ // Can't use getAllShardedFiles() which assumes depth=1
+ const existingDepth = await this.detectExistingShardingDepth();
+ const depthToUse = existingDepth !== null ? existingDepth : this.SHARDING_DEPTH;
+ // Count nouns using detected depth
+ const validNounFiles = await this.getAllFilesAtDepth(this.nounsDir, depthToUse);
 this.totalNounCount = validNounFiles.length;
- // Count verbs (handles sharding properly)
- const validVerbFiles = await this.getAllShardedFiles(this.verbsDir);
+ // Count verbs using detected depth
+ const validVerbFiles = await this.getAllFilesAtDepth(this.verbsDir, depthToUse);
 this.totalVerbCount = validVerbFiles.length;
 // Sample some files to get type distribution (don't read all)
 const sampleSize = Math.min(100, validNounFiles.length);
@@ -1384,7 +1410,22 @@ export class FileSystemStorage extends BaseStorage {
 try {
 const file = validNounFiles[i];
 const id = file.replace('.json', '');
- const data = await fs.promises.readFile(this.getNodePath(id), 'utf-8');
+ // Construct path using detected depth (not cached depth which may be wrong)
+ let filePath;
+ switch (depthToUse) {
+ case 0:
+ filePath = path.join(this.nounsDir, `${id}.json`);
+ break;
+ case 1:
+ filePath = path.join(this.nounsDir, id.substring(0, 2), `${id}.json`);
+ break;
+ case 2:
+ filePath = path.join(this.nounsDir, id.substring(0, 2), id.substring(2, 4), `${id}.json`);
+ break;
+ default:
+ throw new Error(`Unsupported depth: ${depthToUse}`);
+ }
+ const data = await fs.promises.readFile(filePath, 'utf-8');
 const noun = JSON.parse(data);
 const type = noun.metadata?.type || noun.metadata?.nounType || 'default';
 this.entityCounts.set(type, (this.entityCounts.get(type) || 0) + 1);
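
The sampling loop above builds the on-disk path by hand for each detected depth. A pure-function restatement of the same mapping, useful for seeing all three layouts side by side (hypothetical helper; the shipped code inlines these paths):

```ts
import * as path from 'path'

// depth 0: nouns/<id>.json
// depth 1: nouns/<ab>/<id>.json
// depth 2: nouns/<ab>/<cd>/<id>.json
function pathAtDepth(baseDir: string, id: string, depth: 0 | 1 | 2): string {
  switch (depth) {
    case 0: return path.join(baseDir, `${id}.json`)
    case 1: return path.join(baseDir, id.substring(0, 2), `${id}.json`)
    case 2: return path.join(baseDir, id.substring(0, 2), id.substring(2, 4), `${id}.json`)
  }
}
```
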
@@ -1430,154 +1471,457 @@ export class FileSystemStorage extends BaseStorage {
 // Intelligent Directory Sharding
 // =============================================
 /**
- * Determine optimal sharding depth based on dataset size
- * This is called once during initialization for consistent behavior
+ * Migrate files from one sharding depth to another
+ * Handles: 0→1 (flat to single-level), 2→1 (deep to single-level)
+ * Uses atomic file operations and comprehensive error handling
+ *
+ * @param fromDepth - Source sharding depth
+ * @param toDepth - Target sharding depth (must be 1)
 */
- getOptimalShardingDepth() {
- // For new installations, use intelligent defaults
- if (this.totalNounCount === 0 && this.totalVerbCount === 0) {
- return 1; // Default to single-level sharding for new installs
- }
- const maxCount = Math.max(this.totalNounCount, this.totalVerbCount);
- if (maxCount >= this.SHARDING_THRESHOLD) {
- return 2; // Deep sharding for large datasets
+ async migrateShardingStructure(fromDepth, toDepth) {
+ // Validation
+ if (fromDepth === toDepth) {
+ throw new Error(`Migration not needed: already at depth ${toDepth}`);
+ }
+ if (toDepth !== 1) {
+ throw new Error(`Migration only supports target depth 1 (got ${toDepth})`);
+ }
+ if (fromDepth !== 0 && fromDepth !== 2) {
+ throw new Error(`Migration only supports source depth 0 or 2 (got ${fromDepth})`);
+ }
+ // Create migration lock to prevent concurrent migrations
+ const lockFile = path.join(this.systemDir, '.migration-lock');
+ const lockExists = await this.fileExists(lockFile);
+ if (lockExists) {
+ // Check if lock is stale (> 1 hour old)
+ try {
+ const stats = await fs.promises.stat(lockFile);
+ const lockAge = Date.now() - stats.mtimeMs;
+ const ONE_HOUR = 60 * 60 * 1000;
+ if (lockAge < ONE_HOUR) {
+ throw new Error('Migration already in progress. If this is incorrect, delete .migration-lock file.');
+ }
+ // Lock is stale, remove it
+ console.log('⚠️ Removing stale migration lock (> 1 hour old)');
+ await fs.promises.unlink(lockFile);
+ }
+ catch (error) {
+ if (error.code !== 'ENOENT') {
+ throw error;
+ }
+ }
 }
- else if (maxCount >= 100) {
- return 1; // Single-level sharding for medium datasets
+ try {
+ // Create lock file
+ await fs.promises.writeFile(lockFile, JSON.stringify({
+ startedAt: new Date().toISOString(),
+ fromDepth,
+ toDepth,
+ pid: process.pid
+ }));
+ // Discover all files to migrate
+ console.log('📊 Discovering files to migrate...');
+ const filesToMigrate = await this.discoverFilesForMigration(fromDepth);
+ if (filesToMigrate.length === 0) {
+ console.log('ℹ️ No files to migrate');
+ return;
+ }
+ console.log(`📦 Migrating ${filesToMigrate.length} files...`);
+ // Create all target shard directories upfront
+ await this.createAllShardDirectories(this.nounsDir);
+ await this.createAllShardDirectories(this.verbsDir);
+ // Migrate files with progress tracking
+ let migratedCount = 0;
+ let skippedCount = 0;
+ const errors = [];
+ for (const fileInfo of filesToMigrate) {
+ try {
+ await this.migrateFile(fileInfo, fromDepth, toDepth);
+ migratedCount++;
+ // Progress update every 1000 files
+ if (migratedCount % 1000 === 0) {
+ const percent = ((migratedCount / filesToMigrate.length) * 100).toFixed(1);
+ console.log(` 📊 Progress: ${migratedCount}/${filesToMigrate.length} (${percent}%)`);
+ }
+ // Yield to event loop every 100 files to prevent blocking
+ if (migratedCount % 100 === 0) {
+ await new Promise(resolve => setImmediate(resolve));
+ }
+ }
+ catch (error) {
+ skippedCount++;
+ errors.push({
+ file: fileInfo.oldPath,
+ error: error.message
+ });
+ // Log first few errors
+ if (errors.length <= 5) {
+ console.warn(`⚠️ Skipped ${fileInfo.oldPath}: ${error.message}`);
+ }
+ }
+ }
+ // Final summary
+ console.log(`\n✅ Migration Results:`);
+ console.log(` Migrated: ${migratedCount} files`);
+ console.log(` Skipped: ${skippedCount} files`);
+ if (errors.length > 0) {
+ console.warn(`\n⚠️ ${errors.length} files could not be migrated`);
+ if (errors.length > 5) {
+ console.warn(` (First 5 errors shown above, ${errors.length - 5} more occurred)`);
+ }
+ }
+ // Cleanup: Remove empty old directories
+ if (fromDepth === 0) {
+ // No subdirectories to clean for flat structure
+ }
+ else if (fromDepth === 2) {
+ await this.cleanupEmptyDirectories(this.nounsDir, fromDepth);
+ await this.cleanupEmptyDirectories(this.verbsDir, fromDepth);
+ }
+ // Verification: Count files in new structure
+ const verifyCount = await this.countFilesInStructure(toDepth);
+ console.log(`\n🔍 Verification: ${verifyCount} files in new structure`);
+ if (verifyCount < migratedCount) {
+ console.warn(`⚠️ Warning: Verification count (${verifyCount}) < migrated count (${migratedCount})`);
+ }
 }
- else {
- return 1; // Always use at least single-level sharding for consistency
+ finally {
+ // Always remove lock file
+ try {
+ await fs.promises.unlink(lockFile);
+ }
+ catch (error) {
+ // Ignore error if lock file doesn't exist
+ }
 }
 }
 /**
- * Get the path for a node with consistent sharding strategy
- * Clean, predictable path generation
+ * Discover all files that need to be migrated
+ * Constructs correct oldPath based on source depth
 */
- getNodePath(id) {
- return this.getShardedPath(this.nounsDir, id);
- }
- /**
- * Get the path for a verb with consistent sharding strategy
- */
- getVerbPath(id) {
- return this.getShardedPath(this.verbsDir, id);
+ async discoverFilesForMigration(fromDepth) {
+ const files = [];
+ // Discover noun files
+ const nounFiles = await this.getAllFilesAtDepth(this.nounsDir, fromDepth);
+ for (const filename of nounFiles) {
+ const id = filename.replace('.json', '');
+ // Construct correct oldPath based on fromDepth
+ let oldPath;
+ switch (fromDepth) {
+ case 0:
+ // Flat: nouns/uuid.json
+ oldPath = path.join(this.nounsDir, `${id}.json`);
+ break;
+ case 1:
+ // Single-level: nouns/ab/uuid.json
+ oldPath = path.join(this.nounsDir, id.substring(0, 2), `${id}.json`);
+ break;
+ case 2:
+ // Deep: nouns/ab/cd/uuid.json
+ oldPath = path.join(this.nounsDir, id.substring(0, 2), id.substring(2, 4), `${id}.json`);
+ break;
+ default:
+ throw new Error(`Unsupported fromDepth: ${fromDepth}`);
+ }
+ files.push({ oldPath, id, type: 'noun' });
+ }
+ // Discover verb files
+ const verbFiles = await this.getAllFilesAtDepth(this.verbsDir, fromDepth);
+ for (const filename of verbFiles) {
+ const id = filename.replace('.json', '');
+ // Construct correct oldPath based on fromDepth
+ let oldPath;
+ switch (fromDepth) {
+ case 0:
+ // Flat: verbs/uuid.json
+ oldPath = path.join(this.verbsDir, `${id}.json`);
+ break;
+ case 1:
+ // Single-level: verbs/ab/uuid.json
+ oldPath = path.join(this.verbsDir, id.substring(0, 2), `${id}.json`);
+ break;
+ case 2:
+ // Deep: verbs/ab/cd/uuid.json
+ oldPath = path.join(this.verbsDir, id.substring(0, 2), id.substring(2, 4), `${id}.json`);
+ break;
+ default:
+ throw new Error(`Unsupported fromDepth: ${fromDepth}`);
+ }
+ files.push({ oldPath, id, type: 'verb' });
+ }
+ return files;
 }
 /**
- * Universal sharded path generator
- * Consistent across all entity types
- */
- getShardedPath(baseDir, id) {
- const depth = this.cachedShardingDepth ?? this.getOptimalShardingDepth();
- switch (depth) {
- case 0:
- // Flat structure: /nouns/uuid.json
- return path.join(baseDir, `${id}.json`);
- case 1:
- // Single-level sharding: /nouns/ab/uuid.json
- const shard1 = id.substring(0, 2);
- return path.join(baseDir, shard1, `${id}.json`);
- case 2:
- default:
- // Deep sharding: /nouns/ab/cd/uuid.json
- const shard1Deep = id.substring(0, 2);
- const shard2Deep = id.substring(2, 4);
- return path.join(baseDir, shard1Deep, shard2Deep, `${id}.json`);
- }
- }
- /**
- * Get all JSON files from a sharded directory structure
- * Properly traverses sharded subdirectories based on current sharding depth
+ * Get all files at a specific depth
 */
- async getAllShardedFiles(baseDir) {
+ async getAllFilesAtDepth(baseDir, depth) {
 const allFiles = [];
- const depth = this.cachedShardingDepth ?? this.getOptimalShardingDepth();
 try {
+ const dirExists = await this.directoryExists(baseDir);
+ if (!dirExists) {
+ return [];
+ }
 switch (depth) {
 case 0:
- // Flat structure: read directly from baseDir
- const flatFiles = await fs.promises.readdir(baseDir);
- for (const file of flatFiles) {
- if (file.endsWith('.json')) {
- allFiles.push(file);
+ // Flat: files directly in baseDir
+ const entries = await fs.promises.readdir(baseDir);
+ for (const entry of entries) {
+ if (entry.endsWith('.json')) {
+ allFiles.push(entry);
 }
 }
 break;
 case 1:
- // Single-level sharding: baseDir/ab/
- try {
- const shardDirs = await fs.promises.readdir(baseDir);
- for (const shardDir of shardDirs) {
- const shardPath = path.join(baseDir, shardDir);
- try {
- const stat = await fs.promises.stat(shardPath);
- if (stat.isDirectory()) {
- const shardFiles = await fs.promises.readdir(shardPath);
- for (const file of shardFiles) {
- if (file.endsWith('.json')) {
- allFiles.push(file);
- }
+ // Single-level: baseDir/ab/uuid.json
+ const shardDirs = await fs.promises.readdir(baseDir);
+ for (const shard of shardDirs) {
+ const shardPath = path.join(baseDir, shard);
+ try {
+ const stat = await fs.promises.stat(shardPath);
+ if (stat.isDirectory()) {
+ const shardFiles = await fs.promises.readdir(shardPath);
+ for (const file of shardFiles) {
+ if (file.endsWith('.json')) {
+ allFiles.push(file);
 }
 }
 }
- catch (shardError) {
- // Skip inaccessible shard directories
- continue;
- }
 }
- }
- catch (baseError) {
- // If baseDir doesn't exist, return empty array
- if (baseError.code === 'ENOENT') {
- return [];
+ catch (error) {
+ // Skip inaccessible directories
 }
- throw baseError;
 }
 break;
 case 2:
- default:
- // Deep sharding: baseDir/ab/cd/
- try {
- const level1Dirs = await fs.promises.readdir(baseDir);
- for (const level1Dir of level1Dirs) {
- const level1Path = path.join(baseDir, level1Dir);
- try {
- const level1Stat = await fs.promises.stat(level1Path);
- if (level1Stat.isDirectory()) {
- const level2Dirs = await fs.promises.readdir(level1Path);
- for (const level2Dir of level2Dirs) {
- const level2Path = path.join(level1Path, level2Dir);
- try {
- const level2Stat = await fs.promises.stat(level2Path);
- if (level2Stat.isDirectory()) {
- const shardFiles = await fs.promises.readdir(level2Path);
- for (const file of shardFiles) {
- if (file.endsWith('.json')) {
- allFiles.push(file);
- }
+ // Deep: baseDir/ab/cd/uuid.json
+ const level1Dirs = await fs.promises.readdir(baseDir);
+ for (const level1 of level1Dirs) {
+ const level1Path = path.join(baseDir, level1);
+ try {
+ const level1Stat = await fs.promises.stat(level1Path);
+ if (level1Stat.isDirectory()) {
+ const level2Dirs = await fs.promises.readdir(level1Path);
+ for (const level2 of level2Dirs) {
+ const level2Path = path.join(level1Path, level2);
+ try {
+ const level2Stat = await fs.promises.stat(level2Path);
+ if (level2Stat.isDirectory()) {
+ const files = await fs.promises.readdir(level2Path);
+ for (const file of files) {
+ if (file.endsWith('.json')) {
+ allFiles.push(file);
 }
 }
 }
- catch (level2Error) {
- // Skip inaccessible level2 directories
- continue;
- }
+ }
+ catch (error) {
+ // Skip inaccessible directories
 }
 }
 }
- catch (level1Error) {
- // Skip inaccessible level1 directories
- continue;
+ }
+ catch (error) {
+ // Skip inaccessible directories
+ }
+ }
+ break;
+ }
+ }
+ catch (error) {
+ // Directory doesn't exist or not accessible
+ }
+ return allFiles;
+ }
+ /**
+ * Create all 256 shard directories (00-ff)
+ */
+ async createAllShardDirectories(baseDir) {
+ for (let i = 0; i < this.MAX_SHARDS; i++) {
+ const shard = i.toString(16).padStart(2, '0');
+ const shardDir = path.join(baseDir, shard);
+ await this.ensureDirectoryExists(shardDir);
+ }
+ }
+ /**
+ * Migrate a single file atomically
+ */
+ async migrateFile(fileInfo, fromDepth, toDepth) {
+ const baseDir = fileInfo.type === 'noun' ? this.nounsDir : this.verbsDir;
+ // Calculate old path (already known)
+ const oldPath = fileInfo.oldPath;
+ // Calculate new path using target depth
+ const shard = fileInfo.id.substring(0, 2).toLowerCase();
+ const newPath = path.join(baseDir, shard, `${fileInfo.id}.json`);
+ // Check if file already exists at new location
+ if (await this.fileExists(newPath)) {
+ // File already migrated or duplicate - skip
+ return;
+ }
+ // Atomic rename/move
+ await fs.promises.rename(oldPath, newPath);
+ }
+ /**
+ * Clean up empty directories after migration
+ */
+ async cleanupEmptyDirectories(baseDir, depth) {
+ try {
+ if (depth === 2) {
+ // Clean up level2 and level1 directories
+ const level1Dirs = await fs.promises.readdir(baseDir);
+ for (const level1 of level1Dirs) {
+ const level1Path = path.join(baseDir, level1);
+ try {
+ const level1Stat = await fs.promises.stat(level1Path);
+ if (level1Stat.isDirectory()) {
+ const level2Dirs = await fs.promises.readdir(level1Path);
+ for (const level2 of level2Dirs) {
+ const level2Path = path.join(level1Path, level2);
+ try {
+ // Try to remove level2 directory (will fail if not empty)
+ await fs.promises.rmdir(level2Path);
+ }
+ catch (error) {
+ // Directory not empty or other error - ignore
+ }
 }
+ // Try to remove level1 directory
+ await fs.promises.rmdir(level1Path);
 }
 }
- catch (baseError) {
- // If baseDir doesn't exist, return empty array
- if (baseError.code === 'ENOENT') {
- return [];
+ catch (error) {
+ // Directory not empty or other error - ignore
+ }
+ }
+ }
+ }
+ catch (error) {
+ // Cleanup is best-effort, don't throw
+ }
+ }
+ /**
+ * Count files in the current structure
+ */
+ async countFilesInStructure(depth) {
+ let count = 0;
+ count += (await this.getAllFilesAtDepth(this.nounsDir, depth)).length;
+ count += (await this.getAllFilesAtDepth(this.verbsDir, depth)).length;
+ return count;
+ }
+ /**
+ * Detect the actual sharding depth used by existing files
+ * Examines directory structure to determine current sharding strategy
+ * Returns null if no files exist yet (new installation)
+ */
+ async detectExistingShardingDepth() {
+ try {
+ // Check if nouns directory exists and has content
+ const dirExists = await this.directoryExists(this.nounsDir);
+ if (!dirExists) {
+ return null; // New installation
+ }
+ const entries = await fs.promises.readdir(this.nounsDir, { withFileTypes: true });
+ // Check if there are any .json files directly in nounsDir (flat structure)
+ const hasDirectJsonFiles = entries.some((e) => e.isFile() && e.name.endsWith('.json'));
+ if (hasDirectJsonFiles) {
+ return 0; // Flat structure: nouns/uuid.json
+ }
+ // Check for subdirectories with hex names (sharding directories)
+ const subdirs = entries.filter((e) => e.isDirectory() && /^[0-9a-f]{2}$/i.test(e.name));
+ if (subdirs.length === 0) {
+ return null; // No files yet
+ }
+ // Check first subdir to see if it has files or more subdirs
+ const firstSubdir = subdirs[0].name;
+ const subdirPath = path.join(this.nounsDir, firstSubdir);
+ const subdirEntries = await fs.promises.readdir(subdirPath, { withFileTypes: true });
+ const hasJsonFiles = subdirEntries.some((e) => e.isFile() && e.name.endsWith('.json'));
+ if (hasJsonFiles) {
+ return 1; // Single-level sharding: nouns/ab/uuid.json
+ }
+ const hasSubSubdirs = subdirEntries.some((e) => e.isDirectory() && /^[0-9a-f]{2}$/i.test(e.name));
+ if (hasSubSubdirs) {
+ return 2; // Deep sharding: nouns/ab/cd/uuid.json
+ }
+ return 1; // Default to single-level if structure is unclear
+ }
+ catch (error) {
+ // If we can't read the directory, assume new installation
+ return null;
+ }
+ }
+ /**
+ * Get sharding depth
+ * Always returns 1 (single-level sharding) for optimal balance of
+ * simplicity, performance, and reliability across all dataset sizes
+ *
+ * Single-level sharding (depth=1):
+ * - 256 shard directories (00-ff)
+ * - Handles 2.5M+ entities with excellent performance
+ * - No dynamic depth changes = no path mismatch bugs
+ * - Industry standard approach (Git uses similar)
+ */
+ getOptimalShardingDepth() {
+ return this.SHARDING_DEPTH;
+ }
+ /**
+ * Get the path for a node with consistent sharding strategy
+ * Clean, predictable path generation
+ */
+ getNodePath(id) {
+ return this.getShardedPath(this.nounsDir, id);
+ }
+ /**
+ * Get the path for a verb with consistent sharding strategy
+ */
+ getVerbPath(id) {
+ return this.getShardedPath(this.verbsDir, id);
+ }
+ /**
+ * Universal sharded path generator
+ * Always uses depth=1 (single-level sharding) for consistency
+ *
+ * Format: baseDir/ab/uuid.json
+ * Where 'ab' = first 2 hex characters of UUID (lowercase)
+ *
+ * Validates UUID format and throws descriptive errors
+ */
+ getShardedPath(baseDir, id) {
+ // Extract first 2 characters for shard directory
+ const shard = id.substring(0, 2).toLowerCase();
+ // Validate shard is valid hex (00-ff)
+ if (!/^[0-9a-f]{2}$/.test(shard)) {
+ throw new Error(`Invalid entity ID format: ${id}. ` +
+ `Expected UUID starting with 2 hex characters, got '${shard}'. ` +
+ `IDs must be UUIDs or hex strings.`);
+ }
+ // Single-level sharding: baseDir/ab/uuid.json
+ return path.join(baseDir, shard, `${id}.json`);
+ }
+ /**
+ * Get all JSON files from the single-level sharded directory structure
+ * Traverses all shard subdirectories (00-ff)
+ */
+ async getAllShardedFiles(baseDir) {
+ const allFiles = [];
+ try {
+ const shardDirs = await fs.promises.readdir(baseDir);
+ for (const shardDir of shardDirs) {
+ const shardPath = path.join(baseDir, shardDir);
+ try {
+ const stat = await fs.promises.stat(shardPath);
+ if (stat.isDirectory()) {
+ const shardFiles = await fs.promises.readdir(shardPath);
+ for (const file of shardFiles) {
+ if (file.endsWith('.json')) {
+ allFiles.push(file);
+ }
 }
- throw baseError;
 }
- break;
+ }
+ catch (shardError) {
+ // Skip inaccessible shard directories
+ continue;
+ }
 }
 // Sort for consistent ordering
 allFiles.sort();
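
The new getShardedPath rejects any ID whose first two characters are not hex, since such an ID could never map into the 00-ff shard set. A standalone sketch of that observable rule (the real method is private, so this mirrors rather than calls it):

```ts
function shardOf(id: string): string {
  const shard = id.substring(0, 2).toLowerCase()
  if (!/^[0-9a-f]{2}$/.test(shard)) {
    throw new Error(`Invalid entity ID format: ${id}`)
  }
  return shard
}

shardOf('A1B2C3D4-e5f6-7890-abcd-ef1234567890') // -> 'a1' (case-normalized)
// shardOf('zz-not-a-uuid') would throw: 'zz' is outside 00-ff
```
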
@@ -1702,109 +2046,45 @@ export class FileSystemStorage extends BaseStorage {
 * Stream through sharded files without loading all names into memory
 * Production-scale implementation for millions of files
 */
+ /**
+ * Stream through files in single-level sharded structure
+ * Calls processor for each file until processor returns false
+ * Returns true if more files exist (processor stopped early), false if all processed
+ */
 async streamShardedFiles(baseDir, depth, processor) {
 let hasMore = true;
- switch (depth) {
- case 0:
- // Flat structure
- try {
- const files = await fs.promises.readdir(baseDir);
- const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
- for (const file of sortedFiles) {
- const shouldContinue = await processor(file, path.join(baseDir, file));
- if (!shouldContinue) {
- hasMore = false;
- break;
- }
- }
- }
- catch (error) {
- if (error.code === 'ENOENT')
- hasMore = false;
- }
- break;
- case 1:
- // Single-level sharding: ab/
- try {
- const shardDirs = await fs.promises.readdir(baseDir);
- const sortedShardDirs = shardDirs.sort();
- for (const shardDir of sortedShardDirs) {
- const shardPath = path.join(baseDir, shardDir);
- try {
- const stat = await fs.promises.stat(shardPath);
- if (stat.isDirectory()) {
- const files = await fs.promises.readdir(shardPath);
- const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
- for (const file of sortedFiles) {
- const shouldContinue = await processor(file, path.join(shardPath, file));
- if (!shouldContinue) {
- hasMore = false;
- break;
- }
- }
- if (!hasMore)
- break;
- }
- }
- catch (shardError) {
- continue; // Skip inaccessible shard directories
- }
- }
- }
- catch (error) {
- if (error.code === 'ENOENT')
- hasMore = false;
- }
- break;
- case 2:
- default:
- // Deep sharding: ab/cd/
+ // Single-level sharding (depth=1): baseDir/ab/uuid.json
+ try {
+ const shardDirs = await fs.promises.readdir(baseDir);
+ const sortedShardDirs = shardDirs.sort();
+ for (const shardDir of sortedShardDirs) {
+ const shardPath = path.join(baseDir, shardDir);
 try {
- const level1Dirs = await fs.promises.readdir(baseDir);
- const sortedLevel1Dirs = level1Dirs.sort();
- for (const level1Dir of sortedLevel1Dirs) {
- const level1Path = path.join(baseDir, level1Dir);
- try {
- const level1Stat = await fs.promises.stat(level1Path);
- if (level1Stat.isDirectory()) {
- const level2Dirs = await fs.promises.readdir(level1Path);
- const sortedLevel2Dirs = level2Dirs.sort();
- for (const level2Dir of sortedLevel2Dirs) {
- const level2Path = path.join(level1Path, level2Dir);
- try {
- const level2Stat = await fs.promises.stat(level2Path);
- if (level2Stat.isDirectory()) {
- const files = await fs.promises.readdir(level2Path);
- const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
- for (const file of sortedFiles) {
- const shouldContinue = await processor(file, path.join(level2Path, file));
- if (!shouldContinue) {
- hasMore = false;
- break;
- }
- }
- if (!hasMore)
- break;
- }
- }
- catch (level2Error) {
- continue; // Skip inaccessible level2 directories
- }
- }
- if (!hasMore)
- break;
+ const stat = await fs.promises.stat(shardPath);
+ if (stat.isDirectory()) {
+ const files = await fs.promises.readdir(shardPath);
+ const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
+ for (const file of sortedFiles) {
+ const shouldContinue = await processor(file, path.join(shardPath, file));
+ if (!shouldContinue) {
+ hasMore = false;
+ break;
 }
 }
- catch (level1Error) {
- continue; // Skip inaccessible level1 directories
- }
+ if (!hasMore)
+ break;
 }
 }
- catch (error) {
- if (error.code === 'ENOENT')
- hasMore = false;
+ catch (shardError) {
+ // Skip inaccessible shard directories
+ continue;
 }
- break;
+ }
+ }
+ catch (error) {
+ if (error.code === 'ENOENT') {
+ hasMore = false;
+ }
 }
 return hasMore;
 }
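
Because the rewritten streamShardedFiles sorts both shard directories and file names before visiting them, iteration order is deterministic across runs, which is what makes stopping early and resuming meaningful. A hypothetical pagination pattern built on that contract (streamShardedFiles is private, so this sketches the semantics rather than a supported API):

```ts
type FileProcessor = (fileName: string, filePath: string) => Promise<boolean>
type StreamFn = (baseDir: string, depth: number, processor: FileProcessor) => Promise<boolean>

// Skip `offset` files, take `limit`, and report whether another page exists.
async function page(stream: StreamFn, baseDir: string, offset: number, limit: number) {
  const names: string[] = []
  let seen = 0
  const hasMore = await stream(baseDir, 1, async (fileName) => {
    if (seen++ < offset) return true // still skipping toward the requested page
    names.push(fileName)
    return names.length < limit      // stop once the page is full
  })
  return { names, hasMore }
}
```
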
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
 "name": "@soulcraft/brainy",
- "version": "3.23.1",
+ "version": "3.24.0",
 "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
 "main": "dist/index.js",
 "module": "dist/index.js",