@soulcraft/brainy 3.16.0 → 3.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,8 @@
 
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
 
+## [3.17.0](https://github.com/soulcraftlabs/brainy/compare/v3.16.0...v3.17.0) (2025-09-27)
+
 ## [3.15.0](https://github.com/soulcraftlabs/brainy/compare/v3.14.2...v3.15.0) (2025-09-26)
 
 ### Bug Fixes
@@ -288,6 +288,21 @@ export declare class FileSystemStorage extends BaseStorage {
      * Consistent across all entity types
      */
     private getShardedPath;
+    /**
+     * Get all JSON files from a sharded directory structure
+     * Properly traverses sharded subdirectories based on current sharding depth
+     */
+    private getAllShardedFiles;
+    /**
+     * Production-scale streaming pagination for very large datasets
+     * Avoids loading all filenames into memory
+     */
+    private getVerbsWithPaginationStreaming;
+    /**
+     * Stream through sharded files without loading all names into memory
+     * Production-scale implementation for millions of files
+     */
+    private streamShardedFiles;
     /**
      * Check if a file exists (handles both sharded and non-sharded)
      */
@@ -39,7 +39,7 @@ export class FileSystemStorage extends BaseStorage {
         super();
         // Intelligent sharding configuration
         this.shardingDepth = 2; // 0=flat, 1=ab/, 2=ab/cd/
-        this.SHARDING_THRESHOLD = 1000; // Enable deep sharding at 1k files
+        this.SHARDING_THRESHOLD = 100; // Enable deep sharding at 100 files for optimal performance
         this.useDualWrite = true; // Write to both locations during migration
         this.activeLocks = new Set();
         this.lockTimers = new Map(); // Track timers for cleanup
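
Dropping SHARDING_THRESHOLD from 1000 to 100 makes directories split into sub-shards roughly ten times sooner, keeping per-directory file counts small; the new traversal helpers later in this diff are built around that layout. For context, a minimal sketch of the id-to-path mapping the "0=flat, 1=ab/, 2=ab/cd/" comment describes (the helper name and slicing here are illustrative, not the package's actual getShardedPath):

    import * as path from 'node:path';

    // Illustrative only: map an id to a sharded JSON path at a given depth.
    function shardedPath(baseDir: string, id: string, depth: 0 | 1 | 2): string {
      const shard1 = id.slice(0, 2); // "ab"
      const shard2 = id.slice(2, 4); // "cd"
      if (depth === 0) return path.join(baseDir, `${id}.json`);
      if (depth === 1) return path.join(baseDir, shard1, `${id}.json`);
      return path.join(baseDir, shard1, shard2, `${id}.json`);
    }

    // shardedPath('verbs', 'abcd1234', 2) -> 'verbs/ab/cd/abcd1234.json'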
@@ -182,7 +182,8 @@ export class FileSystemStorage extends BaseStorage {
                 id: parsedNode.id,
                 vector: parsedNode.vector,
                 connections,
-                level: parsedNode.level || 0
+                level: parsedNode.level || 0,
+                metadata: parsedNode.metadata
             };
         }
         catch (error) {
@@ -303,6 +304,8 @@ export class FileSystemStorage extends BaseStorage {
      */
     async saveEdge(edge) {
         await this.ensureInitialized();
+        // Check if this is a new edge to update counts
+        const isNew = !(await this.fileExists(this.getVerbPath(edge.id)));
         // Convert connections Map to a serializable format
         const serializableEdge = {
             ...edge,
@@ -311,6 +314,14 @@ export class FileSystemStorage extends BaseStorage {
         const filePath = this.getVerbPath(edge.id);
         await this.ensureDirectoryExists(path.dirname(filePath));
         await fs.promises.writeFile(filePath, JSON.stringify(serializableEdge, null, 2));
+        // Update verb count for new edges (production-scale optimizations)
+        if (isNew) {
+            this.totalVerbCount++;
+            // Persist counts periodically (every 10 operations for efficiency)
+            if (this.totalVerbCount % 10 === 0) {
+                this.persistCounts(); // Async persist, don't await
+            }
+        }
     }
     /**
      * Get an edge from storage
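
The write path now counts new edges and flushes the counter on every tenth new edge without awaiting the flush. A minimal sketch of that batched, fire-and-forget pattern, with stand-in names (the counts.json sidecar and module-level counter are assumptions, not the package's actual persistCounts):

    import * as fs from 'node:fs';

    let totalVerbCount = 0;

    async function persistCounts(): Promise<void> {
      // Assumed layout: a small JSON sidecar file holding the counters.
      await fs.promises.writeFile('counts.json', JSON.stringify({ totalVerbCount }));
    }

    function onNewEdgeSaved(): void {
      totalVerbCount++;
      if (totalVerbCount % 10 === 0) {
        // Deliberately not awaited: edge writes stay fast, at the cost of a
        // persisted count that can lag the data by up to nine writes.
        persistCounts().catch(() => { /* ignored; the next flush retries */ });
      }
    }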
@@ -502,8 +513,22 @@ export class FileSystemStorage extends BaseStorage {
      */
     async saveVerbMetadata_internal(id, metadata) {
         await this.ensureInitialized();
+        console.log(`[DEBUG] Saving verb metadata for ${id} to: ${this.verbMetadataDir}`);
         const filePath = path.join(this.verbMetadataDir, `${id}.json`);
-        await fs.promises.writeFile(filePath, JSON.stringify(metadata, null, 2));
+        console.log(`[DEBUG] Full file path: ${filePath}`);
+        try {
+            await this.ensureDirectoryExists(path.dirname(filePath));
+            console.log(`[DEBUG] Directory ensured: ${path.dirname(filePath)}`);
+            await fs.promises.writeFile(filePath, JSON.stringify(metadata, null, 2));
+            console.log(`[DEBUG] File written successfully: ${filePath}`);
+            // Verify the file was actually written
+            const exists = await fs.promises.access(filePath).then(() => true).catch(() => false);
+            console.log(`[DEBUG] File exists after write: ${exists}`);
+        }
+        catch (error) {
+            console.error(`[DEBUG] Error saving verb metadata:`, error);
+            throw error;
+        }
     }
     /**
      * Get verb metadata from storage
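
Note that the [DEBUG] tracing above logs unconditionally in the published build. A hedged sketch of one conventional alternative, gating the same write-then-verify diagnostics behind an environment variable (BRAINY_DEBUG is invented here for illustration, not an existing flag):

    // Hypothetical gate; the published code logs on every metadata write.
    const DEBUG = process.env.BRAINY_DEBUG === '1';

    function debug(...args: unknown[]): void {
      if (DEBUG) console.log('[DEBUG]', ...args);
    }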
@@ -531,9 +556,8 @@ export class FileSystemStorage extends BaseStorage {
         const limit = options.limit || 100;
         const cursor = options.cursor;
         try {
-            // Get all noun files
-            const files = await fs.promises.readdir(this.nounsDir);
-            const nounFiles = files.filter((f) => f.endsWith('.json'));
+            // Get all noun files (handles sharding properly)
+            const nounFiles = await this.getAllShardedFiles(this.nounsDir);
             // Sort for consistent pagination
             nounFiles.sort();
             // Find starting position - prioritize offset for O(1) operation
@@ -562,7 +586,8 @@ export class FileSystemStorage extends BaseStorage {
             // Second pass: load the current page
             for (const file of pageFiles) {
                 try {
-                    const data = await fs.promises.readFile(path.join(this.nounsDir, file), 'utf-8');
+                    const id = file.replace('.json', '');
+                    const data = await fs.promises.readFile(this.getNodePath(id), 'utf-8');
                     const noun = JSON.parse(data);
                     // Apply filter if provided
                     if (options.filter) {
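
Listing now goes through getAllShardedFiles and resolves each id back to its sharded location via getNodePath, so pagination behaves the same at any sharding depth. A sketch of how a caller might drain such a cursor-paginated listing; the page shape is taken from the verb-side code later in this diff, while forEachItem and the fetchPage callback are illustrative stand-ins for the storage method:

    interface Page<T> {
      items: T[];
      totalCount: number;
      hasMore: boolean;
      nextCursor?: string;
    }

    async function forEachItem<T>(
      fetchPage: (opts: { limit: number; cursor?: string }) => Promise<Page<T>>,
      visit: (item: T) => void
    ): Promise<void> {
      let cursor: string | undefined;
      do {
        const page = await fetchPage({ limit: 100, cursor });
        page.items.forEach(visit);
        cursor = page.hasMore ? page.nextCursor : undefined;
      } while (cursor !== undefined);
    }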
@@ -872,29 +897,30 @@ export class FileSystemStorage extends BaseStorage {
         const limit = options.limit || 100;
         const startIndex = options.cursor ? parseInt(options.cursor, 10) : 0;
         try {
-            // List all verb files in the verbs directory
-            // Note: For very large directories (millions of files), this could be memory-intensive
-            // Future optimization: Use fs.opendir() for streaming directory reads
-            const files = await fs.promises.readdir(this.verbsDir);
-            const verbFiles = files.filter((f) => f.endsWith('.json'));
-            // Sort files for consistent ordering
-            verbFiles.sort();
-            // Calculate pagination
-            const totalCount = verbFiles.length;
+            // Production-scale optimization: Use persisted count for total instead of scanning
+            const totalCount = this.totalVerbCount || 0;
+            // For large datasets, warn about performance
+            if (totalCount > 1000000) {
+                console.warn(`Very large verb dataset detected (${totalCount} verbs). Performance may be degraded. Consider database storage for optimal performance.`);
+            }
+            // Calculate pagination bounds
             const endIndex = Math.min(startIndex + limit, totalCount);
             const hasMore = endIndex < totalCount;
-            // Safety check for large datasets
-            if (totalCount > 100000) {
-                console.warn(`Large verb dataset detected (${totalCount} verbs). Consider using a database for better performance.`);
+            // For production-scale datasets, use streaming approach
+            if (totalCount > 50000) {
+                return await this.getVerbsWithPaginationStreaming(options, startIndex, limit);
             }
+            // For smaller datasets, use the current approach (with optimizations)
+            const verbFiles = await this.getAllShardedFiles(this.verbsDir);
+            verbFiles.sort(); // This is still acceptable for <50k files
             // Load the requested page of verbs
             const verbs = [];
             for (let i = startIndex; i < endIndex; i++) {
                 const file = verbFiles[i];
                 const id = file.replace('.json', '');
                 try {
-                    // Read the verb data (HNSWVerb stored as edge)
-                    const filePath = path.join(this.verbsDir, file);
+                    // Read the verb data (HNSWVerb stored as edge) - use sharded path
+                    const filePath = this.getVerbPath(id);
                     const data = await fs.promises.readFile(filePath, 'utf-8');
                     const edge = JSON.parse(data);
                     // Get metadata which contains the actual verb information
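
The cursor here is just a stringified start index, and the total now comes from the persisted counter instead of a directory scan. A worked example of the bounds arithmetic above, with all values illustrative:

    const totalCount = 120;                                    // from this.totalVerbCount
    const limit = 100;
    const startIndex = parseInt('100', 10);                    // cursor '100' -> 100
    const endIndex = Math.min(startIndex + limit, totalCount); // 120
    const hasMore = endIndex < totalCount;                     // false: this is the last page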
@@ -1336,20 +1362,19 @@ export class FileSystemStorage extends BaseStorage {
      */
     async initializeCountsFromDisk() {
         try {
-            // Count nouns
-            const nounFiles = await fs.promises.readdir(this.nounsDir);
-            const validNounFiles = nounFiles.filter((f) => f.endsWith('.json'));
+            // Count nouns (handles sharding properly)
+            const validNounFiles = await this.getAllShardedFiles(this.nounsDir);
             this.totalNounCount = validNounFiles.length;
-            // Count verbs
-            const verbFiles = await fs.promises.readdir(this.verbsDir);
-            const validVerbFiles = verbFiles.filter((f) => f.endsWith('.json'));
+            // Count verbs (handles sharding properly)
+            const validVerbFiles = await this.getAllShardedFiles(this.verbsDir);
             this.totalVerbCount = validVerbFiles.length;
             // Sample some files to get type distribution (don't read all)
             const sampleSize = Math.min(100, validNounFiles.length);
             for (let i = 0; i < sampleSize; i++) {
                 try {
                     const file = validNounFiles[i];
-                    const data = await fs.promises.readFile(path.join(this.nounsDir, file), 'utf-8');
+                    const id = file.replace('.json', '');
+                    const data = await fs.promises.readFile(this.getNodePath(id), 'utf-8');
                     const noun = JSON.parse(data);
                     const type = noun.metadata?.type || noun.metadata?.nounType || 'default';
                     this.entityCounts.set(type, (this.entityCounts.get(type) || 0) + 1);
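
Because at most 100 noun files are sampled, entityCounts holds sample counts rather than totals. If a full-distribution estimate were wanted, the sample could be scaled up; a sketch under that assumption (estimateTypeTotals is hypothetical, the shipped code keeps the raw sample counts):

    function estimateTypeTotals(
      sampled: Map<string, number>,
      sampleSize: number,
      totalNouns: number
    ): Map<string, number> {
      const scale = sampleSize > 0 ? totalNouns / sampleSize : 0;
      return new Map([...sampled].map(([type, n]) => [type, Math.round(n * scale)]));
    }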
@@ -1449,6 +1474,327 @@
             return path.join(baseDir, shard1Deep, shard2Deep, `${id}.json`);
         }
     }
+    /**
+     * Get all JSON files from a sharded directory structure
+     * Properly traverses sharded subdirectories based on current sharding depth
+     */
+    async getAllShardedFiles(baseDir) {
+        const allFiles = [];
+        const depth = this.cachedShardingDepth ?? this.getOptimalShardingDepth();
+        try {
+            switch (depth) {
+                case 0:
+                    // Flat structure: read directly from baseDir
+                    const flatFiles = await fs.promises.readdir(baseDir);
+                    for (const file of flatFiles) {
+                        if (file.endsWith('.json')) {
+                            allFiles.push(file);
+                        }
+                    }
+                    break;
+                case 1:
+                    // Single-level sharding: baseDir/ab/
+                    try {
+                        const shardDirs = await fs.promises.readdir(baseDir);
+                        for (const shardDir of shardDirs) {
+                            const shardPath = path.join(baseDir, shardDir);
+                            try {
+                                const stat = await fs.promises.stat(shardPath);
+                                if (stat.isDirectory()) {
+                                    const shardFiles = await fs.promises.readdir(shardPath);
+                                    for (const file of shardFiles) {
+                                        if (file.endsWith('.json')) {
+                                            allFiles.push(file);
+                                        }
+                                    }
+                                }
+                            }
+                            catch (shardError) {
+                                // Skip inaccessible shard directories
+                                continue;
+                            }
+                        }
+                    }
+                    catch (baseError) {
+                        // If baseDir doesn't exist, return empty array
+                        if (baseError.code === 'ENOENT') {
+                            return [];
+                        }
+                        throw baseError;
+                    }
+                    break;
+                case 2:
+                default:
+                    // Deep sharding: baseDir/ab/cd/
+                    try {
+                        const level1Dirs = await fs.promises.readdir(baseDir);
+                        for (const level1Dir of level1Dirs) {
+                            const level1Path = path.join(baseDir, level1Dir);
+                            try {
+                                const level1Stat = await fs.promises.stat(level1Path);
+                                if (level1Stat.isDirectory()) {
+                                    const level2Dirs = await fs.promises.readdir(level1Path);
+                                    for (const level2Dir of level2Dirs) {
+                                        const level2Path = path.join(level1Path, level2Dir);
+                                        try {
+                                            const level2Stat = await fs.promises.stat(level2Path);
+                                            if (level2Stat.isDirectory()) {
+                                                const shardFiles = await fs.promises.readdir(level2Path);
+                                                for (const file of shardFiles) {
+                                                    if (file.endsWith('.json')) {
+                                                        allFiles.push(file);
+                                                    }
+                                                }
+                                            }
+                                        }
+                                        catch (level2Error) {
+                                            // Skip inaccessible level2 directories
+                                            continue;
+                                        }
+                                    }
+                                }
+                            }
+                            catch (level1Error) {
+                                // Skip inaccessible level1 directories
+                                continue;
+                            }
+                        }
+                    }
+                    catch (baseError) {
+                        // If baseDir doesn't exist, return empty array
+                        if (baseError.code === 'ENOENT') {
+                            return [];
+                        }
+                        throw baseError;
+                    }
+                    break;
+            }
+            // Sort for consistent ordering
+            allFiles.sort();
+            return allFiles;
+        }
+        catch (error) {
+            if (error.code === 'ENOENT') {
+                // Directory doesn't exist yet
+                return [];
+            }
+            throw error;
+        }
+    }
+    /**
+     * Production-scale streaming pagination for very large datasets
+     * Avoids loading all filenames into memory
+     */
+    async getVerbsWithPaginationStreaming(options, startIndex, limit) {
+        const verbs = [];
+        const totalCount = this.totalVerbCount || 0;
+        let processedCount = 0;
+        let skippedCount = 0;
+        let resultCount = 0;
+        const depth = this.cachedShardingDepth ?? this.getOptimalShardingDepth();
+        try {
+            // Stream through sharded directories efficiently
+            const hasMore = await this.streamShardedFiles(this.verbsDir, depth, async (filename, filePath) => {
+                // Skip files until we reach start index
+                if (skippedCount < startIndex) {
+                    skippedCount++;
+                    return true; // continue
+                }
+                // Stop if we have enough results
+                if (resultCount >= limit) {
+                    return false; // stop streaming
+                }
+                try {
+                    const id = filename.replace('.json', '');
+                    // Read verb data and metadata
+                    const data = await fs.promises.readFile(filePath, 'utf-8');
+                    const edge = JSON.parse(data);
+                    const metadata = await this.getVerbMetadata(id);
+                    if (!metadata) {
+                        processedCount++;
+                        return true; // continue, skip this verb
+                    }
+                    // Reconstruct GraphVerb
+                    const verb = {
+                        id: edge.id,
+                        vector: edge.vector,
+                        connections: edge.connections || new Map(),
+                        sourceId: metadata.sourceId || metadata.source,
+                        targetId: metadata.targetId || metadata.target,
+                        source: metadata.source || metadata.sourceId,
+                        target: metadata.target || metadata.targetId,
+                        verb: metadata.verb || metadata.type,
+                        type: metadata.type || metadata.verb,
+                        weight: metadata.weight,
+                        metadata: metadata.metadata || metadata,
+                        data: metadata.data,
+                        createdAt: metadata.createdAt,
+                        updatedAt: metadata.updatedAt,
+                        createdBy: metadata.createdBy,
+                        embedding: metadata.embedding || edge.vector
+                    };
+                    // Apply filters
+                    if (options.filter) {
+                        const filter = options.filter;
+                        if (filter.verbType) {
+                            const types = Array.isArray(filter.verbType) ? filter.verbType : [filter.verbType];
+                            const verbType = verb.type || verb.verb;
+                            if (verbType && !types.includes(verbType))
+                                return true; // continue
+                        }
+                        if (filter.sourceId) {
+                            const sources = Array.isArray(filter.sourceId) ? filter.sourceId : [filter.sourceId];
+                            const sourceId = verb.sourceId || verb.source;
+                            if (!sourceId || !sources.includes(sourceId))
+                                return true; // continue
+                        }
+                        if (filter.targetId) {
+                            const targets = Array.isArray(filter.targetId) ? filter.targetId : [filter.targetId];
+                            const targetId = verb.targetId || verb.target;
+                            if (!targetId || !targets.includes(targetId))
+                                return true; // continue
+                        }
+                    }
+                    verbs.push(verb);
+                    resultCount++;
+                    processedCount++;
+                    return true; // continue
+                }
+                catch (error) {
+                    console.warn(`Failed to read verb from ${filePath}:`, error);
+                    processedCount++;
+                    return true; // continue
+                }
+            });
+            const finalHasMore = (startIndex + resultCount) < totalCount;
+            return {
+                items: verbs,
+                totalCount,
+                hasMore: finalHasMore,
+                nextCursor: finalHasMore ? String(startIndex + resultCount) : undefined
+            };
+        }
+        catch (error) {
+            if (error.code === 'ENOENT') {
+                return {
+                    items: [],
+                    totalCount: 0,
+                    hasMore: false
+                };
+            }
+            throw error;
+        }
+    }
+    /**
+     * Stream through sharded files without loading all names into memory
+     * Production-scale implementation for millions of files
+     */
+    async streamShardedFiles(baseDir, depth, processor) {
+        let hasMore = true;
+        switch (depth) {
+            case 0:
+                // Flat structure
+                try {
+                    const files = await fs.promises.readdir(baseDir);
+                    const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
+                    for (const file of sortedFiles) {
+                        const shouldContinue = await processor(file, path.join(baseDir, file));
+                        if (!shouldContinue) {
+                            hasMore = false;
+                            break;
+                        }
+                    }
+                }
+                catch (error) {
+                    if (error.code === 'ENOENT')
+                        hasMore = false;
+                }
+                break;
+            case 1:
+                // Single-level sharding: ab/
+                try {
+                    const shardDirs = await fs.promises.readdir(baseDir);
+                    const sortedShardDirs = shardDirs.sort();
+                    for (const shardDir of sortedShardDirs) {
+                        const shardPath = path.join(baseDir, shardDir);
+                        try {
+                            const stat = await fs.promises.stat(shardPath);
+                            if (stat.isDirectory()) {
+                                const files = await fs.promises.readdir(shardPath);
+                                const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
+                                for (const file of sortedFiles) {
+                                    const shouldContinue = await processor(file, path.join(shardPath, file));
+                                    if (!shouldContinue) {
+                                        hasMore = false;
+                                        break;
+                                    }
+                                }
+                                if (!hasMore)
+                                    break;
+                            }
+                        }
+                        catch (shardError) {
+                            continue; // Skip inaccessible shard directories
+                        }
+                    }
+                }
+                catch (error) {
+                    if (error.code === 'ENOENT')
+                        hasMore = false;
+                }
+                break;
+            case 2:
+            default:
+                // Deep sharding: ab/cd/
+                try {
+                    const level1Dirs = await fs.promises.readdir(baseDir);
+                    const sortedLevel1Dirs = level1Dirs.sort();
+                    for (const level1Dir of sortedLevel1Dirs) {
+                        const level1Path = path.join(baseDir, level1Dir);
+                        try {
+                            const level1Stat = await fs.promises.stat(level1Path);
+                            if (level1Stat.isDirectory()) {
+                                const level2Dirs = await fs.promises.readdir(level1Path);
+                                const sortedLevel2Dirs = level2Dirs.sort();
+                                for (const level2Dir of sortedLevel2Dirs) {
+                                    const level2Path = path.join(level1Path, level2Dir);
+                                    try {
+                                        const level2Stat = await fs.promises.stat(level2Path);
+                                        if (level2Stat.isDirectory()) {
+                                            const files = await fs.promises.readdir(level2Path);
+                                            const sortedFiles = files.filter((f) => f.endsWith('.json')).sort();
+                                            for (const file of sortedFiles) {
+                                                const shouldContinue = await processor(file, path.join(level2Path, file));
+                                                if (!shouldContinue) {
+                                                    hasMore = false;
+                                                    break;
+                                                }
+                                            }
+                                            if (!hasMore)
+                                                break;
+                                        }
+                                    }
+                                    catch (level2Error) {
+                                        continue; // Skip inaccessible level2 directories
+                                    }
+                                }
+                                if (!hasMore)
+                                    break;
+                            }
+                        }
+                        catch (level1Error) {
+                            continue; // Skip inaccessible level1 directories
+                        }
+                    }
+                }
+                catch (error) {
+                    if (error.code === 'ENOENT')
+                        hasMore = false;
+                }
+                break;
+        }
+        return hasMore;
+    }
     /**
      * Check if a file exists (handles both sharded and non-sharded)
      */
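
streamShardedFiles visits shard directories one readdir at a time, so memory is bounded by the largest single shard rather than the whole dataset, and the processor's boolean return gives callers early termination. The comment removed earlier in this diff pointed at fs.opendir() as a further step; a minimal sketch of that fully streaming variant for one flat directory, using the same processor contract (streamFlatDirectory is illustrative, not part of the package):

    import * as fs from 'node:fs';
    import * as path from 'node:path';

    type FileProcessor = (filename: string, filePath: string) => Promise<boolean>;

    // Visits entries as the OS yields them; no full listing is materialized.
    // Trade-off: opendir order is not sorted, so the deterministic pagination
    // above would need an ordering layer on top of this sketch.
    async function streamFlatDirectory(dir: string, processor: FileProcessor): Promise<boolean> {
      const handle = await fs.promises.opendir(dir);
      for await (const entry of handle) {
        if (!entry.isFile() || !entry.name.endsWith('.json')) continue;
        const keepGoing = await processor(entry.name, path.join(dir, entry.name));
        if (!keepGoing) return false; // early stop; the iterator closes the handle
      }
      return true;
    }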
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "3.16.0",
+  "version": "3.17.0",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.",
   "main": "dist/index.js",
   "module": "dist/index.js",