@soulcraft/brainy 3.8.3 → 3.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,7 +9,7 @@
9
9
  [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
10
10
  [![TypeScript](https://img.shields.io/badge/%3C%2F%3E-TypeScript-%230074c1.svg)](https://www.typescriptlang.org/)
11
11
 
12
- **🧠 Brainy 3.0 - Universal Knowledge Protocol™**
12
+ **🧠 Brainy - Universal Knowledge Protocol™**
13
13
 
14
14
  **World's first Triple Intelligence™ database** unifying vector similarity, graph relationships, and document filtering in one magical API. **Framework-friendly design** works seamlessly with Next.js, React, Vue, Angular, and any modern JavaScript framework.
15
15
 
@@ -17,7 +17,7 @@
17
17
 
18
18
  **Framework-first design.** Built for modern web development with zero configuration and automatic framework compatibility. O(log n) performance, <10ms search latency, production-ready.
19
19
 
20
- ## 🎉 What's New in 3.0
20
+ ## 🎉 Key Features
21
21
 
22
22
  ### 🧠 **Triple Intelligence™ Engine**
23
23
 
@@ -49,7 +49,7 @@ npm install @soulcraft/brainy
49
49
  ### 🎯 **True Zero Configuration**
50
50
 
51
51
  ```javascript
52
- import {Brainy} from '@soulcraft/brainy'
52
+ import { Brainy, NounType } from '@soulcraft/brainy'
53
53
 
54
54
  // Just this - auto-detects everything!
55
55
  const brain = new Brainy()
@@ -58,7 +58,7 @@ await brain.init()
58
58
  // Add entities with automatic embedding
59
59
  const jsId = await brain.add({
60
60
  data: "JavaScript is a programming language",
61
- type: "concept",
61
+ nounType: NounType.Concept,
62
62
  metadata: {
63
63
  type: "language",
64
64
  year: 1995,
@@ -68,7 +68,7 @@ const jsId = await brain.add({
68
68
 
69
69
  const nodeId = await brain.add({
70
70
  data: "Node.js runtime environment",
71
- type: "concept",
71
+ nounType: NounType.Concept,
72
72
  metadata: {
73
73
  type: "runtime",
74
74
  year: 2009,
@@ -100,7 +100,7 @@ const filtered = await brain.find({
100
100
 
101
101
  ## 🌐 Framework Integration
102
102
 
103
- **Brainy 3.0 is framework-first!** Works seamlessly with any modern JavaScript framework:
103
+ **Brainy is framework-first!** Works seamlessly with any modern JavaScript framework:
104
104
 
105
105
  ### ⚛️ **React & Next.js**
106
106
  ```javascript
@@ -194,7 +194,7 @@ If using nvm: `nvm use` (we provide a `.nvmrc` file)
194
194
 
195
195
  **Enabled by Triple Intelligence, standardized for everyone:**
196
196
 
197
- - **24 Noun Types × 40 Verb Types**: 960 base combinations
197
+ - **31 Noun Types × 40 Verb Types**: 1,240 base combinations
198
198
  - **∞ Expressiveness**: Unlimited metadata = model ANY data
199
199
  - **One Language**: All tools, augmentations, AI models speak the same types
200
200
  - **Perfect Interoperability**: Move data between any Brainy instance
@@ -211,10 +211,10 @@ await brain.find("Documentation about authentication from last month")
211
211
 
212
212
  ### 🎯 Zero Configuration Philosophy
213
213
 
214
- Brainy 3.0 automatically configures **everything**:
214
+ Brainy automatically configures **everything**:
215
215
 
216
216
  ```javascript
217
- import {Brainy} from '@soulcraft/brainy'
217
+ import { Brainy } from '@soulcraft/brainy'
218
218
 
219
219
  // 1. Pure zero-config - detects everything
220
220
  const brain = new Brainy()
@@ -368,6 +368,8 @@ const brain = new Brainy({
368
368
  ### Real-World Example: Social Media Firehose
369
369
 
370
370
  ```javascript
371
+ import { Brainy, NounType } from '@soulcraft/brainy'
372
+
371
373
  // Ingestion nodes (optimized for writes)
372
374
  const ingestionNode = new Brainy({
373
375
  storage: {type: 's3', options: {bucket: 'social-data'}},
@@ -378,7 +380,7 @@ const ingestionNode = new Brainy({
378
380
  // Process Bluesky firehose
379
381
  blueskyStream.on('post', async (post) => {
380
382
  await ingestionNode.add(post, {
381
- nounType: 'social-post',
383
+ nounType: NounType.Message,
382
384
  platform: 'bluesky',
383
385
  author: post.author,
384
386
  timestamp: post.createdAt
@@ -417,21 +419,19 @@ const trending = await searchNode.find('trending AI topics', {
417
419
  ```javascript
418
420
  // Store documentation with rich relationships
419
421
  const apiGuide = await brain.add("REST API Guide", {
420
- nounType: 'document',
422
+ nounType: NounType.Document,
421
423
  title: "API Guide",
422
424
  category: "documentation",
423
425
  version: "2.0"
424
426
  })
425
427
 
426
428
  const author = await brain.add("Jane Developer", {
427
- nounType: 'person',
428
- type: "person",
429
+ nounType: NounType.Person,
429
430
  role: "tech-lead"
430
431
  })
431
432
 
432
433
  const project = await brain.add("E-commerce Platform", {
433
- nounType: 'project',
434
- type: "project",
434
+ nounType: NounType.Project,
435
435
  status: "active"
436
436
  })
437
437
 
@@ -462,21 +462,18 @@ const similar = await brain.search(existingContent, {
462
462
  ```javascript
463
463
  // Store conversation with relationships
464
464
  const userId = await brain.add("User 123", {
465
- nounType: 'user',
466
- type: "user",
465
+ nounType: NounType.User,
467
466
  tier: "premium"
468
467
  })
469
468
 
470
469
  const messageId = await brain.add(userMessage, {
471
- nounType: 'message',
472
- type: "message",
470
+ nounType: NounType.Message,
473
471
  timestamp: Date.now(),
474
472
  session: "abc"
475
473
  })
476
474
 
477
475
  const topicId = await brain.add("Product Support", {
478
- nounType: 'topic',
479
- type: "topic",
476
+ nounType: NounType.Topic,
480
477
  category: "support"
481
478
  })
482
479
 
@@ -602,7 +599,7 @@ for (const cluster of feedbackClusters) {
602
599
  }
603
600
 
604
601
  // Find related documents
605
- const docId = await brain.add("Machine learning guide", { nounType: 'document' })
602
+ const docId = await brain.add("Machine learning guide", { nounType: NounType.Document })
606
603
  const similar = await neural.neighbors(docId, 5)
607
604
  // Returns 5 most similar documents
608
605
 
@@ -637,7 +634,7 @@ Brainy includes enterprise-grade capabilities at no extra cost. **No premium tie
637
634
  - **Built-in monitoring** with metrics and health checks
638
635
  - **Production ready** with circuit breakers and backpressure
639
636
 
640
- 📖 **Enterprise features coming in Brainy 3.1** - Stay tuned!
637
+ 📖 **More enterprise features coming soon** - Stay tuned!
641
638
 
642
639
  ## 📊 Benchmarks
643
640
 
@@ -651,13 +648,14 @@ Brainy includes enterprise-grade capabilities at no extra cost. **No premium tie
651
648
  | Bulk Import (1000 items) | 2.3s | +8MB |
652
649
  | **Production Scale (10M items)** | **5.8ms** | **12GB** |
653
650
 
654
- ## 🔄 Migration from 2.x
651
+ ## 🔄 Migration from Previous Versions
655
652
 
656
- Key changes for upgrading to 3.0:
653
+ Key changes in the latest version:
657
654
 
658
655
  - Search methods consolidated into `search()` and `find()`
659
656
  - Result format now includes full objects with metadata
660
- - New natural language capabilities
657
+ - Enhanced natural language capabilities
658
+ - Distributed architecture support
661
659
 
662
660
  ## 🤝 Contributing
663
661
 
@@ -678,10 +676,10 @@ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
678
676
  ### The Math of Infinite Expressiveness
679
677
 
680
678
  ```
681
- 24 Nouns × 40 Verbs × ∞ Metadata × Triple Intelligence = Universal Protocol
679
+ 31 Nouns × 40 Verbs × ∞ Metadata × Triple Intelligence = Universal Protocol
682
680
  ```
683
681
 
684
- - **960 base combinations** from standardized types
682
+ - **1,240 base combinations** from standardized types
685
683
  - **∞ domain specificity** via unlimited metadata
686
684
  - **∞ relationship depth** via graph traversal
687
685
  - **= Model ANYTHING**: From quantum physics to social networks
package/dist/brainy.d.ts CHANGED
@@ -26,6 +26,10 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
26
26
  private distance;
27
27
  private augmentationRegistry;
28
28
  private config;
29
+ private coordinator?;
30
+ private shardManager?;
31
+ private cacheSync?;
32
+ private readWriteSeparation?;
29
33
  private originalConsole?;
30
34
  private _neural?;
31
35
  private _nlp?;
@@ -111,6 +115,16 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
111
115
  * Clear all data from the database
112
116
  */
113
117
  clear(): Promise<void>;
118
+ /**
119
+ * Get total count of nouns - O(1) operation
120
+ * @returns Promise that resolves to the total number of nouns
121
+ */
122
+ getNounCount(): Promise<number>;
123
+ /**
124
+ * Get total count of verbs - O(1) operation
125
+ * @returns Promise that resolves to the total number of verbs
126
+ */
127
+ getVerbCount(): Promise<number>;
114
128
  /**
115
129
  * Neural API - Advanced AI operations
116
130
  */
@@ -353,6 +367,19 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
353
367
  * Close and cleanup
354
368
  */
355
369
  close(): Promise<void>;
370
+ /**
371
+ * Intelligently auto-detect distributed configuration
372
+ * Zero-config: Automatically determines best distributed settings
373
+ */
374
+ private autoDetectDistributed;
375
+ /**
376
+ * Setup distributed components with zero-config intelligence
377
+ */
378
+ private setupDistributedComponents;
379
+ /**
380
+ * Pass distributed components to storage adapter
381
+ */
382
+ private connectDistributedStorage;
356
383
  }
357
384
  export * from './types/brainy.types.js';
358
385
  export { NounType, VerbType } from './types/graphTypes.js';
package/dist/brainy.js CHANGED
@@ -18,6 +18,7 @@ import { MetadataIndexManager } from './utils/metadataIndex.js';
18
18
  import { GraphAdjacencyIndex } from './graph/graphAdjacencyIndex.js';
19
19
  import { createPipeline } from './streaming/pipeline.js';
20
20
  import { configureLogger, LogLevel } from './utils/logger.js';
21
+ import { DistributedCoordinator, ShardManager, CacheSync, ReadWriteSeparation } from './distributed/index.js';
21
22
  import { NounType } from './types/graphTypes.js';
22
23
  /**
23
24
  * The main Brainy class - Clean, Beautiful, Powerful
@@ -35,6 +36,10 @@ export class Brainy {
35
36
  this.distance = cosineDistance;
36
37
  this.embedder = this.setupEmbedder();
37
38
  this.augmentationRegistry = this.setupAugmentations();
39
+ // Setup distributed components if enabled
40
+ if (this.config.distributed?.enabled) {
41
+ this.setupDistributedComponents();
42
+ }
38
43
  // Index and storage are initialized in init() because they may need each other
39
44
  }
40
45
  /**
@@ -113,6 +118,8 @@ export class Brainy {
113
118
  }
114
119
  }
115
120
  });
121
+ // Connect distributed components to storage
122
+ await this.connectDistributedStorage();
116
123
  // Warm up if configured
117
124
  if (this.config.warmup) {
118
125
  await this.warmup();
@@ -269,6 +276,10 @@ export class Brainy {
269
276
  * Delete an entity
270
277
  */
271
278
  async delete(id) {
279
+ // Handle invalid IDs gracefully
280
+ if (!id || typeof id !== 'string') {
281
+ return; // Silently return for invalid IDs
282
+ }
272
283
  await this.ensureInitialized();
273
284
  return this.augmentationRegistry.execute('delete', { id }, async () => {
274
285
  // Remove from vector index
@@ -289,6 +300,9 @@ export class Brainy {
289
300
  const targetVerbs = await this.storage.getVerbsByTarget(id);
290
301
  const allVerbs = [...verbs, ...targetVerbs];
291
302
  for (const verb of allVerbs) {
303
+ // Remove from graph index first
304
+ await this.graphIndex.removeVerb(verb.id);
305
+ // Then delete from storage
292
306
  await this.storage.deleteVerb(verb.id);
293
307
  }
294
308
  });
@@ -407,10 +421,53 @@ export class Brainy {
407
421
  const startTime = Date.now();
408
422
  const result = await this.augmentationRegistry.execute('find', params, async () => {
409
423
  let results = [];
410
- // Handle empty query - return paginated results from storage
411
- const hasSearchCriteria = params.query || params.vector || params.where ||
412
- params.type || params.service || params.near || params.connected;
413
- if (!hasSearchCriteria) {
424
+ // Distinguish between search criteria (need vector search) and filter criteria (metadata only)
425
+ // Treat empty string query as no query
426
+ const hasVectorSearchCriteria = (params.query && params.query.trim() !== '') || params.vector || params.near;
427
+ const hasFilterCriteria = params.where || params.type || params.service;
428
+ const hasGraphCriteria = params.connected;
429
+ // Handle metadata-only queries (no vector search needed)
430
+ if (!hasVectorSearchCriteria && !hasGraphCriteria && hasFilterCriteria) {
431
+ // Build filter for metadata index
432
+ let filter = {};
433
+ if (params.where)
434
+ Object.assign(filter, params.where);
435
+ if (params.service)
436
+ filter.service = params.service;
437
+ if (params.type) {
438
+ const types = Array.isArray(params.type) ? params.type : [params.type];
439
+ if (types.length === 1) {
440
+ filter.noun = types[0];
441
+ }
442
+ else {
443
+ filter = {
444
+ anyOf: types.map(type => ({
445
+ noun: type,
446
+ ...filter
447
+ }))
448
+ };
449
+ }
450
+ }
451
+ // Get filtered IDs and paginate BEFORE loading entities
452
+ const filteredIds = await this.metadataIndex.getIdsForFilter(filter);
453
+ const limit = params.limit || 10;
454
+ const offset = params.offset || 0;
455
+ const pageIds = filteredIds.slice(offset, offset + limit);
456
+ // Load entities for the paginated results
457
+ for (const id of pageIds) {
458
+ const entity = await this.get(id);
459
+ if (entity) {
460
+ results.push({
461
+ id,
462
+ score: 1.0, // All metadata-filtered results equally relevant
463
+ entity
464
+ });
465
+ }
466
+ }
467
+ return results;
468
+ }
469
+ // Handle completely empty query - return all results paginated
470
+ if (!hasVectorSearchCriteria && !hasFilterCriteria && !hasGraphCriteria) {
414
471
  const limit = params.limit || 20;
415
472
  const offset = params.offset || 0;
416
473
  const storageResults = await this.storage.getNouns({
@@ -803,6 +860,22 @@ export class Brainy {
803
860
  this._tripleIntelligence = undefined;
804
861
  });
805
862
  }
863
+ /**
864
+ * Get total count of nouns - O(1) operation
865
+ * @returns Promise that resolves to the total number of nouns
866
+ */
867
+ async getNounCount() {
868
+ await this.ensureInitialized();
869
+ return this.storage.getNounCount();
870
+ }
871
+ /**
872
+ * Get total count of verbs - O(1) operation
873
+ * @returns Promise that resolves to the total number of verbs
874
+ */
875
+ async getVerbCount() {
876
+ await this.ensureInitialized();
877
+ return this.storage.getVerbCount();
878
+ }
806
879
  // ============= SUB-APIS =============
807
880
  /**
808
881
  * Neural API - Advanced AI operations
@@ -1462,18 +1535,27 @@ export class Brainy {
1462
1535
  if (config?.index?.efSearch && (config.index.efSearch < 1 || config.index.efSearch > 1000)) {
1463
1536
  throw new Error(`Invalid index efSearch: ${config.index.efSearch}. Must be between 1 and 1000`);
1464
1537
  }
1538
+ // Auto-detect distributed mode based on environment and configuration
1539
+ const distributedConfig = this.autoDetectDistributed(config?.distributed);
1465
1540
  return {
1466
1541
  storage: config?.storage || { type: 'auto' },
1467
1542
  model: config?.model || { type: 'fast' },
1468
1543
  index: config?.index || {},
1469
1544
  cache: config?.cache ?? true,
1470
1545
  augmentations: config?.augmentations || {},
1546
+ distributed: distributedConfig, // Type will be fixed when used
1471
1547
  warmup: config?.warmup ?? false,
1472
1548
  realtime: config?.realtime ?? false,
1473
1549
  multiTenancy: config?.multiTenancy ?? false,
1474
1550
  telemetry: config?.telemetry ?? false,
1475
1551
  verbose: config?.verbose ?? false,
1476
- silent: config?.silent ?? false
1552
+ silent: config?.silent ?? false,
1553
+ // New performance options with smart defaults
1554
+ disableAutoRebuild: config?.disableAutoRebuild ?? false, // false = auto-decide based on size
1555
+ disableMetrics: config?.disableMetrics ?? false,
1556
+ disableAutoOptimize: config?.disableAutoOptimize ?? false,
1557
+ batchWrites: config?.batchWrites ?? true,
1558
+ maxConcurrentOperations: config?.maxConcurrentOperations ?? 10
1477
1559
  };
1478
1560
  }
1479
1561
  /**
@@ -1483,17 +1565,49 @@ export class Brainy {
1483
1565
  try {
1484
1566
  // Check if storage has data
1485
1567
  const entities = await this.storage.getNouns({ pagination: { limit: 1 } });
1486
- if (entities.totalCount === 0 || entities.items.length === 0) {
1568
+ const totalCount = entities.totalCount || 0;
1569
+ if (totalCount === 0) {
1487
1570
  // No data in storage, no rebuild needed
1488
1571
  return;
1489
1572
  }
1573
+ // Intelligent decision: Auto-rebuild only for small datasets
1574
+ // For large datasets, use lazy loading for optimal performance
1575
+ const AUTO_REBUILD_THRESHOLD = 1000; // Only auto-rebuild if < 1000 items
1490
1576
  // Check if metadata index is empty
1491
1577
  const metadataStats = await this.metadataIndex.getStats();
1492
- if (metadataStats.totalEntries === 0) {
1493
- console.log('🔄 Rebuilding metadata index for existing data...');
1578
+ if (metadataStats.totalEntries === 0 && totalCount > 0) {
1579
+ if (totalCount < AUTO_REBUILD_THRESHOLD) {
1580
+ // Small dataset - rebuild for convenience
1581
+ if (!this.config.silent) {
1582
+ console.log(`🔄 Small dataset (${totalCount} items) - rebuilding index for optimal performance...`);
1583
+ }
1584
+ await this.metadataIndex.rebuild();
1585
+ const newStats = await this.metadataIndex.getStats();
1586
+ if (!this.config.silent) {
1587
+ console.log(`✅ Index rebuilt: ${newStats.totalEntries} entries`);
1588
+ }
1589
+ }
1590
+ else {
1591
+ // Large dataset - use lazy loading
1592
+ if (!this.config.silent) {
1593
+ console.log(`⚡ Large dataset (${totalCount} items) - using lazy loading for optimal startup performance`);
1594
+ console.log('💡 Tip: Indexes will build automatically as you use the system');
1595
+ }
1596
+ }
1597
+ }
1598
+ // Override with explicit config if provided
1599
+ if (this.config.disableAutoRebuild === true) {
1600
+ if (!this.config.silent) {
1601
+ console.log('⚡ Auto-rebuild explicitly disabled via config');
1602
+ }
1603
+ return;
1604
+ }
1605
+ else if (this.config.disableAutoRebuild === false && metadataStats.totalEntries === 0) {
1606
+ // Explicitly enabled - rebuild regardless of size
1607
+ if (!this.config.silent) {
1608
+ console.log('🔄 Auto-rebuild explicitly enabled - rebuilding index...');
1609
+ }
1494
1610
  await this.metadataIndex.rebuild();
1495
- const newStats = await this.metadataIndex.getStats();
1496
- console.log(`✅ Metadata index rebuilt: ${newStats.totalEntries} entries`);
1497
1611
  }
1498
1612
  // Note: GraphAdjacencyIndex will rebuild itself as relationships are added
1499
1613
  // Vector index should already be populated if storage has data
@@ -1525,6 +1639,113 @@ export class Brainy {
1525
1639
  // We'll just mark as not initialized
1526
1640
  this.initialized = false;
1527
1641
  }
1642
+ /**
1643
+ * Intelligently auto-detect distributed configuration
1644
+ * Zero-config: Automatically determines best distributed settings
1645
+ */
1646
+ autoDetectDistributed(config) {
1647
+ // If explicitly disabled, respect that
1648
+ if (config?.enabled === false) {
1649
+ return config;
1650
+ }
1651
+ // Auto-detect based on environment variables (common in production)
1652
+ const envEnabled = process.env.BRAINY_DISTRIBUTED === 'true' ||
1653
+ process.env.NODE_ENV === 'production' ||
1654
+ process.env.CLUSTER_SIZE ||
1655
+ process.env.KUBERNETES_SERVICE_HOST; // Running in K8s
1656
+ // Auto-detect based on storage type (S3/R2/GCS implies distributed)
1657
+ const storageImpliesDistributed = this.config?.storage?.type === 's3' ||
1658
+ this.config?.storage?.type === 'r2' ||
1659
+ this.config?.storage?.type === 'gcs';
1660
+ // If not explicitly configured but environment suggests distributed
1661
+ if (!config && (envEnabled || storageImpliesDistributed)) {
1662
+ return {
1663
+ enabled: true,
1664
+ nodeId: process.env.HOSTNAME || process.env.NODE_ID || `node-${Date.now()}`,
1665
+ nodes: process.env.BRAINY_NODES?.split(',') || [],
1666
+ coordinatorUrl: process.env.BRAINY_COORDINATOR || undefined,
1667
+ shardCount: parseInt(process.env.BRAINY_SHARDS || '64'),
1668
+ replicationFactor: parseInt(process.env.BRAINY_REPLICAS || '3'),
1669
+ consensus: process.env.BRAINY_CONSENSUS || 'raft',
1670
+ transport: process.env.BRAINY_TRANSPORT || 'http'
1671
+ };
1672
+ }
1673
+ // Merge with provided config, applying intelligent defaults
1674
+ return config ? {
1675
+ ...config,
1676
+ nodeId: config.nodeId || process.env.HOSTNAME || `node-${Date.now()}`,
1677
+ shardCount: config.shardCount || 64,
1678
+ replicationFactor: config.replicationFactor || 3,
1679
+ consensus: config.consensus || 'raft',
1680
+ transport: config.transport || 'http'
1681
+ } : undefined;
1682
+ }
1683
+ /**
1684
+ * Setup distributed components with zero-config intelligence
1685
+ */
1686
+ setupDistributedComponents() {
1687
+ const distConfig = this.config.distributed;
1688
+ if (!distConfig?.enabled)
1689
+ return;
1690
+ console.log('🌍 Initializing distributed mode:', {
1691
+ nodeId: distConfig.nodeId,
1692
+ shards: distConfig.shardCount,
1693
+ replicas: distConfig.replicationFactor
1694
+ });
1695
+ // Initialize coordinator for consensus
1696
+ this.coordinator = new DistributedCoordinator({
1697
+ nodeId: distConfig.nodeId,
1698
+ address: distConfig.coordinatorUrl?.split(':')[0] || 'localhost',
1699
+ port: parseInt(distConfig.coordinatorUrl?.split(':')[1] || '8080'),
1700
+ nodes: distConfig.nodes
1701
+ });
1702
+ // Start the coordinator to establish leadership
1703
+ this.coordinator.start().catch(err => {
1704
+ console.warn('Coordinator start failed (will retry on init):', err.message);
1705
+ });
1706
+ // Initialize shard manager for data distribution
1707
+ this.shardManager = new ShardManager({
1708
+ shardCount: distConfig.shardCount,
1709
+ replicationFactor: distConfig.replicationFactor,
1710
+ virtualNodes: 150, // Optimal for consistent distribution
1711
+ autoRebalance: true
1712
+ });
1713
+ // Initialize cache synchronization
1714
+ this.cacheSync = new CacheSync({
1715
+ nodeId: distConfig.nodeId,
1716
+ syncInterval: 1000
1717
+ });
1718
+ // Initialize read/write separation if we have replicas
1719
+ // Note: Will be properly initialized after coordinator starts
1720
+ if (distConfig.replicationFactor && distConfig.replicationFactor > 1) {
1721
+ // Defer creation until coordinator is ready
1722
+ setTimeout(() => {
1723
+ this.readWriteSeparation = new ReadWriteSeparation({
1724
+ nodeId: distConfig.nodeId,
1725
+ consistencyLevel: 'eventual',
1726
+ role: 'replica', // Start as replica, will promote if leader
1727
+ syncInterval: 5000
1728
+ }, this.coordinator, this.shardManager, this.cacheSync);
1729
+ }, 100);
1730
+ }
1731
+ }
1732
+ /**
1733
+ * Pass distributed components to storage adapter
1734
+ */
1735
+ async connectDistributedStorage() {
1736
+ if (!this.config.distributed?.enabled)
1737
+ return;
1738
+ // Check if storage supports distributed operations
1739
+ if ('setDistributedComponents' in this.storage) {
1740
+ this.storage.setDistributedComponents({
1741
+ coordinator: this.coordinator,
1742
+ shardManager: this.shardManager,
1743
+ cacheSync: this.cacheSync,
1744
+ readWriteSeparation: this.readWriteSeparation
1745
+ });
1746
+ console.log('✅ Distributed storage connected');
1747
+ }
1748
+ }
1528
1749
  }
1529
1750
  // Re-export types for convenience
1530
1751
  export * from './types/brainy.types.js';
@@ -512,4 +512,14 @@ export interface StorageAdapter {
512
512
  * @returns Promise that resolves to an array of changes
513
513
  */
514
514
  getChangesSince?(timestamp: number, limit?: number): Promise<any[]>;
515
+ /**
516
+ * Get total count of nouns in storage - O(1) operation
517
+ * @returns Promise that resolves to the total number of nouns
518
+ */
519
+ getNounCount(): Promise<number>;
520
+ /**
521
+ * Get total count of verbs in storage - O(1) operation
522
+ * @returns Promise that resolves to the total number of verbs
523
+ */
524
+ getVerbCount(): Promise<number>;
515
525
  }
@@ -7,6 +7,8 @@ export declare class HNSWIndex {
7
7
  private nouns;
8
8
  private entryPointId;
9
9
  private maxLevel;
10
+ private highLevelNodes;
11
+ private readonly MAX_TRACKED_LEVELS;
10
12
  private config;
11
13
  private distanceFunction;
12
14
  private dimension;
@@ -15,6 +15,9 @@ export class HNSWIndex {
15
15
  this.nouns = new Map();
16
16
  this.entryPointId = null;
17
17
  this.maxLevel = 0;
18
+ // Track high-level nodes for O(1) entry point selection
19
+ this.highLevelNodes = new Map(); // level -> node IDs
20
+ this.MAX_TRACKED_LEVELS = 10; // Only track top levels for memory efficiency
18
21
  this.dimension = null;
19
22
  this.useParallelization = true; // Whether to use parallelization for performance-critical operations
20
23
  this.config = { ...DEFAULT_CONFIG, ...config };
@@ -203,6 +206,13 @@ export class HNSWIndex {
203
206
  }
204
207
  // Add noun to the index
205
208
  this.nouns.set(id, noun);
209
+ // Track high-level nodes for O(1) entry point selection
210
+ if (nounLevel >= 2 && nounLevel <= this.MAX_TRACKED_LEVELS) {
211
+ if (!this.highLevelNodes.has(nounLevel)) {
212
+ this.highLevelNodes.set(nounLevel, new Set());
213
+ }
214
+ this.highLevelNodes.get(nounLevel).add(id);
215
+ }
206
216
  return id;
207
217
  }
208
218
  /**
@@ -159,8 +159,21 @@ export declare class ImprovedNeuralAPI {
159
159
  * Group items by their semantic noun types
160
160
  */
161
161
  private _groupBySemanticType;
162
- private _getAllItemIds;
162
+ /**
163
+ * Iterate through all items without loading them all at once
164
+ * This scales to millions of items without memory issues
165
+ */
166
+ private _iterateAllItems;
167
+ /**
168
+ * Get a sample of item IDs for operations that don't need all items
169
+ * This is O(1) for small samples
170
+ */
171
+ private _getSampleItemIds;
172
+ /**
173
+ * Get total count using the brain's O(1) counting API
174
+ */
163
175
  private _getTotalItemCount;
176
+ private _getAllItemIds;
164
177
  private _calculateTotalWeight;
165
178
  private _getNeighborCommunities;
166
179
  private _calculateModularityGain;