@soulcraft/brainy 3.8.3 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/brainy.d.ts CHANGED
@@ -26,6 +26,10 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
26
26
  private distance;
27
27
  private augmentationRegistry;
28
28
  private config;
29
+ private coordinator?;
30
+ private shardManager?;
31
+ private cacheSync?;
32
+ private readWriteSeparation?;
29
33
  private originalConsole?;
30
34
  private _neural?;
31
35
  private _nlp?;
@@ -111,6 +115,16 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
111
115
  * Clear all data from the database
112
116
  */
113
117
  clear(): Promise<void>;
118
+ /**
119
+ * Get total count of nouns - O(1) operation
120
+ * @returns Promise that resolves to the total number of nouns
121
+ */
122
+ getNounCount(): Promise<number>;
123
+ /**
124
+ * Get total count of verbs - O(1) operation
125
+ * @returns Promise that resolves to the total number of verbs
126
+ */
127
+ getVerbCount(): Promise<number>;
114
128
  /**
115
129
  * Neural API - Advanced AI operations
116
130
  */
@@ -353,6 +367,19 @@ export declare class Brainy<T = any> implements BrainyInterface<T> {
353
367
  * Close and cleanup
354
368
  */
355
369
  close(): Promise<void>;
370
+ /**
371
+ * Intelligently auto-detect distributed configuration
372
+ * Zero-config: Automatically determines best distributed settings
373
+ */
374
+ private autoDetectDistributed;
375
+ /**
376
+ * Setup distributed components with zero-config intelligence
377
+ */
378
+ private setupDistributedComponents;
379
+ /**
380
+ * Pass distributed components to storage adapter
381
+ */
382
+ private connectDistributedStorage;
356
383
  }
357
384
  export * from './types/brainy.types.js';
358
385
  export { NounType, VerbType } from './types/graphTypes.js';
package/dist/brainy.js CHANGED
@@ -18,6 +18,7 @@ import { MetadataIndexManager } from './utils/metadataIndex.js';
18
18
  import { GraphAdjacencyIndex } from './graph/graphAdjacencyIndex.js';
19
19
  import { createPipeline } from './streaming/pipeline.js';
20
20
  import { configureLogger, LogLevel } from './utils/logger.js';
21
+ import { DistributedCoordinator, ShardManager, CacheSync, ReadWriteSeparation } from './distributed/index.js';
21
22
  import { NounType } from './types/graphTypes.js';
22
23
  /**
23
24
  * The main Brainy class - Clean, Beautiful, Powerful
@@ -35,6 +36,10 @@ export class Brainy {
35
36
  this.distance = cosineDistance;
36
37
  this.embedder = this.setupEmbedder();
37
38
  this.augmentationRegistry = this.setupAugmentations();
39
+ // Setup distributed components if enabled
40
+ if (this.config.distributed?.enabled) {
41
+ this.setupDistributedComponents();
42
+ }
38
43
  // Index and storage are initialized in init() because they may need each other
39
44
  }
40
45
  /**
@@ -113,6 +118,8 @@ export class Brainy {
113
118
  }
114
119
  }
115
120
  });
121
+ // Connect distributed components to storage
122
+ await this.connectDistributedStorage();
116
123
  // Warm up if configured
117
124
  if (this.config.warmup) {
118
125
  await this.warmup();
@@ -269,6 +276,10 @@ export class Brainy {
269
276
  * Delete an entity
270
277
  */
271
278
  async delete(id) {
279
+ // Handle invalid IDs gracefully
280
+ if (!id || typeof id !== 'string') {
281
+ return; // Silently return for invalid IDs
282
+ }
272
283
  await this.ensureInitialized();
273
284
  return this.augmentationRegistry.execute('delete', { id }, async () => {
274
285
  // Remove from vector index
@@ -289,6 +300,9 @@ export class Brainy {
289
300
  const targetVerbs = await this.storage.getVerbsByTarget(id);
290
301
  const allVerbs = [...verbs, ...targetVerbs];
291
302
  for (const verb of allVerbs) {
303
+ // Remove from graph index first
304
+ await this.graphIndex.removeVerb(verb.id);
305
+ // Then delete from storage
292
306
  await this.storage.deleteVerb(verb.id);
293
307
  }
294
308
  });
@@ -407,10 +421,53 @@ export class Brainy {
407
421
  const startTime = Date.now();
408
422
  const result = await this.augmentationRegistry.execute('find', params, async () => {
409
423
  let results = [];
410
- // Handle empty query - return paginated results from storage
411
- const hasSearchCriteria = params.query || params.vector || params.where ||
412
- params.type || params.service || params.near || params.connected;
413
- if (!hasSearchCriteria) {
424
+ // Distinguish between search criteria (need vector search) and filter criteria (metadata only)
425
+ // Treat empty string query as no query
426
+ const hasVectorSearchCriteria = (params.query && params.query.trim() !== '') || params.vector || params.near;
427
+ const hasFilterCriteria = params.where || params.type || params.service;
428
+ const hasGraphCriteria = params.connected;
429
+ // Handle metadata-only queries (no vector search needed)
430
+ if (!hasVectorSearchCriteria && !hasGraphCriteria && hasFilterCriteria) {
431
+ // Build filter for metadata index
432
+ let filter = {};
433
+ if (params.where)
434
+ Object.assign(filter, params.where);
435
+ if (params.service)
436
+ filter.service = params.service;
437
+ if (params.type) {
438
+ const types = Array.isArray(params.type) ? params.type : [params.type];
439
+ if (types.length === 1) {
440
+ filter.noun = types[0];
441
+ }
442
+ else {
443
+ filter = {
444
+ anyOf: types.map(type => ({
445
+ noun: type,
446
+ ...filter
447
+ }))
448
+ };
449
+ }
450
+ }
451
+ // Get filtered IDs and paginate BEFORE loading entities
452
+ const filteredIds = await this.metadataIndex.getIdsForFilter(filter);
453
+ const limit = params.limit || 10;
454
+ const offset = params.offset || 0;
455
+ const pageIds = filteredIds.slice(offset, offset + limit);
456
+ // Load entities for the paginated results
457
+ for (const id of pageIds) {
458
+ const entity = await this.get(id);
459
+ if (entity) {
460
+ results.push({
461
+ id,
462
+ score: 1.0, // All metadata-filtered results equally relevant
463
+ entity
464
+ });
465
+ }
466
+ }
467
+ return results;
468
+ }
469
+ // Handle completely empty query - return all results paginated
470
+ if (!hasVectorSearchCriteria && !hasFilterCriteria && !hasGraphCriteria) {
414
471
  const limit = params.limit || 20;
415
472
  const offset = params.offset || 0;
416
473
  const storageResults = await this.storage.getNouns({
@@ -803,6 +860,22 @@ export class Brainy {
803
860
  this._tripleIntelligence = undefined;
804
861
  });
805
862
  }
863
+ /**
864
+ * Get total count of nouns - O(1) operation
865
+ * @returns Promise that resolves to the total number of nouns
866
+ */
867
+ async getNounCount() {
868
+ await this.ensureInitialized();
869
+ return this.storage.getNounCount();
870
+ }
871
+ /**
872
+ * Get total count of verbs - O(1) operation
873
+ * @returns Promise that resolves to the total number of verbs
874
+ */
875
+ async getVerbCount() {
876
+ await this.ensureInitialized();
877
+ return this.storage.getVerbCount();
878
+ }
806
879
  // ============= SUB-APIS =============
807
880
  /**
808
881
  * Neural API - Advanced AI operations
@@ -1462,18 +1535,27 @@ export class Brainy {
1462
1535
  if (config?.index?.efSearch && (config.index.efSearch < 1 || config.index.efSearch > 1000)) {
1463
1536
  throw new Error(`Invalid index efSearch: ${config.index.efSearch}. Must be between 1 and 1000`);
1464
1537
  }
1538
+ // Auto-detect distributed mode based on environment and configuration
1539
+ const distributedConfig = this.autoDetectDistributed(config?.distributed);
1465
1540
  return {
1466
1541
  storage: config?.storage || { type: 'auto' },
1467
1542
  model: config?.model || { type: 'fast' },
1468
1543
  index: config?.index || {},
1469
1544
  cache: config?.cache ?? true,
1470
1545
  augmentations: config?.augmentations || {},
1546
+ distributed: distributedConfig, // Type will be fixed when used
1471
1547
  warmup: config?.warmup ?? false,
1472
1548
  realtime: config?.realtime ?? false,
1473
1549
  multiTenancy: config?.multiTenancy ?? false,
1474
1550
  telemetry: config?.telemetry ?? false,
1475
1551
  verbose: config?.verbose ?? false,
1476
- silent: config?.silent ?? false
1552
+ silent: config?.silent ?? false,
1553
+ // New performance options with smart defaults
1554
+ disableAutoRebuild: config?.disableAutoRebuild ?? false, // false = auto-decide based on size
1555
+ disableMetrics: config?.disableMetrics ?? false,
1556
+ disableAutoOptimize: config?.disableAutoOptimize ?? false,
1557
+ batchWrites: config?.batchWrites ?? true,
1558
+ maxConcurrentOperations: config?.maxConcurrentOperations ?? 10
1477
1559
  };
1478
1560
  }
1479
1561
  /**
@@ -1483,17 +1565,49 @@ export class Brainy {
1483
1565
  try {
1484
1566
  // Check if storage has data
1485
1567
  const entities = await this.storage.getNouns({ pagination: { limit: 1 } });
1486
- if (entities.totalCount === 0 || entities.items.length === 0) {
1568
+ const totalCount = entities.totalCount || 0;
1569
+ if (totalCount === 0) {
1487
1570
  // No data in storage, no rebuild needed
1488
1571
  return;
1489
1572
  }
1573
+ // Intelligent decision: Auto-rebuild only for small datasets
1574
+ // For large datasets, use lazy loading for optimal performance
1575
+ const AUTO_REBUILD_THRESHOLD = 1000; // Only auto-rebuild if < 1000 items
1490
1576
  // Check if metadata index is empty
1491
1577
  const metadataStats = await this.metadataIndex.getStats();
1492
- if (metadataStats.totalEntries === 0) {
1493
- console.log('🔄 Rebuilding metadata index for existing data...');
1578
+ if (metadataStats.totalEntries === 0 && totalCount > 0) {
1579
+ if (totalCount < AUTO_REBUILD_THRESHOLD) {
1580
+ // Small dataset - rebuild for convenience
1581
+ if (!this.config.silent) {
1582
+ console.log(`🔄 Small dataset (${totalCount} items) - rebuilding index for optimal performance...`);
1583
+ }
1584
+ await this.metadataIndex.rebuild();
1585
+ const newStats = await this.metadataIndex.getStats();
1586
+ if (!this.config.silent) {
1587
+ console.log(`✅ Index rebuilt: ${newStats.totalEntries} entries`);
1588
+ }
1589
+ }
1590
+ else {
1591
+ // Large dataset - use lazy loading
1592
+ if (!this.config.silent) {
1593
+ console.log(`⚡ Large dataset (${totalCount} items) - using lazy loading for optimal startup performance`);
1594
+ console.log('💡 Tip: Indexes will build automatically as you use the system');
1595
+ }
1596
+ }
1597
+ }
1598
+ // Override with explicit config if provided
1599
+ if (this.config.disableAutoRebuild === true) {
1600
+ if (!this.config.silent) {
1601
+ console.log('⚡ Auto-rebuild explicitly disabled via config');
1602
+ }
1603
+ return;
1604
+ }
1605
+ else if (this.config.disableAutoRebuild === false && metadataStats.totalEntries === 0) {
1606
+ // Explicitly enabled - rebuild regardless of size
1607
+ if (!this.config.silent) {
1608
+ console.log('🔄 Auto-rebuild explicitly enabled - rebuilding index...');
1609
+ }
1494
1610
  await this.metadataIndex.rebuild();
1495
- const newStats = await this.metadataIndex.getStats();
1496
- console.log(`✅ Metadata index rebuilt: ${newStats.totalEntries} entries`);
1497
1611
  }
1498
1612
  // Note: GraphAdjacencyIndex will rebuild itself as relationships are added
1499
1613
  // Vector index should already be populated if storage has data
@@ -1525,6 +1639,113 @@ export class Brainy {
1525
1639
  // We'll just mark as not initialized
1526
1640
  this.initialized = false;
1527
1641
  }
1642
+ /**
1643
+ * Intelligently auto-detect distributed configuration
1644
+ * Zero-config: Automatically determines best distributed settings
1645
+ */
1646
+ autoDetectDistributed(config) {
1647
+ // If explicitly disabled, respect that
1648
+ if (config?.enabled === false) {
1649
+ return config;
1650
+ }
1651
+ // Auto-detect based on environment variables (common in production)
1652
+ const envEnabled = process.env.BRAINY_DISTRIBUTED === 'true' ||
1653
+ process.env.NODE_ENV === 'production' ||
1654
+ process.env.CLUSTER_SIZE ||
1655
+ process.env.KUBERNETES_SERVICE_HOST; // Running in K8s
1656
+ // Auto-detect based on storage type (S3/R2/GCS implies distributed)
1657
+ const storageImpliesDistributed = this.config?.storage?.type === 's3' ||
1658
+ this.config?.storage?.type === 'r2' ||
1659
+ this.config?.storage?.type === 'gcs';
1660
+ // If not explicitly configured but environment suggests distributed
1661
+ if (!config && (envEnabled || storageImpliesDistributed)) {
1662
+ return {
1663
+ enabled: true,
1664
+ nodeId: process.env.HOSTNAME || process.env.NODE_ID || `node-${Date.now()}`,
1665
+ nodes: process.env.BRAINY_NODES?.split(',') || [],
1666
+ coordinatorUrl: process.env.BRAINY_COORDINATOR || undefined,
1667
+ shardCount: parseInt(process.env.BRAINY_SHARDS || '64'),
1668
+ replicationFactor: parseInt(process.env.BRAINY_REPLICAS || '3'),
1669
+ consensus: process.env.BRAINY_CONSENSUS || 'raft',
1670
+ transport: process.env.BRAINY_TRANSPORT || 'http'
1671
+ };
1672
+ }
1673
+ // Merge with provided config, applying intelligent defaults
1674
+ return config ? {
1675
+ ...config,
1676
+ nodeId: config.nodeId || process.env.HOSTNAME || `node-${Date.now()}`,
1677
+ shardCount: config.shardCount || 64,
1678
+ replicationFactor: config.replicationFactor || 3,
1679
+ consensus: config.consensus || 'raft',
1680
+ transport: config.transport || 'http'
1681
+ } : undefined;
1682
+ }
1683
+ /**
1684
+ * Setup distributed components with zero-config intelligence
1685
+ */
1686
+ setupDistributedComponents() {
1687
+ const distConfig = this.config.distributed;
1688
+ if (!distConfig?.enabled)
1689
+ return;
1690
+ console.log('🌍 Initializing distributed mode:', {
1691
+ nodeId: distConfig.nodeId,
1692
+ shards: distConfig.shardCount,
1693
+ replicas: distConfig.replicationFactor
1694
+ });
1695
+ // Initialize coordinator for consensus
1696
+ this.coordinator = new DistributedCoordinator({
1697
+ nodeId: distConfig.nodeId,
1698
+ address: distConfig.coordinatorUrl?.split(':')[0] || 'localhost',
1699
+ port: parseInt(distConfig.coordinatorUrl?.split(':')[1] || '8080'),
1700
+ nodes: distConfig.nodes
1701
+ });
1702
+ // Start the coordinator to establish leadership
1703
+ this.coordinator.start().catch(err => {
1704
+ console.warn('Coordinator start failed (will retry on init):', err.message);
1705
+ });
1706
+ // Initialize shard manager for data distribution
1707
+ this.shardManager = new ShardManager({
1708
+ shardCount: distConfig.shardCount,
1709
+ replicationFactor: distConfig.replicationFactor,
1710
+ virtualNodes: 150, // Optimal for consistent distribution
1711
+ autoRebalance: true
1712
+ });
1713
+ // Initialize cache synchronization
1714
+ this.cacheSync = new CacheSync({
1715
+ nodeId: distConfig.nodeId,
1716
+ syncInterval: 1000
1717
+ });
1718
+ // Initialize read/write separation if we have replicas
1719
+ // Note: Will be properly initialized after coordinator starts
1720
+ if (distConfig.replicationFactor && distConfig.replicationFactor > 1) {
1721
+ // Defer creation until coordinator is ready
1722
+ setTimeout(() => {
1723
+ this.readWriteSeparation = new ReadWriteSeparation({
1724
+ nodeId: distConfig.nodeId,
1725
+ consistencyLevel: 'eventual',
1726
+ role: 'replica', // Start as replica, will promote if leader
1727
+ syncInterval: 5000
1728
+ }, this.coordinator, this.shardManager, this.cacheSync);
1729
+ }, 100);
1730
+ }
1731
+ }
1732
+ /**
1733
+ * Pass distributed components to storage adapter
1734
+ */
1735
+ async connectDistributedStorage() {
1736
+ if (!this.config.distributed?.enabled)
1737
+ return;
1738
+ // Check if storage supports distributed operations
1739
+ if ('setDistributedComponents' in this.storage) {
1740
+ this.storage.setDistributedComponents({
1741
+ coordinator: this.coordinator,
1742
+ shardManager: this.shardManager,
1743
+ cacheSync: this.cacheSync,
1744
+ readWriteSeparation: this.readWriteSeparation
1745
+ });
1746
+ console.log('✅ Distributed storage connected');
1747
+ }
1748
+ }
1528
1749
  }
1529
1750
  // Re-export types for convenience
1530
1751
  export * from './types/brainy.types.js';
@@ -512,4 +512,14 @@ export interface StorageAdapter {
512
512
  * @returns Promise that resolves to an array of changes
513
513
  */
514
514
  getChangesSince?(timestamp: number, limit?: number): Promise<any[]>;
515
+ /**
516
+ * Get total count of nouns in storage - O(1) operation
517
+ * @returns Promise that resolves to the total number of nouns
518
+ */
519
+ getNounCount(): Promise<number>;
520
+ /**
521
+ * Get total count of verbs in storage - O(1) operation
522
+ * @returns Promise that resolves to the total number of verbs
523
+ */
524
+ getVerbCount(): Promise<number>;
515
525
  }
@@ -7,6 +7,8 @@ export declare class HNSWIndex {
7
7
  private nouns;
8
8
  private entryPointId;
9
9
  private maxLevel;
10
+ private highLevelNodes;
11
+ private readonly MAX_TRACKED_LEVELS;
10
12
  private config;
11
13
  private distanceFunction;
12
14
  private dimension;
@@ -15,6 +15,9 @@ export class HNSWIndex {
15
15
  this.nouns = new Map();
16
16
  this.entryPointId = null;
17
17
  this.maxLevel = 0;
18
+ // Track high-level nodes for O(1) entry point selection
19
+ this.highLevelNodes = new Map(); // level -> node IDs
20
+ this.MAX_TRACKED_LEVELS = 10; // Only track top levels for memory efficiency
18
21
  this.dimension = null;
19
22
  this.useParallelization = true; // Whether to use parallelization for performance-critical operations
20
23
  this.config = { ...DEFAULT_CONFIG, ...config };
@@ -203,6 +206,13 @@ export class HNSWIndex {
203
206
  }
204
207
  // Add noun to the index
205
208
  this.nouns.set(id, noun);
209
+ // Track high-level nodes for O(1) entry point selection
210
+ if (nounLevel >= 2 && nounLevel <= this.MAX_TRACKED_LEVELS) {
211
+ if (!this.highLevelNodes.has(nounLevel)) {
212
+ this.highLevelNodes.set(nounLevel, new Set());
213
+ }
214
+ this.highLevelNodes.get(nounLevel).add(id);
215
+ }
206
216
  return id;
207
217
  }
208
218
  /**
@@ -159,8 +159,21 @@ export declare class ImprovedNeuralAPI {
159
159
  * Group items by their semantic noun types
160
160
  */
161
161
  private _groupBySemanticType;
162
- private _getAllItemIds;
162
+ /**
163
+ * Iterate through all items without loading them all at once
164
+ * This scales to millions of items without memory issues
165
+ */
166
+ private _iterateAllItems;
167
+ /**
168
+ * Get a sample of item IDs for operations that don't need all items
169
+ * This is O(1) for small samples
170
+ */
171
+ private _getSampleItemIds;
172
+ /**
173
+ * Get total count using the brain's O(1) counting API
174
+ */
163
175
  private _getTotalItemCount;
176
+ private _getAllItemIds;
164
177
  private _calculateTotalWeight;
165
178
  private _getNeighborCommunities;
166
179
  private _calculateModularityGain;
@@ -80,8 +80,8 @@ export class ImprovedNeuralAPI {
80
80
  catch (error) {
81
81
  const errorMessage = error instanceof Error ? error.message : String(error);
82
82
  throw new SimilarityError(`Failed to calculate similarity: ${errorMessage}`, {
83
- inputA: typeof a === 'object' ? 'vector' : String(a).substring(0, 50),
84
- inputB: typeof b === 'object' ? 'vector' : String(b).substring(0, 50),
83
+ inputA: Array.isArray(a) ? 'vector' : typeof a === 'string' ? a.substring(0, 50) : 'unknown',
84
+ inputB: Array.isArray(b) ? 'vector' : typeof b === 'string' ? b.substring(0, 50) : 'unknown',
85
85
  options
86
86
  });
87
87
  }
@@ -1172,8 +1172,8 @@ export class ImprovedNeuralAPI {
1172
1172
  // Utility methods for internal operations
1173
1173
  _isId(value) {
1174
1174
  return typeof value === 'string' &&
1175
- (value.length === 36 && value.includes('-')) || // UUID-like
1176
- (value.length > 10 && !value.includes(' ')); // ID-like string
1175
+ ((value.length === 36 && value.includes('-')) || // UUID-like
1176
+ (value.length > 10 && !value.includes(' '))); // ID-like string
1177
1177
  }
1178
1178
  _isVector(value) {
1179
1179
  return Array.isArray(value) &&
@@ -1441,28 +1441,67 @@ export class ImprovedNeuralAPI {
1441
1441
  }
1442
1442
  return groups;
1443
1443
  }
1444
- // Placeholder implementations for complex operations
1445
- async _getAllItemIds() {
1446
- // Get all noun IDs from the brain
1447
- // Get total item count using find with empty query
1448
- const allItems = await this.brain.find({ query: '', limit: Number.MAX_SAFE_INTEGER });
1449
- const stats = { totalNouns: allItems.length || 0 };
1450
- if (!stats.totalNouns || stats.totalNouns === 0) {
1451
- return [];
1444
+ // Iterator-based implementations for scalability
1445
+ /**
1446
+ * Iterate through all items without loading them all at once
1447
+ * This scales to millions of items without memory issues
1448
+ */
1449
+ async *_iterateAllItems(options) {
1450
+ const batchSize = options?.batchSize || 1000;
1451
+ let cursor;
1452
+ let hasMore = true;
1453
+ while (hasMore) {
1454
+ const result = await this.brain.find({
1455
+ query: '',
1456
+ limit: batchSize,
1457
+ cursor
1458
+ });
1459
+ for (const item of result.items || result) {
1460
+ yield item;
1461
+ }
1462
+ hasMore = result.hasMore || false;
1463
+ cursor = result.nextCursor;
1464
+ // Safety check to prevent infinite loops
1465
+ if (!result.items || result.items.length === 0) {
1466
+ break;
1467
+ }
1452
1468
  }
1453
- // Get nouns with pagination (limit to 10000 for performance)
1454
- const limit = Math.min(stats.totalNouns, 10000);
1469
+ }
1470
+ /**
1471
+ * Get a sample of item IDs for operations that don't need all items
1472
+ * This is O(1) for small samples
1473
+ */
1474
+ async _getSampleItemIds(sampleSize = 1000) {
1455
1475
  const result = await this.brain.find({
1456
1476
  query: '',
1457
- limit
1477
+ limit: Math.min(sampleSize, 10000) // Cap at 10k for safety
1458
1478
  });
1459
- return result.map((item) => item.id).filter((id) => id);
1479
+ const items = result.items || result;
1480
+ return items.map((item) => item.entity?.id || item.id).filter((id) => id);
1460
1481
  }
1482
+ /**
1483
+ * Get total count using the brain's O(1) counting API
1484
+ */
1461
1485
  async _getTotalItemCount() {
1462
- // Get total item count using find with empty query
1463
- const allItems = await this.brain.find({ query: '', limit: Number.MAX_SAFE_INTEGER });
1464
- const stats = { totalNouns: allItems.length || 0 };
1465
- return stats.totalNouns || 0;
1486
+ // Use the brain's O(1) counting API if available
1487
+ if (this.brain.counts && typeof this.brain.counts.entities === 'function') {
1488
+ return await this.brain.counts.entities();
1489
+ }
1490
+ // Fallback: Get from storage statistics
1491
+ const storage = this.brain.storage;
1492
+ if (storage && typeof storage.getStatistics === 'function') {
1493
+ const stats = await storage.getStatistics();
1494
+ return stats?.totalNodes || 0;
1495
+ }
1496
+ // Last resort: Sample and estimate
1497
+ const sample = await this.brain.find({ query: '', limit: 1 });
1498
+ return sample.totalCount || 0;
1499
+ }
1500
+ // Deprecated: Remove methods that load everything
1501
+ // These are kept for backward compatibility but should not be used
1502
+ async _getAllItemIds() {
1503
+ console.warn('⚠️ _getAllItemIds() is deprecated and will fail with large datasets. Use _iterateAllItems() or _getSampleItemIds() instead.');
1504
+ return this._getSampleItemIds(10000); // Return sample only
1466
1505
  }
1467
1506
  // ===== GRAPH ALGORITHM SUPPORTING METHODS =====
1468
1507
  _calculateTotalWeight(edges) {
@@ -47,6 +47,7 @@ export declare class NaturalLanguageProcessor {
47
47
  private buildFieldConstraints;
48
48
  /**
49
49
  * Find similar queries from history (without using Brainy)
50
+ * NOTE: Currently unused - reserved for future query caching optimization
50
51
  */
51
52
  private findSimilarQueries;
52
53
  /**
@@ -119,10 +119,11 @@ export class NaturalLanguageProcessor {
119
119
  }
120
120
  /**
121
121
  * Find similar queries from history (without using Brainy)
122
+ * NOTE: Currently unused - reserved for future query caching optimization
122
123
  */
123
124
  findSimilarQueries(embedding) {
124
- // Simple similarity check against recent history
125
- // This is just a placeholder - real implementation would use cosine similarity
125
+ // Not implemented - not required for core functionality
126
+ // Would implement cosine similarity against queryHistory if needed
126
127
  return [];
127
128
  }
128
129
  /**
@@ -253,4 +253,63 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
253
253
  * Include throttling metrics in statistics
254
254
  */
255
255
  getStatisticsWithThrottling(): Promise<StatisticsData | null>;
256
+ protected totalNounCount: number;
257
+ protected totalVerbCount: number;
258
+ protected entityCounts: Map<string, number>;
259
+ protected verbCounts: Map<string, number>;
260
+ protected countCache: Map<string, {
261
+ count: number;
262
+ timestamp: number;
263
+ }>;
264
+ protected readonly COUNT_CACHE_TTL = 60000;
265
+ /**
266
+ * Get total noun count - O(1) operation
267
+ * @returns Promise that resolves to the total number of nouns
268
+ */
269
+ getNounCount(): Promise<number>;
270
+ /**
271
+ * Get total verb count - O(1) operation
272
+ * @returns Promise that resolves to the total number of verbs
273
+ */
274
+ getVerbCount(): Promise<number>;
275
+ /**
276
+ * Increment count for entity type - O(1) operation
277
+ * Protected by storage-specific mechanisms (mutex, distributed consensus, etc.)
278
+ * @param type The entity type
279
+ */
280
+ protected incrementEntityCount(type: string): void;
281
+ /**
282
+ * Thread-safe increment for concurrent scenarios
283
+ * Uses mutex for single-node, distributed consensus for multi-node
284
+ */
285
+ protected incrementEntityCountSafe(type: string): Promise<void>;
286
+ /**
287
+ * Decrement count for entity type - O(1) operation
288
+ * @param type The entity type
289
+ */
290
+ protected decrementEntityCount(type: string): void;
291
+ /**
292
+ * Thread-safe decrement for concurrent scenarios
293
+ */
294
+ protected decrementEntityCountSafe(type: string): Promise<void>;
295
+ /**
296
+ * Increment verb count - O(1) operation with mutex protection
297
+ * @param type The verb type
298
+ */
299
+ protected incrementVerbCount(type: string): Promise<void>;
300
+ /**
301
+ * Decrement verb count - O(1) operation with mutex protection
302
+ * @param type The verb type
303
+ */
304
+ protected decrementVerbCount(type: string): Promise<void>;
305
+ /**
306
+ * Initialize counts from storage - must be implemented by each adapter
307
+ * @protected
308
+ */
309
+ protected abstract initializeCounts(): Promise<void>;
310
+ /**
311
+ * Persist counts to storage - must be implemented by each adapter
312
+ * @protected
313
+ */
314
+ protected abstract persistCounts(): Promise<void>;
256
315
  }