@soulcraft/brainy 3.8.3 → 3.9.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -80,8 +80,8 @@ export class ImprovedNeuralAPI {
80
80
  catch (error) {
81
81
  const errorMessage = error instanceof Error ? error.message : String(error);
82
82
  throw new SimilarityError(`Failed to calculate similarity: ${errorMessage}`, {
83
- inputA: typeof a === 'object' ? 'vector' : String(a).substring(0, 50),
84
- inputB: typeof b === 'object' ? 'vector' : String(b).substring(0, 50),
83
+ inputA: Array.isArray(a) ? 'vector' : typeof a === 'string' ? a.substring(0, 50) : 'unknown',
84
+ inputB: Array.isArray(b) ? 'vector' : typeof b === 'string' ? b.substring(0, 50) : 'unknown',
85
85
  options
86
86
  });
87
87
  }
@@ -1172,8 +1172,8 @@ export class ImprovedNeuralAPI {
1172
1172
  // Utility methods for internal operations
1173
1173
  _isId(value) {
1174
1174
  return typeof value === 'string' &&
1175
- (value.length === 36 && value.includes('-')) || // UUID-like
1176
- (value.length > 10 && !value.includes(' ')); // ID-like string
1175
+ ((value.length === 36 && value.includes('-')) || // UUID-like
1176
+ (value.length > 10 && !value.includes(' '))); // ID-like string
1177
1177
  }
1178
1178
  _isVector(value) {
1179
1179
  return Array.isArray(value) &&
@@ -1441,28 +1441,67 @@ export class ImprovedNeuralAPI {
1441
1441
  }
1442
1442
  return groups;
1443
1443
  }
1444
- // Placeholder implementations for complex operations
1445
- async _getAllItemIds() {
1446
- // Get all noun IDs from the brain
1447
- // Get total item count using find with empty query
1448
- const allItems = await this.brain.find({ query: '', limit: Number.MAX_SAFE_INTEGER });
1449
- const stats = { totalNouns: allItems.length || 0 };
1450
- if (!stats.totalNouns || stats.totalNouns === 0) {
1451
- return [];
1444
+ // Iterator-based implementations for scalability
1445
+ /**
1446
+ * Iterate through all items without loading them all at once
1447
+ * This scales to millions of items without memory issues
1448
+ */
1449
+ async *_iterateAllItems(options) {
1450
+ const batchSize = options?.batchSize || 1000;
1451
+ let cursor;
1452
+ let hasMore = true;
1453
+ while (hasMore) {
1454
+ const result = await this.brain.find({
1455
+ query: '',
1456
+ limit: batchSize,
1457
+ cursor
1458
+ });
1459
+ for (const item of result.items || result) {
1460
+ yield item;
1461
+ }
1462
+ hasMore = result.hasMore || false;
1463
+ cursor = result.nextCursor;
1464
+ // Safety check to prevent infinite loops
1465
+ if (!result.items || result.items.length === 0) {
1466
+ break;
1467
+ }
1452
1468
  }
1453
- // Get nouns with pagination (limit to 10000 for performance)
1454
- const limit = Math.min(stats.totalNouns, 10000);
1469
+ }
1470
+ /**
1471
+ * Get a sample of item IDs for operations that don't need all items
1472
+ * This is O(1) for small samples
1473
+ */
1474
+ async _getSampleItemIds(sampleSize = 1000) {
1455
1475
  const result = await this.brain.find({
1456
1476
  query: '',
1457
- limit
1477
+ limit: Math.min(sampleSize, 10000) // Cap at 10k for safety
1458
1478
  });
1459
- return result.map((item) => item.id).filter((id) => id);
1479
+ const items = result.items || result;
1480
+ return items.map((item) => item.entity?.id || item.id).filter((id) => id);
1460
1481
  }
1482
+ /**
1483
+ * Get total count using the brain's O(1) counting API
1484
+ */
1461
1485
  async _getTotalItemCount() {
1462
- // Get total item count using find with empty query
1463
- const allItems = await this.brain.find({ query: '', limit: Number.MAX_SAFE_INTEGER });
1464
- const stats = { totalNouns: allItems.length || 0 };
1465
- return stats.totalNouns || 0;
1486
+ // Use the brain's O(1) counting API if available
1487
+ if (this.brain.counts && typeof this.brain.counts.entities === 'function') {
1488
+ return await this.brain.counts.entities();
1489
+ }
1490
+ // Fallback: Get from storage statistics
1491
+ const storage = this.brain.storage;
1492
+ if (storage && typeof storage.getStatistics === 'function') {
1493
+ const stats = await storage.getStatistics();
1494
+ return stats?.totalNodes || 0;
1495
+ }
1496
+ // Last resort: Sample and estimate
1497
+ const sample = await this.brain.find({ query: '', limit: 1 });
1498
+ return sample.totalCount || 0;
1499
+ }
1500
+ // Deprecated: Remove methods that load everything
1501
+ // These are kept for backward compatibility but should not be used
1502
+ async _getAllItemIds() {
1503
+ console.warn('⚠️ _getAllItemIds() is deprecated and will fail with large datasets. Use _iterateAllItems() or _getSampleItemIds() instead.');
1504
+ return this._getSampleItemIds(10000); // Return sample only
1466
1505
  }
1467
1506
  // ===== GRAPH ALGORITHM SUPPORTING METHODS =====
1468
1507
  _calculateTotalWeight(edges) {
@@ -47,6 +47,7 @@ export declare class NaturalLanguageProcessor {
47
47
  private buildFieldConstraints;
48
48
  /**
49
49
  * Find similar queries from history (without using Brainy)
50
+ * NOTE: Currently unused - reserved for future query caching optimization
50
51
  */
51
52
  private findSimilarQueries;
52
53
  /**
@@ -119,10 +119,11 @@ export class NaturalLanguageProcessor {
119
119
  }
120
120
  /**
121
121
  * Find similar queries from history (without using Brainy)
122
+ * NOTE: Currently unused - reserved for future query caching optimization
122
123
  */
123
124
  findSimilarQueries(embedding) {
124
- // Simple similarity check against recent history
125
- // This is just a placeholder - real implementation would use cosine similarity
125
+ // Not implemented - not required for core functionality
126
+ // Would implement cosine similarity against queryHistory if needed
126
127
  return [];
127
128
  }
128
129
  /**
@@ -253,4 +253,63 @@ export declare abstract class BaseStorageAdapter implements StorageAdapter {
253
253
  * Include throttling metrics in statistics
254
254
  */
255
255
  getStatisticsWithThrottling(): Promise<StatisticsData | null>;
256
+ protected totalNounCount: number;
257
+ protected totalVerbCount: number;
258
+ protected entityCounts: Map<string, number>;
259
+ protected verbCounts: Map<string, number>;
260
+ protected countCache: Map<string, {
261
+ count: number;
262
+ timestamp: number;
263
+ }>;
264
+ protected readonly COUNT_CACHE_TTL = 60000;
265
+ /**
266
+ * Get total noun count - O(1) operation
267
+ * @returns Promise that resolves to the total number of nouns
268
+ */
269
+ getNounCount(): Promise<number>;
270
+ /**
271
+ * Get total verb count - O(1) operation
272
+ * @returns Promise that resolves to the total number of verbs
273
+ */
274
+ getVerbCount(): Promise<number>;
275
+ /**
276
+ * Increment count for entity type - O(1) operation
277
+ * Protected by storage-specific mechanisms (mutex, distributed consensus, etc.)
278
+ * @param type The entity type
279
+ */
280
+ protected incrementEntityCount(type: string): void;
281
+ /**
282
+ * Thread-safe increment for concurrent scenarios
283
+ * Uses mutex for single-node, distributed consensus for multi-node
284
+ */
285
+ protected incrementEntityCountSafe(type: string): Promise<void>;
286
+ /**
287
+ * Decrement count for entity type - O(1) operation
288
+ * @param type The entity type
289
+ */
290
+ protected decrementEntityCount(type: string): void;
291
+ /**
292
+ * Thread-safe decrement for concurrent scenarios
293
+ */
294
+ protected decrementEntityCountSafe(type: string): Promise<void>;
295
+ /**
296
+ * Increment verb count - O(1) operation with mutex protection
297
+ * @param type The verb type
298
+ */
299
+ protected incrementVerbCount(type: string): Promise<void>;
300
+ /**
301
+ * Decrement verb count - O(1) operation with mutex protection
302
+ * @param type The verb type
303
+ */
304
+ protected decrementVerbCount(type: string): Promise<void>;
305
+ /**
306
+ * Initialize counts from storage - must be implemented by each adapter
307
+ * @protected
308
+ */
309
+ protected abstract initializeCounts(): Promise<void>;
310
+ /**
311
+ * Persist counts to storage - must be implemented by each adapter
312
+ * @protected
313
+ */
314
+ protected abstract persistCounts(): Promise<void>;
256
315
  }
@@ -3,6 +3,7 @@
3
3
  * Provides common functionality for all storage adapters, including statistics tracking
4
4
  */
5
5
  import { extractFieldNamesFromJson, mapToStandardField } from '../../utils/fieldNameTracking.js';
6
+ import { getGlobalMutex } from '../../utils/mutex.js';
6
7
  /**
7
8
  * Base class for storage adapters that implements statistics tracking
8
9
  */
@@ -37,6 +38,16 @@ export class BaseStorageAdapter {
37
38
  this.totalDelayMs = 0;
38
39
  // Service-level throttling
39
40
  this.serviceThrottling = new Map();
41
+ // =============================================
42
+ // Universal O(1) Count Management
43
+ // =============================================
44
+ // Universal count tracking - O(1) operations
45
+ this.totalNounCount = 0;
46
+ this.totalVerbCount = 0;
47
+ this.entityCounts = new Map(); // type -> count
48
+ this.verbCounts = new Map(); // verb type -> count
49
+ this.countCache = new Map();
50
+ this.COUNT_CACHE_TTL = 60000; // 1 minute cache TTL
40
51
  }
41
52
  /**
42
53
  * Save statistics data
@@ -609,5 +620,131 @@ export class BaseStorageAdapter {
609
620
  }
610
621
  return stats;
611
622
  }
623
+ /**
624
+ * Get total noun count - O(1) operation
625
+ * @returns Promise that resolves to the total number of nouns
626
+ */
627
+ async getNounCount() {
628
+ return this.totalNounCount;
629
+ }
630
+ /**
631
+ * Get total verb count - O(1) operation
632
+ * @returns Promise that resolves to the total number of verbs
633
+ */
634
+ async getVerbCount() {
635
+ return this.totalVerbCount;
636
+ }
637
+ /**
638
+ * Increment count for entity type - O(1) operation
639
+ * Protected by storage-specific mechanisms (mutex, distributed consensus, etc.)
640
+ * @param type The entity type
641
+ */
642
+ incrementEntityCount(type) {
643
+ // For distributed scenarios, this is aggregated across shards
644
+ // For single-node, this is protected by storage-specific locking
645
+ this.entityCounts.set(type, (this.entityCounts.get(type) || 0) + 1);
646
+ this.totalNounCount++;
647
+ // Update cache
648
+ this.countCache.set('nouns_count', {
649
+ count: this.totalNounCount,
650
+ timestamp: Date.now()
651
+ });
652
+ }
653
+ /**
654
+ * Thread-safe increment for concurrent scenarios
655
+ * Uses mutex for single-node, distributed consensus for multi-node
656
+ */
657
+ async incrementEntityCountSafe(type) {
658
+ // Single-node mutex protection (distributed mode handled by coordinator)
659
+ const mutex = getGlobalMutex();
660
+ await mutex.runExclusive(`count-entity-${type}`, async () => {
661
+ this.incrementEntityCount(type);
662
+ // Persist counts periodically
663
+ if (this.totalNounCount % 10 === 0) {
664
+ await this.persistCounts();
665
+ }
666
+ });
667
+ }
668
+ /**
669
+ * Decrement count for entity type - O(1) operation
670
+ * @param type The entity type
671
+ */
672
+ decrementEntityCount(type) {
673
+ const current = this.entityCounts.get(type) || 0;
674
+ if (current > 1) {
675
+ this.entityCounts.set(type, current - 1);
676
+ }
677
+ else {
678
+ this.entityCounts.delete(type);
679
+ }
680
+ if (this.totalNounCount > 0) {
681
+ this.totalNounCount--;
682
+ }
683
+ // Update cache
684
+ this.countCache.set('nouns_count', {
685
+ count: this.totalNounCount,
686
+ timestamp: Date.now()
687
+ });
688
+ }
689
+ /**
690
+ * Thread-safe decrement for concurrent scenarios
691
+ */
692
+ async decrementEntityCountSafe(type) {
693
+ const mutex = getGlobalMutex();
694
+ await mutex.runExclusive(`count-entity-${type}`, async () => {
695
+ this.decrementEntityCount(type);
696
+ if (this.totalNounCount % 10 === 0) {
697
+ await this.persistCounts();
698
+ }
699
+ });
700
+ }
701
+ /**
702
+ * Increment verb count - O(1) operation with mutex protection
703
+ * @param type The verb type
704
+ */
705
+ async incrementVerbCount(type) {
706
+ const mutex = getGlobalMutex();
707
+ await mutex.runExclusive(`count-verb-${type}`, async () => {
708
+ this.verbCounts.set(type, (this.verbCounts.get(type) || 0) + 1);
709
+ this.totalVerbCount++;
710
+ // Update cache
711
+ this.countCache.set('verbs_count', {
712
+ count: this.totalVerbCount,
713
+ timestamp: Date.now()
714
+ });
715
+ // Persist counts immediately for consistency
716
+ if (this.totalVerbCount % 10 === 0) {
717
+ await this.persistCounts();
718
+ }
719
+ });
720
+ }
721
+ /**
722
+ * Decrement verb count - O(1) operation with mutex protection
723
+ * @param type The verb type
724
+ */
725
+ async decrementVerbCount(type) {
726
+ const mutex = getGlobalMutex();
727
+ await mutex.runExclusive(`count-verb-${type}`, async () => {
728
+ const current = this.verbCounts.get(type) || 0;
729
+ if (current > 1) {
730
+ this.verbCounts.set(type, current - 1);
731
+ }
732
+ else {
733
+ this.verbCounts.delete(type);
734
+ }
735
+ if (this.totalVerbCount > 0) {
736
+ this.totalVerbCount--;
737
+ }
738
+ // Update cache
739
+ this.countCache.set('verbs_count', {
740
+ count: this.totalVerbCount,
741
+ timestamp: Date.now()
742
+ });
743
+ // Persist counts immediately for consistency
744
+ if (this.totalVerbCount % 10 === 0) {
745
+ await this.persistCounts();
746
+ }
747
+ });
748
+ }
612
749
  }
613
750
  //# sourceMappingURL=baseStorageAdapter.js.map
@@ -11,6 +11,10 @@ type Edge = HNSWVerb;
11
11
  * Uses the file system to store data in the specified directory structure
12
12
  */
13
13
  export declare class FileSystemStorage extends BaseStorage {
14
+ private countsFilePath?;
15
+ private readonly shardingDepth;
16
+ private readonly SHARDING_THRESHOLD;
17
+ private cachedShardingDepth?;
14
18
  private rootDir;
15
19
  private nounsDir;
16
20
  private verbsDir;
@@ -22,6 +26,8 @@ export declare class FileSystemStorage extends BaseStorage {
22
26
  private lockDir;
23
27
  private useDualWrite;
24
28
  private activeLocks;
29
+ private lockTimers;
30
+ private allTimers;
25
31
  /**
26
32
  * Initialize the storage adapter
27
33
  * @param rootDirectory The root directory for storage
@@ -251,5 +257,40 @@ export declare class FileSystemStorage extends BaseStorage {
251
257
  * Merge statistics from multiple sources
252
258
  */
253
259
  private mergeStatistics;
260
+ /**
261
+ * Initialize counts from filesystem storage
262
+ */
263
+ protected initializeCounts(): Promise<void>;
264
+ /**
265
+ * Initialize counts by scanning disk (only done once)
266
+ */
267
+ private initializeCountsFromDisk;
268
+ /**
269
+ * Persist counts to filesystem storage
270
+ */
271
+ protected persistCounts(): Promise<void>;
272
+ /**
273
+ * Determine optimal sharding depth based on dataset size
274
+ * This is called once during initialization for consistent behavior
275
+ */
276
+ private getOptimalShardingDepth;
277
+ /**
278
+ * Get the path for a node with consistent sharding strategy
279
+ * Clean, predictable path generation
280
+ */
281
+ private getNodePath;
282
+ /**
283
+ * Get the path for a verb with consistent sharding strategy
284
+ */
285
+ private getVerbPath;
286
+ /**
287
+ * Universal sharded path generator
288
+ * Consistent across all entity types
289
+ */
290
+ private getShardedPath;
291
+ /**
292
+ * Check if a file exists (handles both sharded and non-sharded)
293
+ */
294
+ private fileExists;
254
295
  }
255
296
  export {};