@soulcraft/brainy 0.48.0 → 0.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +304 -555
  2. package/dist/brainyData.d.ts +83 -2
  3. package/dist/brainyData.js +536 -66
  4. package/dist/brainyData.js.map +1 -1
  5. package/dist/coreTypes.d.ts +74 -12
  6. package/dist/distributed/configManager.d.ts +9 -0
  7. package/dist/distributed/configManager.js +129 -10
  8. package/dist/distributed/configManager.js.map +1 -1
  9. package/dist/hnsw/hnswIndex.d.ts +1 -1
  10. package/dist/hnsw/hnswIndex.js +44 -25
  11. package/dist/hnsw/hnswIndex.js.map +1 -1
  12. package/dist/hnsw/optimizedHNSWIndex.d.ts +1 -1
  13. package/dist/hnsw/optimizedHNSWIndex.js +3 -3
  14. package/dist/hnsw/optimizedHNSWIndex.js.map +1 -1
  15. package/dist/storage/adapters/baseStorageAdapter.d.ts +18 -2
  16. package/dist/storage/adapters/baseStorageAdapter.js +69 -4
  17. package/dist/storage/adapters/baseStorageAdapter.js.map +1 -1
  18. package/dist/storage/adapters/fileSystemStorage.d.ts +14 -8
  19. package/dist/storage/adapters/fileSystemStorage.js +90 -22
  20. package/dist/storage/adapters/fileSystemStorage.js.map +1 -1
  21. package/dist/storage/adapters/memoryStorage.d.ts +0 -8
  22. package/dist/storage/adapters/memoryStorage.js +26 -45
  23. package/dist/storage/adapters/memoryStorage.js.map +1 -1
  24. package/dist/storage/adapters/opfsStorage.d.ts +40 -8
  25. package/dist/storage/adapters/opfsStorage.js +195 -44
  26. package/dist/storage/adapters/opfsStorage.js.map +1 -1
  27. package/dist/storage/adapters/optimizedS3Search.js +4 -3
  28. package/dist/storage/adapters/optimizedS3Search.js.map +1 -1
  29. package/dist/storage/adapters/s3CompatibleStorage.d.ts +3 -10
  30. package/dist/storage/adapters/s3CompatibleStorage.js +41 -44
  31. package/dist/storage/adapters/s3CompatibleStorage.js.map +1 -1
  32. package/dist/storage/backwardCompatibility.d.ts +84 -0
  33. package/dist/storage/backwardCompatibility.js +141 -0
  34. package/dist/storage/backwardCompatibility.js.map +1 -0
  35. package/dist/storage/baseStorage.d.ts +33 -19
  36. package/dist/storage/baseStorage.js +116 -195
  37. package/dist/storage/baseStorage.js.map +1 -1
  38. package/dist/utils/metadataFilter.d.ts +79 -0
  39. package/dist/utils/metadataFilter.js +229 -0
  40. package/dist/utils/metadataFilter.js.map +1 -0
  41. package/dist/utils/metadataIndex.d.ts +148 -0
  42. package/dist/utils/metadataIndex.js +639 -0
  43. package/dist/utils/metadataIndex.js.map +1 -0
  44. package/dist/utils/metadataIndexCache.d.ts +60 -0
  45. package/dist/utils/metadataIndexCache.js +119 -0
  46. package/dist/utils/metadataIndexCache.js.map +1 -0
  47. package/package.json +1 -1
@@ -3,10 +3,13 @@
3
3
  * Main class that provides the vector database functionality
4
4
  */
5
5
  import { v4 as uuidv4 } from 'uuid';
6
+ import { HNSWIndex } from './hnsw/hnswIndex.js';
6
7
  import { HNSWIndexOptimized } from './hnsw/hnswIndexOptimized.js';
7
8
  import { createStorage } from './storage/storageFactory.js';
8
9
  import { cosineDistance, defaultEmbeddingFunction, cleanupWorkerPools, batchEmbed } from './utils/index.js';
9
10
  import { getAugmentationVersion } from './utils/version.js';
11
+ import { matchesMetadataFilter } from './utils/metadataFilter.js';
12
+ import { MetadataIndexManager } from './utils/metadataIndex.js';
10
13
  import { NounType, VerbType } from './types/graphTypes.js';
11
14
  import { createServerSearchAugmentations } from './augmentations/serverSearchAugmentations.js';
12
15
  import { augmentationPipeline } from './augmentationPipeline.js';
@@ -41,6 +44,7 @@ export class BrainyData {
41
44
  */
42
45
  constructor(config = {}) {
43
46
  this.storage = null;
47
+ this.metadataIndex = null;
44
48
  this.isInitialized = false;
45
49
  this.isInitializing = false;
46
50
  this.storageConfig = {};
@@ -58,6 +62,7 @@ export class BrainyData {
58
62
  updateIndex: true
59
63
  };
60
64
  this.updateTimerId = null;
65
+ this.maintenanceIntervals = [];
61
66
  this.lastUpdateTime = 0;
62
67
  this.lastKnownNounCount = 0;
63
68
  // Remote server properties
@@ -73,6 +78,8 @@ export class BrainyData {
73
78
  this.healthMonitor = null;
74
79
  // Statistics collector
75
80
  this.statisticsCollector = new StatisticsCollector();
81
+ // Store config
82
+ this.config = config;
76
83
  // Set dimensions to fixed value of 384 (all-MiniLM-L6-v2 dimension)
77
84
  this._dimensions = 384;
78
85
  // Set distance function
@@ -83,8 +90,9 @@ export class BrainyData {
83
90
  if (config.storageAdapter) {
84
91
  hnswConfig.useDiskBasedIndex = true;
85
92
  }
86
- this.index = new HNSWIndexOptimized(hnswConfig, this.distanceFunction, config.storageAdapter || null);
87
- this.useOptimizedIndex = true;
93
+ // Temporarily use base HNSW index for metadata filtering
94
+ this.index = new HNSWIndex(hnswConfig, this.distanceFunction);
95
+ this.useOptimizedIndex = false;
88
96
  // Set storage if provided, otherwise it will be initialized in init()
89
97
  this.storage = config.storageAdapter || null;
90
98
  // Store logging configuration
@@ -106,6 +114,8 @@ export class BrainyData {
106
114
  config.storage?.requestPersistentStorage || false;
107
115
  // Set read-only flag
108
116
  this.readOnly = config.readOnly || false;
117
+ // Set frozen flag (defaults to false to allow optimizations in readOnly mode)
118
+ this.frozen = config.frozen || false;
109
119
  // Set lazy loading in read-only mode flag
110
120
  this.lazyLoadInReadOnlyMode = config.lazyLoadInReadOnlyMode || false;
111
121
  // Set write-only flag
@@ -198,6 +208,15 @@ export class BrainyData {
198
208
  throw new Error('Cannot perform write operation: database is in read-only mode');
199
209
  }
200
210
  }
211
+ /**
212
+ * Check if the database is frozen and throw an error if it is
213
+ * @throws Error if the database is frozen
214
+ */
215
+ checkFrozen() {
216
+ if (this.frozen) {
217
+ throw new Error('Cannot perform operation: database is frozen (no changes allowed)');
218
+ }
219
+ }
201
220
  /**
202
221
  * Check if the database is in write-only mode and throw an error if it is
203
222
  * @param allowExistenceChecks If true, allows existence checks (get operations) in write-only mode
@@ -217,6 +236,13 @@ export class BrainyData {
217
236
  if (!this.realtimeUpdateConfig.enabled) {
218
237
  return;
219
238
  }
239
+ // If the database is frozen, do not start real-time updates
240
+ if (this.frozen) {
241
+ if (this.loggingConfig?.verbose) {
242
+ console.log('Real-time updates disabled: database is frozen');
243
+ }
244
+ return;
245
+ }
220
246
  // If the update timer is already running, do nothing
221
247
  if (this.updateTimerId !== null) {
222
248
  return;
@@ -281,6 +307,27 @@ export class BrainyData {
281
307
  this.startRealtimeUpdates();
282
308
  }
283
309
  }
310
+ /**
311
+ * Start metadata index maintenance
312
+ */
313
+ startMetadataIndexMaintenance() {
314
+ if (!this.metadataIndex)
315
+ return;
316
+ // Flush index periodically to persist changes
317
+ const flushInterval = setInterval(async () => {
318
+ try {
319
+ await this.metadataIndex.flush();
320
+ }
321
+ catch (error) {
322
+ console.warn('Error flushing metadata index:', error);
323
+ }
324
+ }, 30000); // Flush every 30 seconds
325
+ // Store the interval ID for cleanup
326
+ if (!this.maintenanceIntervals) {
327
+ this.maintenanceIntervals = [];
328
+ }
329
+ this.maintenanceIntervals.push(flushInterval);
330
+ }
284
331
  /**
285
332
  * Disable real-time updates
286
333
  */
@@ -307,6 +354,10 @@ export class BrainyData {
307
354
  if (!this.isInitialized || !this.storage) {
308
355
  return;
309
356
  }
357
+ // If the database is frozen, do not perform updates
358
+ if (this.frozen) {
359
+ return;
360
+ }
310
361
  try {
311
362
  // Record the current time
312
363
  const startTime = Date.now();
@@ -438,40 +489,51 @@ export class BrainyData {
438
489
  const currentCount = await this.getNounCount();
439
490
  // If the noun count has changed, update the index
440
491
  if (currentCount !== this.lastKnownNounCount) {
441
- // Get all nouns from storage
442
- const nouns = await this.storage.getAllNouns();
443
492
  // Get all nouns currently in the index
444
493
  const indexNouns = this.index.getNouns();
445
494
  const indexNounIds = new Set(indexNouns.keys());
446
- // Find nouns that are in storage but not in the index
447
- const newNouns = nouns.filter((noun) => !indexNounIds.has(noun.id));
448
- // Add new nouns to the index
449
- for (const noun of newNouns) {
450
- // Check if the vector dimensions match the expected dimensions
451
- if (noun.vector.length !== this._dimensions) {
452
- console.warn(`Skipping noun ${noun.id} due to dimension mismatch: expected ${this._dimensions}, got ${noun.vector.length}`);
453
- continue;
454
- }
455
- // Add to index
456
- await this.index.addItem({
457
- id: noun.id,
458
- vector: noun.vector
495
+ // Use pagination to load nouns from storage
496
+ let offset = 0;
497
+ const limit = 100;
498
+ let hasMore = true;
499
+ let totalNewNouns = 0;
500
+ while (hasMore) {
501
+ const result = await this.storage.getNouns({
502
+ pagination: { offset, limit }
459
503
  });
460
- if (this.loggingConfig?.verbose) {
461
- console.log(`Added new noun ${noun.id} to index during real-time update`);
504
+ // Find nouns that are in storage but not in the index
505
+ const newNouns = result.items.filter((noun) => !indexNounIds.has(noun.id));
506
+ totalNewNouns += newNouns.length;
507
+ // Add new nouns to the index
508
+ for (const noun of newNouns) {
509
+ // Check if the vector dimensions match the expected dimensions
510
+ if (noun.vector.length !== this._dimensions) {
511
+ console.warn(`Skipping noun ${noun.id} due to dimension mismatch: expected ${this._dimensions}, got ${noun.vector.length}`);
512
+ continue;
513
+ }
514
+ // Add to index
515
+ await this.index.addItem({
516
+ id: noun.id,
517
+ vector: noun.vector
518
+ });
519
+ if (this.loggingConfig?.verbose) {
520
+ console.log(`Added new noun ${noun.id} to index during real-time update`);
521
+ }
462
522
  }
523
+ hasMore = result.hasMore;
524
+ offset += limit;
463
525
  }
464
526
  // Update the last known noun count
465
527
  this.lastKnownNounCount = currentCount;
466
528
  // Invalidate search cache if new nouns were detected
467
- if (newNouns.length > 0) {
529
+ if (totalNewNouns > 0) {
468
530
  this.searchCache.invalidateOnDataChange('add');
469
531
  if (this.loggingConfig?.verbose) {
470
532
  console.log('Search cache invalidated due to external data changes');
471
533
  }
472
534
  }
473
- if (this.loggingConfig?.verbose && newNouns.length > 0) {
474
- console.log(`Real-time update: Added ${newNouns.length} new nouns to index using full scan`);
535
+ if (this.loggingConfig?.verbose && totalNewNouns > 0) {
536
+ console.log(`Real-time update: Added ${totalNewNouns} new nouns to index using full scan`);
475
537
  }
476
538
  }
477
539
  }
@@ -624,23 +686,31 @@ export class BrainyData {
624
686
  this.index.clear();
625
687
  }
626
688
  else {
627
- // Load all nouns from storage
628
- const nouns = await this.storage.getAllNouns();
629
- // Clear the index and add all nouns
689
+ // Clear the index and load nouns using pagination
630
690
  this.index.clear();
631
- for (const noun of nouns) {
632
- // Check if the vector dimensions match the expected dimensions
633
- if (noun.vector.length !== this._dimensions) {
634
- console.warn(`Deleting noun ${noun.id} due to dimension mismatch: expected ${this._dimensions}, got ${noun.vector.length}`);
635
- // Delete the mismatched noun from storage to prevent future issues
636
- await this.storage.deleteNoun(noun.id);
637
- continue;
638
- }
639
- // Add to index
640
- await this.index.addItem({
641
- id: noun.id,
642
- vector: noun.vector
691
+ let offset = 0;
692
+ const limit = 100;
693
+ let hasMore = true;
694
+ while (hasMore) {
695
+ const result = await this.storage.getNouns({
696
+ pagination: { offset, limit }
643
697
  });
698
+ for (const noun of result.items) {
699
+ // Check if the vector dimensions match the expected dimensions
700
+ if (noun.vector.length !== this._dimensions) {
701
+ console.warn(`Deleting noun ${noun.id} due to dimension mismatch: expected ${this._dimensions}, got ${noun.vector.length}`);
702
+ // Delete the mismatched noun from storage to prevent future issues
703
+ await this.storage.deleteNoun(noun.id);
704
+ continue;
705
+ }
706
+ // Add to index
707
+ await this.index.addItem({
708
+ id: noun.id,
709
+ vector: noun.vector
710
+ });
711
+ }
712
+ hasMore = result.hasMore;
713
+ offset += limit;
644
714
  }
645
715
  }
646
716
  // Connect to remote server if configured with autoConnect
@@ -663,10 +733,46 @@ export class BrainyData {
663
733
  catch (e) {
664
734
  // Ignore errors loading existing statistics
665
735
  }
736
+ // Initialize metadata index if not in write-only mode
737
+ if (!this.writeOnly) {
738
+ this.metadataIndex = new MetadataIndexManager(this.storage, this.config.metadataIndex);
739
+ // Check if we need to rebuild the index (for existing data)
740
+ // Skip rebuild for memory storage (starts empty) or when in read-only mode
741
+ // Also skip if index already has entries
742
+ const isMemoryStorage = this.storage?.constructor?.name === 'MemoryStorage';
743
+ const stats = await this.metadataIndex.getStats();
744
+ if (!isMemoryStorage && !this.readOnly && stats.totalEntries === 0) {
745
+ // Check if we have existing data that needs indexing
746
+ // Use a simple check to avoid expensive operations
747
+ try {
748
+ const testResult = await this.storage.getNouns({ pagination: { offset: 0, limit: 1 } });
749
+ if (testResult.items.length > 0) {
750
+ if (this.loggingConfig?.verbose) {
751
+ console.log('Rebuilding metadata index for existing data...');
752
+ }
753
+ await this.metadataIndex.rebuild();
754
+ if (this.loggingConfig?.verbose) {
755
+ const newStats = await this.metadataIndex.getStats();
756
+ console.log(`Metadata index rebuilt: ${newStats.totalEntries} entries, ${newStats.fieldsIndexed.length} fields`);
757
+ }
758
+ }
759
+ }
760
+ catch (error) {
761
+ // If getNouns fails, skip rebuild
762
+ if (this.loggingConfig?.verbose) {
763
+ console.log('Skipping metadata index rebuild:', error);
764
+ }
765
+ }
766
+ }
767
+ }
666
768
  this.isInitialized = true;
667
769
  this.isInitializing = false;
668
770
  // Start real-time updates if enabled
669
771
  this.startRealtimeUpdates();
772
+ // Start metadata index maintenance
773
+ if (this.metadataIndex) {
774
+ this.startMetadataIndexMaintenance();
775
+ }
670
776
  }
671
777
  catch (error) {
672
778
  console.error('Failed to initialize BrainyData:', error);
@@ -1018,6 +1124,10 @@ export class BrainyData {
1018
1124
  }
1019
1125
  }
1020
1126
  await this.storage.saveMetadata(id, metadataToSave);
1127
+ // Update metadata index
1128
+ if (this.metadataIndex && !this.readOnly && !this.frozen) {
1129
+ await this.metadataIndex.addToIndex(id, metadataToSave);
1130
+ }
1021
1131
  // Track metadata statistics
1022
1132
  const metadataService = this.getServiceName(options);
1023
1133
  await this.storage.incrementStatistic('metadata', metadataService);
@@ -1300,10 +1410,11 @@ export class BrainyData {
1300
1410
  }
1301
1411
  // In lazy loading mode, we need to load some nodes to search
1302
1412
  // Instead of loading all nodes, we'll load a subset of nodes
1303
- // Since we don't have a specialized method to get top nodes for a query,
1304
- // we'll load a limited number of nodes from storage
1305
- const nouns = await this.storage.getAllNouns();
1306
- const limitedNouns = nouns.slice(0, Math.min(nouns.length, k * 10)); // Get 10x more nodes than needed
1413
+ // Load a limited number of nodes from storage using pagination
1414
+ const result = await this.storage.getNouns({
1415
+ pagination: { offset: 0, limit: k * 10 } // Get 10x more nodes than needed
1416
+ });
1417
+ const limitedNouns = result.items;
1307
1418
  // Add these nodes to the index
1308
1419
  for (const node of limitedNouns) {
1309
1420
  // Check if the vector dimensions match the expected dimensions
@@ -1321,11 +1432,79 @@ export class BrainyData {
1321
1432
  console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
1322
1433
  }
1323
1434
  }
1435
+ // Create filter function for HNSW search with metadata index optimization
1436
+ const hasMetadataFilter = options.metadata && Object.keys(options.metadata).length > 0;
1437
+ const hasServiceFilter = !!options.service;
1438
+ let filterFunction;
1439
+ let preFilteredIds;
1440
+ // Use metadata index for pre-filtering if available
1441
+ if (hasMetadataFilter && this.metadataIndex) {
1442
+ try {
1443
+ // Ensure metadata index is up to date
1444
+ await this.metadataIndex.flush();
1445
+ // Get candidate IDs from metadata index
1446
+ const candidateIds = await this.metadataIndex.getIdsForFilter(options.metadata);
1447
+ if (candidateIds.length > 0) {
1448
+ preFilteredIds = new Set(candidateIds);
1449
+ // Create a simple filter function that just checks the pre-filtered set
1450
+ filterFunction = async (id) => {
1451
+ if (!preFilteredIds.has(id))
1452
+ return false;
1453
+ // Still apply service filter if needed
1454
+ if (hasServiceFilter) {
1455
+ const metadata = await this.storage.getMetadata(id);
1456
+ const noun = this.index.getNouns().get(id);
1457
+ if (!noun || !metadata)
1458
+ return false;
1459
+ const result = { id, score: 0, vector: noun.vector, metadata };
1460
+ return this.filterResultsByService([result], options.service).length > 0;
1461
+ }
1462
+ return true;
1463
+ };
1464
+ }
1465
+ else {
1466
+ // No items match the metadata criteria, return empty results immediately
1467
+ return [];
1468
+ }
1469
+ }
1470
+ catch (indexError) {
1471
+ console.warn('Metadata index error, falling back to full filtering:', indexError);
1472
+ // Fall back to full metadata filtering below
1473
+ }
1474
+ }
1475
+ // Fallback to full metadata filtering if index wasn't used
1476
+ if (!filterFunction && (hasMetadataFilter || hasServiceFilter)) {
1477
+ filterFunction = async (id) => {
1478
+ // Get metadata for filtering
1479
+ let metadata = await this.storage.getMetadata(id);
1480
+ if (metadata === null) {
1481
+ metadata = {};
1482
+ }
1483
+ // Apply metadata filter
1484
+ if (hasMetadataFilter) {
1485
+ const matches = matchesMetadataFilter(metadata, options.metadata);
1486
+ if (!matches) {
1487
+ return false;
1488
+ }
1489
+ }
1490
+ // Apply service filter
1491
+ if (hasServiceFilter) {
1492
+ const noun = this.index.getNouns().get(id);
1493
+ if (!noun)
1494
+ return false;
1495
+ const result = { id, score: 0, vector: noun.vector, metadata };
1496
+ if (!this.filterResultsByService([result], options.service).length) {
1497
+ return false;
1498
+ }
1499
+ }
1500
+ return true;
1501
+ };
1502
+ }
1324
1503
  // When using offset, we need to fetch more results and then slice
1325
1504
  const offset = options.offset || 0;
1326
1505
  const totalNeeded = k + offset;
1327
- // Search in the index for totalNeeded results
1328
- const results = await this.index.search(queryVector, totalNeeded);
1506
+ // Search in the index with filter
1507
+ const results = await this.index.search(queryVector, totalNeeded, filterFunction);
1329
1508
  // Skip the offset number of results
1330
1509
  const paginatedResults = results.slice(offset, offset + k);
1331
1510
  // Get metadata for each result
@@ -1351,8 +1530,7 @@ export class BrainyData {
1351
1530
  metadata: metadata
1352
1531
  });
1353
1532
  }
1354
- // Filter results by service if specified
1355
- return this.filterResultsByService(searchResults, options.service);
1533
+ return searchResults;
1356
1534
  }
1357
1535
  else {
1358
1536
  // Get nouns for each noun type in parallel
@@ -1397,8 +1575,8 @@ export class BrainyData {
1397
1575
  metadata: metadata
1398
1576
  });
1399
1577
  }
1400
- // Filter results by service if specified
1401
- return this.filterResultsByService(searchResults, options.service);
1578
+ // Results are already filtered, just return them
1579
+ return searchResults;
1402
1580
  }
1403
1581
  }
1404
1582
  catch (error) {
@@ -1467,22 +1645,29 @@ export class BrainyData {
1467
1645
  }
1468
1646
  // Default behavior (backward compatible): search locally
1469
1647
  try {
1470
- // Check cache first (transparent to user)
1471
- const cacheKey = this.searchCache.getCacheKey(queryVectorOrData, k, options);
1472
- const cachedResults = this.searchCache.get(cacheKey);
1473
- if (cachedResults) {
1474
- // Track cache hit in health monitor
1475
- if (this.healthMonitor) {
1476
- const latency = Date.now() - startTime;
1477
- this.healthMonitor.recordRequest(latency, false);
1478
- this.healthMonitor.recordCacheAccess(true);
1479
- }
1480
- return cachedResults;
1648
+ const hasMetadataFilter = options.metadata && Object.keys(options.metadata).length > 0;
1649
+ // Check cache first (transparent to user) - but skip cache if we have metadata filters
1650
+ if (!hasMetadataFilter) {
1651
+ const cacheKey = this.searchCache.getCacheKey(queryVectorOrData, k, options);
1652
+ const cachedResults = this.searchCache.get(cacheKey);
1653
+ if (cachedResults) {
1654
+ // Track cache hit in health monitor
1655
+ if (this.healthMonitor) {
1656
+ const latency = Date.now() - startTime;
1657
+ this.healthMonitor.recordRequest(latency, false);
1658
+ this.healthMonitor.recordCacheAccess(true);
1659
+ }
1660
+ return cachedResults;
1661
+ }
1481
1662
  }
1482
1663
  // Cache miss - perform actual search
1483
- const results = await this.searchLocal(queryVectorOrData, k, options);
1484
- // Cache results for future queries (unless explicitly disabled)
1485
- if (!options.skipCache) {
1664
+ const results = await this.searchLocal(queryVectorOrData, k, {
1665
+ ...options,
1666
+ metadata: options.metadata
1667
+ });
1668
+ // Cache results for future queries (unless explicitly disabled or has metadata filter)
1669
+ if (!options.skipCache && !hasMetadataFilter) {
1670
+ const cacheKey = this.searchCache.getCacheKey(queryVectorOrData, k, options);
1486
1671
  this.searchCache.set(cacheKey, results);
1487
1672
  }
1488
1673
  // Track successful search in health monitor
@@ -1611,6 +1796,7 @@ export class BrainyData {
1611
1796
  searchResults = await this.searchByNounTypes(queryToUse, k, options.nounTypes, {
1612
1797
  forceEmbed: options.forceEmbed,
1613
1798
  service: options.service,
1799
+ metadata: options.metadata,
1614
1800
  offset: options.offset
1615
1801
  });
1616
1802
  }
@@ -1619,6 +1805,7 @@ export class BrainyData {
1619
1805
  searchResults = await this.searchByNounTypes(queryToUse, k, null, {
1620
1806
  forceEmbed: options.forceEmbed,
1621
1807
  service: options.service,
1808
+ metadata: options.metadata,
1622
1809
  offset: options.offset
1623
1810
  });
1624
1811
  }
@@ -1947,6 +2134,12 @@ export class BrainyData {
1947
2134
  await this.storage.decrementStatistic('noun', service);
1948
2135
  // Try to remove metadata (ignore errors)
1949
2136
  try {
2137
+ // Get metadata before removing for index cleanup
2138
+ const existingMetadata = await this.storage.getMetadata(actualId);
2139
+ // Remove from metadata index
2140
+ if (this.metadataIndex && existingMetadata && !this.readOnly && !this.frozen) {
2141
+ await this.metadataIndex.removeFromIndex(actualId, existingMetadata);
2142
+ }
1950
2143
  await this.storage.saveMetadata(actualId, null);
1951
2144
  await this.storage.decrementStatistic('metadata', service);
1952
2145
  }
@@ -2032,6 +2225,18 @@ export class BrainyData {
2032
2225
  }
2033
2226
  // Update metadata
2034
2227
  await this.storage.saveMetadata(id, metadata);
2228
+ // Update metadata index
2229
+ if (this.metadataIndex && !this.readOnly && !this.frozen) {
2230
+ // Remove old metadata from index if it exists
2231
+ const oldMetadata = await this.storage.getMetadata(id);
2232
+ if (oldMetadata) {
2233
+ await this.metadataIndex.removeFromIndex(id, oldMetadata);
2234
+ }
2235
+ // Add new metadata to index
2236
+ if (metadata) {
2237
+ await this.metadataIndex.addToIndex(id, metadata);
2238
+ }
2239
+ }
2035
2240
  // Track metadata statistics
2036
2241
  const service = this.getServiceName(options);
2037
2242
  await this.storage.incrementStatistic('metadata', service);
@@ -2379,6 +2584,10 @@ export class BrainyData {
2379
2584
  };
2380
2585
  // Save the complete verb (BaseStorage will handle the separation)
2381
2586
  await this.storage.saveVerb(fullVerb);
2587
+ // Update metadata index
2588
+ if (this.metadataIndex && verbMetadata) {
2589
+ await this.metadataIndex.addToIndex(id, verbMetadata);
2590
+ }
2382
2591
  // Track verb statistics
2383
2592
  const serviceForStats = this.getServiceName(options);
2384
2593
  await this.storage.incrementStatistic('verb', serviceForStats);
@@ -2582,11 +2791,17 @@ export class BrainyData {
2582
2791
  // Check if database is in read-only mode
2583
2792
  this.checkReadOnly();
2584
2793
  try {
2794
+ // Get existing metadata before removal for index cleanup
2795
+ const existingMetadata = await this.storage.getVerbMetadata(id);
2585
2796
  // Remove from index
2586
2797
  const removed = this.index.removeItem(id);
2587
2798
  if (!removed) {
2588
2799
  return false;
2589
2800
  }
2801
+ // Remove from metadata index
2802
+ if (this.metadataIndex && existingMetadata) {
2803
+ await this.metadataIndex.removeFromIndex(id, existingMetadata);
2804
+ }
2590
2805
  // Remove from storage
2591
2806
  await this.storage.deleteVerb(id);
2592
2807
  // Track deletion statistics
@@ -2754,6 +2969,10 @@ export class BrainyData {
2754
2969
  if (!this.storage) {
2755
2970
  throw new Error('Storage not initialized');
2756
2971
  }
2972
+ // If the database is frozen, do not flush statistics
2973
+ if (this.frozen) {
2974
+ return;
2975
+ }
2757
2976
  // Call the flushStatisticsToStorage method on the storage adapter
2758
2977
  await this.storage.flushStatisticsToStorage();
2759
2978
  }
@@ -2761,6 +2980,10 @@ export class BrainyData {
2761
2980
  * Update storage sizes if needed (called periodically for performance)
2762
2981
  */
2763
2982
  async updateStorageSizesIfNeeded() {
2983
+ // If the database is frozen, do not update storage sizes
2984
+ if (this.frozen) {
2985
+ return;
2986
+ }
2764
2987
  // Only update every minute to avoid performance impact
2765
2988
  const now = Date.now();
2766
2989
  const lastUpdate = this.lastStorageSizeUpdate || 0;
@@ -2801,8 +3024,8 @@ export class BrainyData {
2801
3024
  async getStatistics(options = {}) {
2802
3025
  await this.ensureInitialized();
2803
3026
  try {
2804
- // If forceRefresh is true, flush statistics to storage first
2805
- if (options.forceRefresh && this.storage) {
3027
+ // If forceRefresh is true and not frozen, flush statistics to storage first
3028
+ if (options.forceRefresh && this.storage && !this.frozen) {
2806
3029
  await this.storage.flushStatisticsToStorage();
2807
3030
  }
2808
3031
  // Get statistics from storage
@@ -2945,6 +3168,137 @@ export class BrainyData {
2945
3168
  throw new Error(`Failed to get statistics: ${error}`);
2946
3169
  }
2947
3170
  }
3171
+ /**
3172
+ * List all services that have written data to the database
3173
+ * @returns Array of service statistics
3174
+ */
3175
+ async listServices() {
3176
+ await this.ensureInitialized();
3177
+ try {
3178
+ const stats = await this.storage.getStatistics();
3179
+ if (!stats) {
3180
+ return [];
3181
+ }
3182
+ // Get unique service names from all counters
3183
+ const services = new Set();
3184
+ Object.keys(stats.nounCount).forEach(s => services.add(s));
3185
+ Object.keys(stats.verbCount).forEach(s => services.add(s));
3186
+ Object.keys(stats.metadataCount).forEach(s => services.add(s));
3187
+ // Build service statistics for each service
3188
+ const result = [];
3189
+ for (const service of services) {
3190
+ const serviceStats = {
3191
+ name: service,
3192
+ totalNouns: stats.nounCount[service] || 0,
3193
+ totalVerbs: stats.verbCount[service] || 0,
3194
+ totalMetadata: stats.metadataCount[service] || 0
3195
+ };
3196
+ // Add activity timestamps if available
3197
+ if (stats.serviceActivity && stats.serviceActivity[service]) {
3198
+ const activity = stats.serviceActivity[service];
3199
+ serviceStats.firstActivity = activity.firstActivity;
3200
+ serviceStats.lastActivity = activity.lastActivity;
3201
+ serviceStats.operations = {
3202
+ adds: activity.totalOperations,
3203
+ updates: 0,
3204
+ deletes: 0
3205
+ };
3206
+ }
3207
+ // Determine status based on recent activity
3208
+ if (serviceStats.lastActivity) {
3209
+ const lastActivityTime = new Date(serviceStats.lastActivity).getTime();
3210
+ const now = Date.now();
3211
+ const hourAgo = now - 3600000;
3212
+ if (lastActivityTime > hourAgo) {
3213
+ serviceStats.status = 'active';
3214
+ }
3215
+ else {
3216
+ serviceStats.status = 'inactive';
3217
+ }
3218
+ }
3219
+ else {
3220
+ serviceStats.status = 'inactive';
3221
+ }
3222
+ // Check if service is read-only (has no write operations)
3223
+ if (serviceStats.totalNouns === 0 && serviceStats.totalVerbs === 0) {
3224
+ serviceStats.status = 'read-only';
3225
+ }
3226
+ result.push(serviceStats);
3227
+ }
3228
+ // Sort by last activity (most recent first)
3229
+ result.sort((a, b) => {
3230
+ if (!a.lastActivity && !b.lastActivity)
3231
+ return 0;
3232
+ if (!a.lastActivity)
3233
+ return 1;
3234
+ if (!b.lastActivity)
3235
+ return -1;
3236
+ return new Date(b.lastActivity).getTime() - new Date(a.lastActivity).getTime();
3237
+ });
3238
+ return result;
3239
+ }
3240
+ catch (error) {
3241
+ console.error('Failed to list services:', error);
3242
+ throw new Error(`Failed to list services: ${error}`);
3243
+ }
3244
+ }
3245
+ /**
3246
+ * Get statistics for a specific service
3247
+ * @param service The service name to get statistics for
3248
+ * @returns Service statistics or null if service not found
3249
+ */
3250
+ async getServiceStatistics(service) {
3251
+ await this.ensureInitialized();
3252
+ try {
3253
+ const stats = await this.storage.getStatistics();
3254
+ if (!stats) {
3255
+ return null;
3256
+ }
3257
+ // Check if service exists in any counter
3258
+ const hasData = (stats.nounCount[service] || 0) > 0 ||
3259
+ (stats.verbCount[service] || 0) > 0 ||
3260
+ (stats.metadataCount[service] || 0) > 0;
3261
+ if (!hasData && !stats.serviceActivity?.[service]) {
3262
+ return null;
3263
+ }
3264
+ const serviceStats = {
3265
+ name: service,
3266
+ totalNouns: stats.nounCount[service] || 0,
3267
+ totalVerbs: stats.verbCount[service] || 0,
3268
+ totalMetadata: stats.metadataCount[service] || 0
3269
+ };
3270
+ // Add activity timestamps if available
3271
+ if (stats.serviceActivity && stats.serviceActivity[service]) {
3272
+ const activity = stats.serviceActivity[service];
3273
+ serviceStats.firstActivity = activity.firstActivity;
3274
+ serviceStats.lastActivity = activity.lastActivity;
3275
+ serviceStats.operations = {
3276
+ adds: activity.totalOperations,
3277
+ updates: 0,
3278
+ deletes: 0
3279
+ };
3280
+ }
3281
+ // Determine status
3282
+ if (serviceStats.lastActivity) {
3283
+ const lastActivityTime = new Date(serviceStats.lastActivity).getTime();
3284
+ const now = Date.now();
3285
+ const hourAgo = now - 3600000;
3286
+ serviceStats.status = lastActivityTime > hourAgo ? 'active' : 'inactive';
3287
+ }
3288
+ else {
3289
+ serviceStats.status = 'inactive';
3290
+ }
3291
+ // Check if service is read-only
3292
+ if (serviceStats.totalNouns === 0 && serviceStats.totalVerbs === 0) {
3293
+ serviceStats.status = 'read-only';
3294
+ }
3295
+ return serviceStats;
3296
+ }
3297
+ catch (error) {
3298
+ console.error(`Failed to get statistics for service ${service}:`, error);
3299
+ throw new Error(`Failed to get statistics for service ${service}: ${error}`);
3300
+ }
3301
+ }
2948
3302
  /**
2949
3303
  * Check if the database is in read-only mode
2950
3304
  * @returns True if the database is in read-only mode, false otherwise
@@ -2963,6 +3317,29 @@ export class BrainyData {
2963
3317
  this.writeOnly = false;
2964
3318
  }
2965
3319
  }
3320
+ /**
3321
+ * Check if the database is frozen (completely immutable)
3322
+ * @returns True if the database is frozen, false otherwise
3323
+ */
3324
+ isFrozen() {
3325
+ return this.frozen;
3326
+ }
3327
+ /**
3328
+ * Set the database to frozen mode (completely immutable)
3329
+ * When frozen, no changes are allowed including statistics updates and index optimizations
3330
+ * @param frozen True to freeze the database, false to allow optimizations
3331
+ */
3332
+ setFrozen(frozen) {
3333
+ this.frozen = frozen;
3334
+ // If unfreezing and real-time updates are configured, restart them
3335
+ if (!frozen && this.realtimeUpdateConfig.enabled && this.isInitialized) {
3336
+ this.startRealtimeUpdates();
3337
+ }
3338
+ // If freezing, stop real-time updates
3339
+ else if (frozen && this.updateTimerId !== null) {
3340
+ this.stopRealtimeUpdates();
3341
+ }
3342
+ }
2966
3343
  /**
2967
3344
  * Check if the database is in write-only mode
2968
3345
  * @returns True if the database is in write-only mode, false otherwise
@@ -3090,7 +3467,7 @@ export class BrainyData {
3090
3467
  // First use the HNSW index to find similar vectors efficiently
3091
3468
  const searchResults = await this.index.search(queryVector, k * 2);
3092
3469
  // Get all verbs for filtering
3093
- const allVerbs = await this.storage.getAllVerbs();
3470
+ const allVerbs = await this.getAllVerbs();
3094
3471
  // Create a map of verb IDs for faster lookup
3095
3472
  const verbMap = new Map();
3096
3473
  for (const verb of allVerbs) {
@@ -3247,6 +3624,83 @@ export class BrainyData {
3247
3624
  throw new Error(`Failed to search nouns by verbs: ${error}`);
3248
3625
  }
3249
3626
  }
3627
+ /**
3628
+ * Get available filter values for a field
3629
+ * Useful for building dynamic filter UIs
3630
+ *
3631
+ * @param field The field name to get values for
3632
+ * @returns Array of available values for that field
3633
+ */
3634
+ async getFilterValues(field) {
3635
+ await this.ensureInitialized();
3636
+ if (!this.metadataIndex) {
3637
+ return [];
3638
+ }
3639
+ return this.metadataIndex.getFilterValues(field);
3640
+ }
3641
+ /**
3642
+ * Get all available filter fields
3643
+ * Useful for discovering what metadata fields are indexed
3644
+ *
3645
+ * @returns Array of indexed field names
3646
+ */
3647
+ async getFilterFields() {
3648
+ await this.ensureInitialized();
3649
+ if (!this.metadataIndex) {
3650
+ return [];
3651
+ }
3652
+ return this.metadataIndex.getFilterFields();
3653
+ }
3654
+ /**
3655
+ * Search within a specific set of items
3656
+ * This is useful when you've pre-filtered items and want to search only within them
3657
+ *
3658
+ * @param queryVectorOrData Query vector or data to search for
3659
+ * @param itemIds Array of item IDs to search within
3660
+ * @param k Number of results to return
3661
+ * @param options Additional options
3662
+ * @returns Array of search results
3663
+ */
3664
+ async searchWithinItems(queryVectorOrData, itemIds, k = 10, options = {}) {
3665
+ await this.ensureInitialized();
3666
+ // Check if database is in write-only mode
3667
+ this.checkWriteOnly();
3668
+ // Create a Set for fast lookups
3669
+ const allowedIds = new Set(itemIds);
3670
+ // Create filter function that only allows specified items
3671
+ const filterFunction = async (id) => allowedIds.has(id);
3672
+ // Get query vector
3673
+ let queryVector;
3674
+ if (Array.isArray(queryVectorOrData) && !options.forceEmbed) {
3675
+ queryVector = queryVectorOrData;
3676
+ }
3677
+ else {
3678
+ queryVector = await this.embeddingFunction(queryVectorOrData);
3679
+ }
3680
+ // Search with the filter
3681
+ const results = await this.index.search(queryVector, Math.min(k, itemIds.length), filterFunction);
3682
+ // Get metadata for each result
3683
+ const searchResults = [];
3684
+ for (const [id, score] of results) {
3685
+ const noun = this.index.getNouns().get(id);
3686
+ if (!noun)
3687
+ continue;
3688
+ let metadata = await this.storage.getMetadata(id);
3689
+ if (metadata === null) {
3690
+ metadata = {};
3691
+ }
3692
+ if (metadata && typeof metadata === 'object') {
3693
+ metadata = { ...metadata, id };
3694
+ }
3695
+ searchResults.push({
3696
+ id,
3697
+ score,
3698
+ vector: noun.vector,
3699
+ metadata: metadata
3700
+ });
3701
+ }
3702
+ return searchResults;
3703
+ }
3250
3704
  /**
3251
3705
  * Search for similar documents using a text query
3252
3706
  * This is a convenience method that embeds the query text and performs a search
@@ -3264,11 +3718,13 @@ export class BrainyData {
3264
3718
  try {
3265
3719
  // Embed the query text
3266
3720
  const queryVector = await this.embed(query);
3267
- // Search using the embedded vector
3721
+ // Search using the embedded vector with metadata filtering
3268
3722
  const results = await this.search(queryVector, k, {
3269
3723
  nounTypes: options.nounTypes,
3270
3724
  includeVerbs: options.includeVerbs,
3271
- searchMode: options.searchMode
3725
+ searchMode: options.searchMode,
3726
+ metadata: options.metadata,
3727
+ forceEmbed: false // Already embedded
3272
3728
  });
3273
3729
  // Track search performance
3274
3730
  const duration = Date.now() - searchStartTime;
@@ -3981,6 +4437,20 @@ export class BrainyData {
3981
4437
  clearInterval(this.updateTimerId);
3982
4438
  this.updateTimerId = null;
3983
4439
  }
4440
+ // Stop maintenance intervals
4441
+ for (const intervalId of this.maintenanceIntervals) {
4442
+ clearInterval(intervalId);
4443
+ }
4444
+ this.maintenanceIntervals = [];
4445
+ // Flush metadata index one last time
4446
+ if (this.metadataIndex) {
4447
+ try {
4448
+ await this.metadataIndex.flush();
4449
+ }
4450
+ catch (error) {
4451
+ console.warn('Error flushing metadata index during cleanup:', error);
4452
+ }
4453
+ }
3984
4454
  // Clean up distributed mode resources
3985
4455
  if (this.healthMonitor) {
3986
4456
  this.healthMonitor.stop();