@soulcraft/brainy 0.37.0 → 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/unified.js CHANGED
@@ -15417,6 +15417,1699 @@ class AugmentationPipeline {
15417
15417
  // Create and export a default instance of the pipeline
15418
15418
  const augmentationPipeline$1 = new AugmentationPipeline();
15419
15419
 
15420
/**
 * Distributed Configuration Manager
 *
 * Keeps one shared configuration document (stored through the storage
 * adapter under the metadata id `_distributed_config`) in sync between
 * several Brainy instances. Each instance registers itself in
 * `config.instances`, refreshes a heartbeat on a timer, purges peers whose
 * heartbeat has expired, and polls for newer config versions.
 *
 * The storage adapter must provide async `getMetadata(id)` and
 * `saveMetadata(id, data)`.
 */
class DistributedConfigManager {
    /**
     * @param storage - Storage adapter used to read/write the shared config
     * @param distributedConfig - Optional settings: `instanceId`, `configPath`,
     *   `heartbeatInterval`, `configCheckInterval`, `instanceTimeout`, `role`
     * @param brainyMode - Optional `{ readOnly, writeOnly }` flags used to
     *   infer the role when `distributedConfig.role` is not given
     */
    constructor(storage, distributedConfig, brainyMode) {
        this.config = null;
        this.lastConfigVersion = 0;
        this.storage = storage;
        this.instanceId = distributedConfig?.instanceId || `instance-${v4()}`;
        // Stored for reference; the methods below address the config by the
        // fixed metadata id '_distributed_config', not by this path.
        this.configPath = distributedConfig?.configPath || '_brainy/config.json';
        this.heartbeatInterval = distributedConfig?.heartbeatInterval || 30000;
        this.configCheckInterval = distributedConfig?.configCheckInterval || 10000;
        this.instanceTimeout = distributedConfig?.instanceTimeout || 60000;
        // An explicit role wins; otherwise infer it from the read/write mode.
        if (distributedConfig?.role) {
            this.role = distributedConfig.role;
        }
        else if (brainyMode) {
            if (brainyMode.writeOnly) {
                this.role = 'writer';
            }
            else if (brainyMode.readOnly) {
                this.role = 'reader';
            }
            // Neither flag set: the role has to come from determineRole()
        }
    }
    /**
     * Load (or create) the shared config, resolve the role, register this
     * instance and start the heartbeat and config-watch timers.
     * @returns The current shared configuration
     * @throws Error if no role can be determined
     */
    async initialize() {
        this.config = await this.loadOrCreateConfig();
        if (!this.role) {
            this.role = await this.determineRole();
        }
        await this.registerInstance();
        this.startHeartbeat();
        this.startConfigWatch();
        return this.config;
    }
    /**
     * Load the existing config or create and persist a default one.
     * @returns The loaded or newly created configuration
     */
    async loadOrCreateConfig() {
        try {
            const configData = await this.storage.getMetadata('_distributed_config');
            if (configData) {
                this.lastConfigVersion = configData.version;
                return configData;
            }
        }
        catch (error) {
            // Treated as "config doesn't exist yet".
            // NOTE(review): any read error ends up here and leads to default
            // settings being written below - confirm the storage adapter only
            // throws when the config is genuinely missing.
        }
        const newConfig = {
            version: 1,
            updated: new Date().toISOString(),
            settings: {
                partitionStrategy: 'hash',
                partitionCount: 100,
                embeddingModel: 'text-embedding-ada-002',
                dimensions: 1536,
                distanceMetric: 'cosine',
                hnswParams: {
                    M: 16,
                    efConstruction: 200
                }
            },
            instances: {}
        };
        await this.saveConfig(newConfig);
        return newConfig;
    }
    /**
     * Determine the role from the BRAINY_ROLE environment variable or the
     * already configured role.
     * IMPORTANT: the role must be explicit - it is never auto-assigned based
     * on deployment order or on which instances already exist.
     * @returns 'writer', 'reader' or 'hybrid' (or the pre-set role)
     * @throws Error if BRAINY_ROLE is invalid or no role is configured
     */
    async determineRole() {
        const envRole = this.readEnv('BRAINY_ROLE');
        if (envRole) {
            const role = envRole.toLowerCase();
            if (role === 'writer' || role === 'reader' || role === 'hybrid') {
                return role;
            }
            throw new Error(`Invalid BRAINY_ROLE: ${envRole}. Must be 'writer', 'reader', or 'hybrid'`);
        }
        if (this.role) {
            return this.role;
        }
        throw new Error('Distributed mode requires explicit role configuration. ' +
            'Set BRAINY_ROLE environment variable or pass role in distributed config. ' +
            'Valid roles: "writer", "reader", "hybrid"');
    }
    /**
     * Read an environment variable without assuming a Node.js `process`
     * global exists (it does not in browsers/workers).
     * @param name - Environment variable name
     * @returns The value, or undefined when unavailable
     */
    readEnv(name) {
        if (typeof process === 'undefined' || !process.env) {
            return undefined;
        }
        return process.env[name];
    }
    /**
     * Current heap usage in bytes, or 0 when `process.memoryUsage` is not
     * available in this runtime.
     */
    readHeapUsed() {
        if (typeof process === 'undefined' || typeof process.memoryUsage !== 'function') {
            return 0;
        }
        return process.memoryUsage().heapUsed;
    }
    /**
     * Check if an instance is still alive, i.e. its last heartbeat is
     * strictly younger than `instanceTimeout`.
     * @param instance - Instance record with a `lastHeartbeat` timestamp
     */
    isInstanceAlive(instance) {
        const lastSeen = new Date(instance.lastHeartbeat).getTime();
        const now = Date.now();
        return (now - lastSeen) < this.instanceTimeout;
    }
    /**
     * Register this instance in the shared config and persist it.
     * Does nothing when no config is loaded.
     * @throws Error if the role has not been set
     */
    async registerInstance() {
        if (!this.config)
            return;
        if (!this.role) {
            throw new Error('Cannot register instance without a role');
        }
        const instanceInfo = {
            role: this.role,
            status: 'active',
            lastHeartbeat: new Date().toISOString(),
            metrics: {
                memoryUsage: this.readHeapUsed()
            }
        };
        const endpoint = this.readEnv('SERVICE_ENDPOINT');
        if (endpoint) {
            instanceInfo.endpoint = endpoint;
        }
        this.config.instances[this.instanceId] = instanceInfo;
        await this.saveConfig(this.config);
    }
    /**
     * Persist the configuration, incrementing its version and refreshing
     * its `updated` timestamp. The passed object is modified in place.
     * @param config - Configuration to save
     */
    async saveConfig(config) {
        config.version++;
        config.updated = new Date().toISOString();
        this.lastConfigVersion = config.version;
        await this.storage.saveMetadata('_distributed_config', config);
        this.config = config;
    }
    /**
     * Start the periodic heartbeat. Calling it again replaces the previous
     * timer instead of leaking it.
     */
    startHeartbeat() {
        if (this.heartbeatTimer) {
            clearInterval(this.heartbeatTimer);
        }
        this.heartbeatTimer = setInterval(async () => {
            try {
                await this.updateHeartbeat();
            }
            catch (error) {
                // A failed write must not surface as an unhandled rejection
                // from the timer; the next tick simply tries again.
                console.error('Failed to update heartbeat:', error);
            }
        }, this.heartbeatInterval);
    }
    /**
     * Refresh this instance's heartbeat and purge instances that are no
     * longer alive. Re-registers this instance if it was removed by a peer.
     */
    async updateHeartbeat() {
        if (!this.config)
            return;
        // Reload so we do not overwrite changes made by other instances
        try {
            const latestConfig = await this.loadConfig();
            if (latestConfig) {
                this.config = latestConfig;
            }
        }
        catch (error) {
            console.error('Failed to reload config:', error);
        }
        const self = this.config.instances[this.instanceId];
        if (!self) {
            // Re-register if we were removed
            await this.registerInstance();
            return;
        }
        self.lastHeartbeat = new Date().toISOString();
        self.status = 'active';
        self.metrics = {
            memoryUsage: this.readHeapUsed()
        };
        // Purge peers using the same liveness rule getInstancesByRole applies
        let hasChanges = false;
        for (const [id, instance] of Object.entries(this.config.instances)) {
            if (id === this.instanceId)
                continue;
            if (!this.isInstanceAlive(instance)) {
                delete this.config.instances[id];
                hasChanges = true;
            }
        }
        if (hasChanges) {
            await this.saveConfig(this.config);
        }
        else {
            // Heartbeat-only change: persist without a version increment
            await this.storage.saveMetadata('_distributed_config', this.config);
        }
    }
    /**
     * Start polling for config changes. Calling it again replaces the
     * previous timer instead of leaking it.
     */
    startConfigWatch() {
        if (this.configWatchTimer) {
            clearInterval(this.configWatchTimer);
        }
        this.configWatchTimer = setInterval(async () => {
            await this.checkForConfigUpdates();
        }, this.configCheckInterval);
    }
    /**
     * Adopt the stored config when its version is newer than the last one
     * seen, and notify the registered callback. Errors are logged, not thrown.
     */
    async checkForConfigUpdates() {
        try {
            const latestConfig = await this.loadConfig();
            if (!latestConfig)
                return;
            if (latestConfig.version > this.lastConfigVersion) {
                this.config = latestConfig;
                this.lastConfigVersion = latestConfig.version;
                if (this.onConfigUpdate) {
                    this.onConfigUpdate(latestConfig);
                }
            }
        }
        catch (error) {
            console.error('Failed to check config updates:', error);
        }
    }
    /**
     * Load the configuration from storage.
     * @returns The stored config, or null when missing or unreadable
     */
    async loadConfig() {
        try {
            const configData = await this.storage.getMetadata('_distributed_config');
            if (configData) {
                return configData;
            }
        }
        catch (error) {
            console.error('Failed to load config:', error);
        }
        return null;
    }
    /**
     * Get the current in-memory configuration (null before initialize()).
     */
    getConfig() {
        return this.config;
    }
    /**
     * Get the instance role.
     * @throws Error if the role has not been set yet
     */
    getRole() {
        if (!this.role) {
            throw new Error('Role not initialized');
        }
        return this.role;
    }
    /**
     * Get the instance ID.
     */
    getInstanceId() {
        return this.instanceId;
    }
    /**
     * Set the callback invoked with the new config when a newer version is
     * detected by the config watch.
     */
    setOnConfigUpdate(callback) {
        this.onConfigUpdate = callback;
    }
    /**
     * Get all live instances (per isInstanceAlive) that have the given role.
     * @param role - Role to filter by
     * @returns Array of instance records
     */
    getInstancesByRole(role) {
        if (!this.config)
            return [];
        return Object.values(this.config.instances)
            .filter((instance) => instance.role === role && this.isInstanceAlive(instance));
    }
    /**
     * Merge the given metrics into this instance's record and persist the
     * config without a version increment. No-op when not registered.
     * @param metrics - Metric values to merge
     */
    async updateMetrics(metrics) {
        if (!this.config || !this.config.instances[this.instanceId])
            return;
        this.config.instances[this.instanceId].metrics = {
            ...this.config.instances[this.instanceId].metrics,
            ...metrics
        };
        await this.storage.saveMetadata('_distributed_config', this.config);
    }
    /**
     * Stop both timers and mark this instance as inactive in the shared
     * config.
     */
    async cleanup() {
        if (this.heartbeatTimer) {
            clearInterval(this.heartbeatTimer);
            this.heartbeatTimer = undefined;
        }
        if (this.configWatchTimer) {
            clearInterval(this.configWatchTimer);
            this.configWatchTimer = undefined;
        }
        if (this.config && this.config.instances[this.instanceId]) {
            this.config.instances[this.instanceId].status = 'inactive';
            await this.saveConfig(this.config);
        }
    }
}
15740
+
15741
+ /**
15742
+ * Cross-platform crypto utilities
15743
+ * Provides hashing functions that work in both Node.js and browser environments
15744
+ */
15745
/**
 * Hash a string to a non-negative integer using a djb2-style recurrence
 * (seed 5381, then hash * 33 + code unit for every UTF-16 code unit).
 *
 * The shift is evaluated in 32-bit integer arithmetic while the additions
 * are ordinary number additions; this exact combination is what determines
 * the produced values, so it must not be "simplified".
 * @param str - String to hash
 * @returns Non-negative integer hash
 */
function hashString(str) {
    let acc = 5381;
    let pos = 0;
    while (pos < str.length) {
        // (acc << 5) + acc is the "times 33" step
        acc = (acc << 5) + acc + str.charCodeAt(pos);
        pos += 1;
    }
    // Drop the sign so the result can be used with modulo
    return Math.abs(acc);
}
15760
/**
 * Deterministic hash used to assign items to partitions.
 * Delegates to hashString so every environment computes the same value.
 * @param input - Input string to hash
 * @returns Non-negative integer hash suitable for modulo operations
 */
function getPartitionHash(input) {
    const partitionHash = hashString(input);
    return partitionHash;
}
15771
+
15772
/**
 * Hash-based Partitioner
 * Maps vector IDs onto a fixed number of partition paths of the form
 * `vectors/pNNN` in a deterministic way.
 */
class HashPartitioner {
    /**
     * @param config - Shared config; `config.settings.partitionCount` sets
     *   the number of partitions (100 when missing or 0)
     */
    constructor(config) {
        this.partitionPrefix = 'vectors/p';
        this.partitionCount = config.settings.partitionCount || 100;
    }
    /**
     * Resolve the partition path for a vector ID.
     * @param vectorId - The unique identifier of the vector
     * @returns The partition path
     */
    getPartition(vectorId) {
        const slot = this.hashString(vectorId) % this.partitionCount;
        const suffix = slot.toString().padStart(3, '0');
        return `${this.partitionPrefix}${suffix}`;
    }
    /**
     * Same as getPartition; the domain is metadata only and never
     * influences which partition is chosen.
     * @param vectorId - The unique identifier of the vector
     * @param domain - The domain identifier (ignored for partitioning)
     * @returns The partition path
     */
    getPartitionWithDomain(vectorId, domain) {
        return this.getPartition(vectorId);
    }
    /**
     * List every partition path, in index order.
     * @returns Array of all partition paths
     */
    getAllPartitions() {
        const paths = [];
        let index = 0;
        while (index < this.partitionCount) {
            const suffix = index.toString().padStart(3, '0');
            paths.push(`${this.partitionPrefix}${suffix}`);
            index++;
        }
        return paths;
    }
    /**
     * Parse the numeric index back out of a partition path.
     * @param partitionPath - The partition path (must end in `p<digits>`)
     * @returns The partition index
     * @throws Error if the path does not end in `p<digits>`
     */
    getPartitionIndex(partitionPath) {
        const found = partitionPath.match(/p(\d+)$/);
        if (!found) {
            throw new Error(`Invalid partition path: ${partitionPath}`);
        }
        return parseInt(found[1], 10);
    }
    /**
     * Hash a string to a number for consistent partitioning.
     * @param str - The string to hash
     * @returns The hash produced by getPartitionHash
     */
    hashString(str) {
        return getPartitionHash(str);
    }
    /**
     * Group vector IDs by the partition each one maps to.
     * @param vectorIds - Iterable of vector IDs
     * @returns Map of partition path to the IDs that belong to it
     */
    getPartitionsForBatch(vectorIds) {
        const grouped = new Map();
        for (const vectorId of vectorIds) {
            const target = this.getPartition(vectorId);
            const bucket = grouped.get(target);
            if (bucket === undefined) {
                grouped.set(target, [vectorId]);
            }
            else {
                bucket.push(vectorId);
            }
        }
        return grouped;
    }
}
15851
+
15852
+ /**
15853
+ * Operational Modes for Distributed Brainy
15854
+ * Defines different modes with optimized caching strategies
15855
+ */
15856
/**
 * Base operational mode.
 * Subclasses set the `canRead`, `canWrite` and `canDelete` flags that
 * validateOperation consults.
 */
class BaseOperationalMode {
    /**
     * Throw if the given operation is not permitted by this mode's flags.
     * Operations other than 'read', 'write' and 'delete' are not checked.
     * @param operation - 'read', 'write' or 'delete'
     * @throws Error when the matching capability flag is falsy
     */
    validateOperation(operation) {
        if (operation === 'read') {
            if (!this.canRead) {
                throw new Error('Read operations are not allowed in write-only mode');
            }
            return;
        }
        if (operation === 'write') {
            if (!this.canWrite) {
                throw new Error('Write operations are not allowed in read-only mode');
            }
            return;
        }
        if (operation === 'delete' && !this.canDelete) {
            throw new Error('Delete operations are not allowed in this mode');
        }
    }
}
15883
/**
 * Read-only mode: reading is allowed, writing and deleting are not.
 * Its cache strategy favours the read cache and has no write buffer.
 */
class ReaderMode extends BaseOperationalMode {
    constructor(...args) {
        super(...args);
        // Capability flags checked by validateOperation
        this.canRead = true;
        this.canWrite = false;
        this.canDelete = false;
        this.cacheStrategy = {
            hotCacheRatio: 0.8, // share reserved for the read cache
            prefetchAggressive: true, // prefetch related vectors eagerly
            ttl: 3600000, // 1 hour, in milliseconds
            compressionEnabled: true, // spend CPU to fit more into the cache
            writeBufferSize: 0, // readers never buffer writes
            batchWrites: false, // readers never write
            adaptive: true // adjust to query patterns
        };
    }
    /**
     * Cache configuration tuned for readers.
     * @returns A fresh configuration object on every call
     */
    getCacheConfig() {
        const readerCacheConfig = {
            hotCacheMaxSize: 1000000, // large hot cache
            hotCacheEvictionThreshold: 0.9, // keep the cache close to full
            warmCacheTTL: 3600000, // 1 hour, in milliseconds
            batchSize: 100, // batch size for reads
            autoTune: true, // tune for read patterns
            autoTuneInterval: 60000, // every minute
            readOnly: true // enable read-only optimizations
        };
        return readerCacheConfig;
    }
}
15917
/**
 * Write-only mode: writing and deleting are allowed, reading is not.
 * Its cache strategy keeps the cache small and batches writes.
 */
class WriterMode extends BaseOperationalMode {
    constructor(...args) {
        super(...args);
        // Capability flags checked by validateOperation
        this.canRead = false;
        this.canWrite = true;
        this.canDelete = true;
        this.cacheStrategy = {
            hotCacheRatio: 0.2, // small cache share; the rest is for the write buffer
            prefetchAggressive: false, // writers do not prefetch
            ttl: 60000, // 1 minute, in milliseconds
            compressionEnabled: false, // favour speed over memory efficiency
            writeBufferSize: 10000, // large buffer for batching
            batchWrites: true, // batch writes
            adaptive: false // fixed strategy for consistent writes
        };
    }
    /**
     * Cache configuration tuned for writers.
     * @returns A fresh configuration object on every call
     */
    getCacheConfig() {
        const writerCacheConfig = {
            hotCacheMaxSize: 100000, // small hot cache
            hotCacheEvictionThreshold: 0.5, // evict aggressively
            warmCacheTTL: 60000, // 1 minute, in milliseconds
            batchSize: 1000, // batch size for writes
            autoTune: false, // fixed configuration
            writeOnly: true // enable write-only optimizations
        };
        return writerCacheConfig;
    }
}
15950
/**
 * Hybrid mode: reading, writing and deleting are all allowed.
 * Its cache strategy starts balanced and can be re-tuned from the observed
 * read/write mix via updateWorkloadBalance.
 */
class HybridMode extends BaseOperationalMode {
    constructor(...args) {
        super(...args);
        // Capability flags checked by validateOperation
        this.canRead = true;
        this.canWrite = true;
        this.canDelete = true;
        this.cacheStrategy = {
            hotCacheRatio: 0.5, // even split between cache and buffer
            prefetchAggressive: false, // moderate prefetching
            ttl: 600000, // 10 minutes, in milliseconds
            compressionEnabled: true, // compress when beneficial
            writeBufferSize: 5000, // moderate write buffer
            batchWrites: true, // batch writes when possible
            adaptive: true // adjust to the workload mix
        };
        // Fraction of recent operations that were reads
        this.readWriteRatio = 0.5;
    }
    /**
     * Balanced cache configuration.
     * @returns A fresh configuration object on every call
     */
    getCacheConfig() {
        const hybridCacheConfig = {
            hotCacheMaxSize: 500000, // medium cache size
            hotCacheEvictionThreshold: 0.7, // balanced eviction
            warmCacheTTL: 600000, // 10 minutes, in milliseconds
            batchSize: 500, // medium batch size
            autoTune: true, // tune based on workload
            autoTuneInterval: 300000 // every 5 minutes
        };
        return hybridCacheConfig;
    }
    /**
     * Re-tune the cache strategy from recent operation counts.
     * Does nothing when both counts add up to 0.
     * @param readCount - Number of recent reads
     * @param writeCount - Number of recent writes
     */
    updateWorkloadBalance(readCount, writeCount) {
        const operations = readCount + writeCount;
        if (operations === 0) {
            return;
        }
        const readShare = readCount / operations;
        this.readWriteRatio = readShare;
        let tuning;
        if (readShare > 0.8) {
            // Read-heavy workload
            tuning = { hotCacheRatio: 0.7, prefetchAggressive: true, writeBufferSize: 2000 };
        }
        else if (readShare < 0.2) {
            // Write-heavy workload
            tuning = { hotCacheRatio: 0.3, prefetchAggressive: false, writeBufferSize: 8000 };
        }
        else {
            // Balanced workload
            tuning = { hotCacheRatio: 0.5, prefetchAggressive: false, writeBufferSize: 5000 };
        }
        // Only these three fields change; the rest of the strategy is kept
        Object.assign(this.cacheStrategy, tuning);
    }
}
16014
/**
 * Factory that maps an instance role onto its operational mode object.
 */
class OperationalModeFactory {
    /**
     * Create the operational mode for a role.
     * @param role - 'reader', 'writer' or 'hybrid'
     * @returns The matching mode; any other role yields a ReaderMode
     */
    static createMode(role) {
        if (role === 'writer') {
            return new WriterMode();
        }
        if (role === 'hybrid') {
            return new HybridMode();
        }
        // 'reader' and every unrecognised role: read-only is the safe default
        return new ReaderMode();
    }
    /**
     * Create the mode for a role and overlay custom cache strategy values.
     * @param role - The instance role
     * @param customStrategy - Cache strategy fields that override the defaults
     * @returns The operational mode with the merged strategy
     */
    static createModeWithStrategy(role, customStrategy) {
        const mode = this.createMode(role);
        const defaults = mode.cacheStrategy;
        // Later spread wins, so custom values replace the mode's defaults
        mode.cacheStrategy = { ...defaults, ...customStrategy };
        return mode;
    }
}
16052
+
16053
/**
 * Domain Detector
 *
 * Heuristically assigns a data object to a domain ('medical', 'legal',
 * 'product', 'customer', 'financial', 'technical', a custom domain, or
 * 'general') by scoring its keys and serialized content against patterns,
 * and extracts a small set of domain-specific metadata fields.
 */
class DomainDetector {
    constructor() {
        // Built-in patterns. `fields` are matched against object keys,
        // `keywords` against the serialized object (keys and values).
        this.domainPatterns = [
            {
                domain: 'medical',
                patterns: {
                    fields: ['symptoms', 'diagnosis', 'treatment', 'medication', 'patient'],
                    keywords: ['medical', 'health', 'disease', 'symptom', 'treatment', 'doctor', 'patient']
                },
                priority: 1
            },
            {
                domain: 'legal',
                patterns: {
                    fields: ['contract', 'clause', 'litigation', 'statute', 'jurisdiction'],
                    keywords: ['legal', 'law', 'contract', 'court', 'attorney', 'litigation', 'statute']
                },
                priority: 1
            },
            {
                domain: 'product',
                patterns: {
                    fields: ['price', 'sku', 'inventory', 'category', 'brand'],
                    keywords: ['product', 'price', 'sale', 'inventory', 'catalog', 'item', 'sku']
                },
                priority: 1
            },
            {
                domain: 'customer',
                patterns: {
                    fields: ['customerId', 'email', 'phone', 'address', 'orders'],
                    keywords: ['customer', 'client', 'user', 'account', 'profile', 'contact']
                },
                priority: 1
            },
            {
                domain: 'financial',
                patterns: {
                    fields: ['amount', 'currency', 'transaction', 'balance', 'account'],
                    keywords: ['financial', 'money', 'payment', 'transaction', 'bank', 'credit', 'debit']
                },
                priority: 1
            },
            {
                domain: 'technical',
                patterns: {
                    fields: ['code', 'function', 'error', 'stack', 'api'],
                    keywords: ['code', 'software', 'api', 'error', 'debug', 'function', 'class', 'method']
                },
                priority: 2
            }
        ];
        this.customPatterns = [];
        this.domainStats = new Map();
    }
    /**
     * Detect the domain of a data object.
     *
     * A string `data.domain` is taken as-is. Otherwise every custom and
     * built-in pattern is scored (score times priority) and the highest
     * total wins; with no match the domain is 'general'.
     * @param data - The data object to analyze
     * @returns `{ domain, domainMetadata }`, or `{ domain: 'general' }` for
     *   non-object input
     */
    detectDomain(data) {
        if (!data || typeof data !== 'object') {
            return { domain: 'general' };
        }
        // An explicit domain field short-circuits detection
        if (data.domain && typeof data.domain === 'string') {
            this.updateStats(data.domain);
            return {
                domain: data.domain,
                domainMetadata: this.extractDomainMetadata(data, data.domain)
            };
        }
        // Serialize once and share it across all pattern checks
        const serialized = this.serializeForMatching(data);
        const scores = new Map();
        for (const pattern of this.customPatterns) {
            const score = this.scorePattern(data, pattern, serialized);
            if (score > 0) {
                scores.set(pattern.domain, score * (pattern.priority || 1));
            }
        }
        // Built-in patterns add to whatever a custom pattern already scored
        for (const pattern of this.domainPatterns) {
            const score = this.scorePattern(data, pattern, serialized);
            if (score > 0) {
                const currentScore = scores.get(pattern.domain) || 0;
                scores.set(pattern.domain, currentScore + score * (pattern.priority || 1));
            }
        }
        let bestDomain = 'general';
        let bestScore = 0;
        for (const [domain, score] of scores.entries()) {
            if (score > bestScore) {
                bestDomain = domain;
                bestScore = score;
            }
        }
        this.updateStats(bestDomain);
        return {
            domain: bestDomain,
            domainMetadata: this.extractDomainMetadata(data, bestDomain, serialized)
        };
    }
    /**
     * Serialize data for keyword/regex matching.
     * Data that JSON cannot represent (circular references, BigInt) yields
     * empty strings, so only key-based matching applies to it.
     * @param data - The data object
     * @returns `{ raw, lower }` - the JSON text and its lower-cased form
     */
    serializeForMatching(data) {
        let raw = '';
        try {
            raw = JSON.stringify(data) ?? '';
        }
        catch (error) {
            // Detection is a heuristic; unserializable data must not abort it
            raw = '';
        }
        return { raw, lower: raw.toLowerCase() };
    }
    /**
     * Score a data object against a domain pattern: +2 per pattern field
     * contained in some key, +1 per keyword found in the serialized data,
     * +3 if the pattern's regex matches the serialized data.
     * @param data - The data object
     * @param pattern - Domain pattern (`patterns.fields/keywords/regex`)
     * @param serialized - Optional result of serializeForMatching(data),
     *   passed to avoid serializing the same object repeatedly
     * @returns The unweighted score (priority is not applied here)
     */
    scorePattern(data, pattern, serialized = this.serializeForMatching(data)) {
        const { fields, keywords, regex } = pattern.patterns;
        let score = 0;
        if (fields) {
            const dataKeys = Object.keys(data).map(key => key.toLowerCase());
            for (const field of fields) {
                const wanted = field.toLowerCase();
                if (dataKeys.some(key => key.includes(wanted))) {
                    score += 2; // Field match is a strong signal
                }
            }
        }
        if (keywords) {
            for (const keyword of keywords) {
                if (serialized.lower.includes(keyword.toLowerCase())) {
                    score += 1;
                }
            }
        }
        if (regex) {
            // A global/sticky regex resumes from lastIndex; start over so
            // repeated scoring of the same pattern gives the same answer
            regex.lastIndex = 0;
            if (regex.test(serialized.raw)) {
                score += 3; // Regex match is very specific
            }
        }
        return score;
    }
    /**
     * Whether a value that gets bucketed into a range is present.
     * Unlike a plain truthiness check this accepts the number 0.
     */
    hasRangeValue(value) {
        return typeof value === 'number' ? !Number.isNaN(value) : Boolean(value);
    }
    /**
     * Extract domain-specific metadata plus a `detectionConfidence` entry.
     * @param data - The data object
     * @param domain - The domain chosen for it
     * @param serialized - Optional result of serializeForMatching(data)
     * @returns Metadata object
     */
    extractDomainMetadata(data, domain, serialized) {
        const metadata = {};
        switch (domain) {
            case 'medical':
                if (data.patientId)
                    metadata.patientId = data.patientId;
                if (data.condition)
                    metadata.condition = data.condition;
                if (data.severity)
                    metadata.severity = data.severity;
                break;
            case 'legal':
                if (data.caseId)
                    metadata.caseId = data.caseId;
                if (data.jurisdiction)
                    metadata.jurisdiction = data.jurisdiction;
                if (data.documentType)
                    metadata.documentType = data.documentType;
                break;
            case 'product':
                if (data.sku)
                    metadata.sku = data.sku;
                if (data.category)
                    metadata.category = data.category;
                if (data.brand)
                    metadata.brand = data.brand;
                if (this.hasRangeValue(data.price))
                    metadata.priceRange = this.getPriceRange(data.price);
                break;
            case 'customer':
                if (data.customerId)
                    metadata.customerId = data.customerId;
                if (data.segment)
                    metadata.segment = data.segment;
                if (this.hasRangeValue(data.lifetime_value))
                    metadata.valueCategory = this.getValueCategory(data.lifetime_value);
                break;
            case 'financial':
                if (data.accountId)
                    metadata.accountId = data.accountId;
                if (data.transactionType)
                    metadata.transactionType = data.transactionType;
                if (this.hasRangeValue(data.amount))
                    metadata.amountRange = this.getAmountRange(data.amount);
                break;
            case 'technical':
                if (data.service)
                    metadata.service = data.service;
                if (data.environment)
                    metadata.environment = data.environment;
                if (data.severity)
                    metadata.severity = data.severity;
                break;
        }
        metadata.detectionConfidence = this.calculateConfidence(data, domain, serialized);
        return metadata;
    }
    /**
     * Calculate detection confidence: 'high' when the domain was given
     * explicitly or the pattern score is at least 5, 'medium' from 2,
     * otherwise 'low' (also when no pattern exists for the domain).
     * @param data - The data object
     * @param domain - The domain chosen for it
     * @param serialized - Optional result of serializeForMatching(data)
     * @returns 'high', 'medium' or 'low'
     */
    calculateConfidence(data, domain, serialized) {
        if (data.domain === domain)
            return 'high';
        // Custom patterns take precedence over built-in ones
        const pattern = [...this.customPatterns, ...this.domainPatterns]
            .find(p => p.domain === domain);
        if (!pattern)
            return 'low';
        const score = this.scorePattern(data, pattern, serialized);
        if (score >= 5)
            return 'high';
        if (score >= 2)
            return 'medium';
        return 'low';
    }
    /**
     * Categorize price ranges: <10 low, <100 medium, <1000 high, else premium.
     */
    getPriceRange(price) {
        if (price < 10)
            return 'low';
        if (price < 100)
            return 'medium';
        if (price < 1000)
            return 'high';
        return 'premium';
    }
    /**
     * Categorize customer value: <100 low, <1000 medium, <10000 high, else vip.
     */
    getValueCategory(value) {
        if (value < 100)
            return 'low';
        if (value < 1000)
            return 'medium';
        if (value < 10000)
            return 'high';
        return 'vip';
    }
    /**
     * Categorize amounts: <100 micro, <1000 small, <10000 medium,
     * <100000 large, else enterprise.
     */
    getAmountRange(amount) {
        if (amount < 100)
            return 'micro';
        if (amount < 1000)
            return 'small';
        if (amount < 10000)
            return 'medium';
        if (amount < 100000)
            return 'large';
        return 'enterprise';
    }
    /**
     * Add a custom domain pattern, replacing any existing custom pattern
     * for the same domain.
     * @param pattern - Custom domain pattern to add
     */
    addCustomPattern(pattern) {
        this.customPatterns = this.customPatterns.filter(p => p.domain !== pattern.domain);
        this.customPatterns.push(pattern);
    }
    /**
     * Remove the custom pattern for a domain.
     * @param domain - Domain to remove the pattern for
     */
    removeCustomPattern(domain) {
        this.customPatterns = this.customPatterns.filter(p => p.domain !== domain);
    }
    /**
     * Increment the detection counter for a domain.
     */
    updateStats(domain) {
        const count = this.domainStats.get(domain) || 0;
        this.domainStats.set(domain, count + 1);
    }
    /**
     * Get domain statistics.
     * @returns A copy of the map of domain to detection count
     */
    getDomainStats() {
        return new Map(this.domainStats);
    }
    /**
     * Clear domain statistics.
     */
    clearStats() {
        this.domainStats.clear();
    }
    /**
     * Get all configured domains (built-in and custom).
     * @returns Sorted array of unique domain names
     */
    getConfiguredDomains() {
        const domains = new Set();
        for (const pattern of [...this.domainPatterns, ...this.customPatterns]) {
            domains.add(pattern.domain);
        }
        return Array.from(domains).sort();
    }
}
16359
+
16360
+ /**
16361
+ * Health Monitor
16362
+ * Monitors and reports instance health in distributed deployments
16363
+ */
16364
class HealthMonitor {
    // Tracks request, cache and resource metrics for one instance and
    // periodically publishes a subset of them through the config manager.
    /**
     * @param configManager - Object exposing `updateMetrics(metrics)`,
     *   `getInstanceId()` and `getRole()`
     */
    constructor(configManager) {
        this.requestCount = 0;
        this.errorCount = 0;
        this.totalLatency = 0;
        this.cacheHits = 0;
        this.cacheMisses = 0;
        this.vectorCount = 0;
        this.checkInterval = 30000; // 30 seconds
        this.metricsWindow = []; // Sliding window of request timestamps for RPS calculation
        this.latencyWindow = []; // Sliding window of request latencies
        this.windowSize = 60000; // 1 minute window
        this.configManager = configManager;
        this.startTime = Date.now();
        // CPU baseline taken at construction so getCPUUsage() measures only the
        // CPU time consumed since this monitor started, not since process start.
        this.startCpuUsage = HealthMonitor.canReadProcess('cpuUsage')
            ? process.cpuUsage()
            : null;
    }
    /**
     * Check whether a Node.js `process` API is available (it is not in browsers).
     * @param method - Name of the `process` method to probe
     * @returns true when `process[method]` is callable
     */
    static canReadProcess(method) {
        return typeof process !== 'undefined' && typeof process[method] === 'function';
    }
    /**
     * Start health monitoring.
     * Publishes once immediately and then every `checkInterval` milliseconds.
     * Calling start() while already running is a no-op, so no timer is leaked.
     */
    start() {
        if (this.healthCheckTimer) {
            return;
        }
        const runCheck = () => {
            // updateHealth() is async; a failed publish must not surface as an
            // unhandled promise rejection from a background timer.
            this.updateHealth().catch((error) => {
                console.warn('Health monitor failed to update metrics:', error);
            });
        };
        // Initial health update
        runCheck();
        // Schedule periodic health checks
        this.healthCheckTimer = setInterval(runCheck, this.checkInterval);
    }
    /**
     * Stop health monitoring
     */
    stop() {
        if (this.healthCheckTimer) {
            clearInterval(this.healthCheckTimer);
            this.healthCheckTimer = undefined;
        }
    }
    /**
     * Update health status and metrics
     * @returns Promise that settles once the metrics have been handed to the config manager
     */
    async updateHealth() {
        const metrics = this.collectMetrics();
        try {
            // Update config with latest metrics
            await this.configManager.updateMetrics({
                vectorCount: metrics.vectorCount,
                cacheHitRate: metrics.cacheHitRate,
                memoryUsage: metrics.memoryUsage,
                cpuUsage: metrics.cpuUsage
            });
        }
        finally {
            // Trim the sliding windows even when publishing failed, otherwise
            // they would keep growing while the config store is unreachable.
            this.cleanWindows();
        }
    }
    /**
     * Collect current metrics
     * @returns Snapshot of all tracked metrics
     */
    collectMetrics() {
        const heapUsed = HealthMonitor.canReadProcess('memoryUsage')
            ? process.memoryUsage().heapUsed
            : 0;
        return {
            vectorCount: this.vectorCount,
            cacheHitRate: this.calculateCacheHitRate(),
            memoryUsage: heapUsed,
            cpuUsage: this.getCPUUsage(),
            requestsPerSecond: this.calculateRPS(),
            averageLatency: this.calculateAverageLatency(),
            errorRate: this.calculateErrorRate()
        };
    }
    /**
     * Calculate cache hit rate
     * @returns Fraction of cache accesses that were hits (0 when there were none)
     */
    calculateCacheHitRate() {
        const total = this.cacheHits + this.cacheMisses;
        if (total === 0)
            return 0;
        return this.cacheHits / total;
    }
    /**
     * Calculate requests per second over the sliding window
     */
    calculateRPS() {
        const now = Date.now();
        const recentRequests = this.metricsWindow.filter(timestamp => now - timestamp < this.windowSize);
        return recentRequests.length / (this.windowSize / 1000);
    }
    /**
     * Calculate average latency of the retained latency samples
     */
    calculateAverageLatency() {
        if (this.latencyWindow.length === 0)
            return 0;
        const sum = this.latencyWindow.reduce((a, b) => a + b, 0);
        return sum / this.latencyWindow.length;
    }
    /**
     * Calculate error rate
     * @returns Fraction of recorded requests that were errors
     */
    calculateErrorRate() {
        if (this.requestCount === 0)
            return 0;
        return this.errorCount / this.requestCount;
    }
    /**
     * Get CPU usage (simplified)
     * @returns CPU time used since the monitor was created as a percentage of
     *   elapsed wall-clock time, capped at 100; 0 when it cannot be measured
     */
    getCPUUsage() {
        if (!HealthMonitor.canReadProcess('cpuUsage')) {
            return 0;
        }
        const elapsedSeconds = (Date.now() - this.startTime) / 1000;
        if (elapsedSeconds <= 0) {
            // Sampled in the same millisecond the monitor was created.
            return 0;
        }
        // Passing the baseline makes cpuUsage() return the delta since construction.
        const usage = process.cpuUsage(this.startCpuUsage || undefined);
        const cpuSeconds = (usage.user + usage.system) / 1000000; // microseconds -> seconds
        return Math.min(100, (cpuSeconds / elapsedSeconds) * 100);
    }
    /**
     * Clean old entries from sliding windows
     */
    cleanWindows() {
        const cutoff = Date.now() - this.windowSize;
        this.metricsWindow = this.metricsWindow.filter(t => t > cutoff);
        // Keep only recent latency measurements
        if (this.latencyWindow.length > 100) {
            this.latencyWindow = this.latencyWindow.slice(-100);
        }
    }
    /**
     * Record a request
     * @param latency - Request latency in milliseconds
     * @param error - Whether the request resulted in an error
     */
    recordRequest(latency, error = false) {
        this.requestCount++;
        this.totalLatency += latency;
        this.metricsWindow.push(Date.now());
        this.latencyWindow.push(latency);
        if (error) {
            this.errorCount++;
        }
    }
    /**
     * Record cache access
     * @param hit - Whether it was a cache hit
     */
    recordCacheAccess(hit) {
        if (hit) {
            this.cacheHits++;
        }
        else {
            this.cacheMisses++;
        }
    }
    /**
     * Update vector count
     * @param count - New vector count
     */
    updateVectorCount(count) {
        this.vectorCount = count;
    }
    /**
     * Get current health status
     * @returns Health status object
     */
    getHealthStatus() {
        const metrics = this.collectMetrics();
        const uptime = Date.now() - this.startTime;
        const warnings = [];
        const errors = [];
        // Check for warnings
        if (metrics.memoryUsage > 1024 * 1024 * 1024) { // > 1GB
            warnings.push('High memory usage detected');
        }
        // A hit rate of 0 with no cache traffic at all is not a problem; only
        // flag it once the cache has actually been consulted.
        const cacheAccesses = this.cacheHits + this.cacheMisses;
        if (cacheAccesses > 0 && metrics.cacheHitRate < 0.5) {
            warnings.push('Low cache hit rate');
        }
        if (metrics.errorRate && metrics.errorRate > 0.05) {
            warnings.push('High error rate detected');
        }
        if (metrics.averageLatency && metrics.averageLatency > 1000) {
            warnings.push('High latency detected');
        }
        // Check for errors
        if (metrics.memoryUsage > 2 * 1024 * 1024 * 1024) { // > 2GB
            errors.push('Critical memory usage');
        }
        if (metrics.errorRate && metrics.errorRate > 0.2) {
            errors.push('Critical error rate');
        }
        // Determine overall status
        let status = 'healthy';
        if (errors.length > 0) {
            status = 'unhealthy';
        }
        else if (warnings.length > 0) {
            status = 'degraded';
        }
        return {
            status,
            instanceId: this.configManager.getInstanceId(),
            role: this.configManager.getRole(),
            uptime,
            lastCheck: new Date().toISOString(),
            metrics,
            warnings: warnings.length > 0 ? warnings : undefined,
            errors: errors.length > 0 ? errors : undefined
        };
    }
    /**
     * Get health check endpoint data
     * @returns JSON-serializable health data with rounded, unit-suffixed metrics
     */
    getHealthEndpointData() {
        const status = this.getHealthStatus();
        return {
            status: status.status,
            instanceId: status.instanceId,
            role: status.role,
            uptime: Math.floor(status.uptime / 1000), // Convert to seconds
            lastCheck: status.lastCheck,
            metrics: {
                vectorCount: status.metrics.vectorCount,
                cacheHitRate: Math.round(status.metrics.cacheHitRate * 100) / 100,
                memoryUsageMB: Math.round(status.metrics.memoryUsage / 1024 / 1024),
                cpuUsagePercent: Math.round(status.metrics.cpuUsage || 0),
                requestsPerSecond: Math.round(status.metrics.requestsPerSecond || 0),
                averageLatencyMs: Math.round(status.metrics.averageLatency || 0),
                errorRate: Math.round((status.metrics.errorRate || 0) * 100) / 100
            },
            warnings: status.warnings,
            errors: status.errors
        };
    }
    /**
     * Reset metrics (useful for testing)
     * The vector count is left untouched.
     */
    resetMetrics() {
        this.requestCount = 0;
        this.errorCount = 0;
        this.totalLatency = 0;
        this.cacheHits = 0;
        this.cacheMisses = 0;
        this.metricsWindow = [];
        this.latencyWindow = [];
    }
}
16603
+
16604
+ /**
16605
+ * SearchCache - Caches search results for improved performance
16606
+ */
16607
class SearchCache {
    // In-memory cache of search results keyed by a normalized description of
    // the search (query, k and options). Entries expire after `maxAge`
    // milliseconds and the cache holds at most `maxSize` entries.
    /**
     * @param config - Optional settings: `maxAge` (ms, default 5 minutes),
     *   `maxSize` (entries, default 100), `enabled` (default true) and
     *   `hitCountWeight` (0..1 weight of hit count vs. age when evicting, default 0.3)
     */
    constructor(config = {}) {
        this.cache = new Map();
        // Cache statistics
        this.hits = 0;
        this.misses = 0;
        this.evictions = 0;
        this.maxAge = config.maxAge ?? 5 * 60 * 1000; // 5 minutes
        this.maxSize = config.maxSize ?? 100;
        this.enabled = config.enabled ?? true;
        this.hitCountWeight = config.hitCountWeight ?? 0.3;
    }
    /**
     * Generate cache key from search parameters
     * @param query - Search query (objects are JSON-serialized)
     * @param k - Number of results requested
     * @param options - Search options; `skipCache` and `useStreaming` are ignored
     * @returns String key that does not depend on the order of the options
     */
    getCacheKey(query, k, options = {}) {
        const source = options ?? {};
        // Sorting the option names makes the key independent of insertion order.
        const normalizedOptions = {};
        for (const name of Object.keys(source).sort()) {
            // Skip cache-related options
            if (name === 'skipCache' || name === 'useStreaming')
                continue;
            normalizedOptions[name] = source[name];
        }
        return JSON.stringify({
            query: typeof query === 'object' ? JSON.stringify(query) : query,
            k,
            ...normalizedOptions
        });
    }
    /**
     * Get cached results if available and not expired
     * @param key - Key produced by getCacheKey()
     * @returns The cached results array, or null on a miss or when disabled
     */
    get(key) {
        if (!this.enabled)
            return null;
        const entry = this.cache.get(key);
        if (!entry) {
            this.misses++;
            return null;
        }
        // Check if expired
        if (Date.now() - entry.timestamp > this.maxAge) {
            this.cache.delete(key);
            this.misses++;
            return null;
        }
        // Update hit count and statistics
        entry.hits++;
        this.hits++;
        return entry.results;
    }
    /**
     * Cache search results
     * @param key - Key produced by getCacheKey()
     * @param results - Array of search results to store
     */
    set(key, results) {
        if (!this.enabled)
            return;
        // Only make room when a new key is added; overwriting an existing key
        // does not grow the cache and must not push another entry out.
        if (!this.cache.has(key) && this.cache.size >= this.maxSize) {
            this.evictOldest();
        }
        this.cache.set(key, {
            results: [...results], // Shallow copy: the array is private, its items are shared
            timestamp: Date.now(),
            hits: 0
        });
    }
    /**
     * Compute how eligible an entry is for eviction.
     * @param entry - Cache entry with `timestamp` and `hits`
     * @param now - Reference time in milliseconds
     * @returns Score where HIGHER means older and/or less frequently hit
     */
    getEvictionScore(entry, now = Date.now()) {
        const age = now - entry.timestamp;
        // Guard against a zero/negative TTL, which would yield NaN or Infinity.
        const ageScore = this.maxAge > 0 ? age / this.maxAge : age;
        const hitScore = 1 / (entry.hits + 1); // More hits -> smaller score
        return ageScore * (1 - this.hitCountWeight) + hitScore * this.hitCountWeight;
    }
    /**
     * Evict the single entry with the highest eviction score
     * (the oldest / least frequently hit one)
     */
    evictOldest() {
        let victimKey = null;
        let victimScore = -Infinity;
        const now = Date.now();
        for (const [key, entry] of this.cache.entries()) {
            const score = this.getEvictionScore(entry, now);
            if (score > victimScore) {
                victimScore = score;
                victimKey = key;
            }
        }
        if (victimKey !== null) {
            this.cache.delete(victimKey);
            this.evictions++;
        }
    }
    /**
     * Clear all cached results and reset the statistics
     */
    clear() {
        this.cache.clear();
        this.hits = 0;
        this.misses = 0;
        this.evictions = 0;
    }
    /**
     * Invalidate cache entries that might be affected by data changes
     * @param pattern - Substring or RegExp matched against cache keys;
     *   when omitted the whole cache is cleared
     */
    invalidate(pattern) {
        if (!pattern) {
            this.clear();
            return;
        }
        // Collect first, delete afterwards, so the Map is not modified while iterating.
        const keysToDelete = [];
        for (const key of this.cache.keys()) {
            const shouldDelete = typeof pattern === 'string'
                ? key.includes(pattern)
                : pattern.test(key);
            if (shouldDelete) {
                keysToDelete.push(key);
            }
        }
        keysToDelete.forEach(key => this.cache.delete(key));
    }
    /**
     * Smart invalidation for real-time data updates
     * @param changeType - Kind of change that occurred (currently unused)
     */
    invalidateOnDataChange(changeType) {
        // For now, clear all caches on data changes to ensure consistency
        // In the future, we could implement more sophisticated invalidation
        // based on the type of change and affected data
        this.clear();
    }
    /**
     * Check if cache entries have expired and remove them
     * This is especially important in distributed scenarios where
     * real-time updates might be delayed or missed
     * @returns Number of entries removed
     */
    cleanupExpiredEntries() {
        const now = Date.now();
        const keysToDelete = [];
        for (const [key, entry] of this.cache.entries()) {
            if (now - entry.timestamp > this.maxAge) {
                keysToDelete.push(key);
            }
        }
        keysToDelete.forEach(key => this.cache.delete(key));
        return keysToDelete.length;
    }
    /**
     * Get cache statistics
     * @returns Hit/miss/eviction counters, hit rate, current size and limits
     */
    getStats() {
        const total = this.hits + this.misses;
        return {
            hits: this.hits,
            misses: this.misses,
            evictions: this.evictions,
            hitRate: total > 0 ? this.hits / total : 0,
            size: this.cache.size,
            maxSize: this.maxSize,
            enabled: this.enabled
        };
    }
    /**
     * Enable or disable caching
     * @param enabled - New enabled state; disabling also clears the cache
     */
    setEnabled(enabled) {
        // Plain assignment keeps the property writable, so updateConfig()
        // can still change it later.
        this.enabled = enabled;
        if (!enabled) {
            this.clear();
        }
    }
    /**
     * Get memory usage estimate in bytes
     * @returns Rough estimate based on the JSON size of keys and results
     */
    getMemoryUsage() {
        let totalSize = 0;
        for (const [key, entry] of this.cache.entries()) {
            // Estimate key size
            totalSize += key.length * 2; // UTF-16 characters
            // Estimate entry size
            totalSize += JSON.stringify(entry.results).length * 2;
            totalSize += 16; // timestamp + hits (8 bytes each)
        }
        return totalSize;
    }
    /**
     * Get current cache configuration
     */
    getConfig() {
        return {
            enabled: this.enabled,
            maxSize: this.maxSize,
            maxAge: this.maxAge,
            hitCountWeight: this.hitCountWeight
        };
    }
    /**
     * Update cache configuration dynamically
     * @param newConfig - Partial configuration; only defined fields are applied
     */
    updateConfig(newConfig) {
        if (newConfig.enabled !== undefined) {
            this.enabled = newConfig.enabled;
        }
        if (newConfig.maxSize !== undefined) {
            this.maxSize = newConfig.maxSize;
            // Trigger eviction if current size exceeds new limit
            this.evictIfNeeded();
        }
        if (newConfig.maxAge !== undefined) {
            this.maxAge = newConfig.maxAge;
            // Clean up entries that are now expired with new TTL
            this.cleanupExpiredEntries();
        }
        if (newConfig.hitCountWeight !== undefined) {
            this.hitCountWeight = newConfig.hitCountWeight;
        }
    }
    /**
     * Evict entries if cache exceeds maxSize
     * Removes the entries with the highest eviction scores until the cache fits.
     */
    evictIfNeeded() {
        const excess = this.cache.size - this.maxSize;
        if (excess <= 0) {
            return;
        }
        const now = Date.now();
        const ranked = Array.from(this.cache.entries(), ([key, entry]) => ({
            key,
            score: this.getEvictionScore(entry, now)
        }));
        // Highest score first: those are the oldest / least used entries.
        ranked.sort((a, b) => b.score - a.score);
        for (const { key } of ranked.slice(0, excess)) {
            this.cache.delete(key);
            this.evictions++;
        }
    }
}
16851
+
16852
+ /**
16853
+ * Intelligent cache auto-configuration system
16854
+ * Adapts cache settings based on environment, usage patterns, and storage type
16855
+ */
16856
class CacheAutoConfigurator {
    // Derives search-cache and real-time-update settings from the storage
    // configuration and from usage statistics gathered at runtime.
    constructor() {
        this.stats = {
            totalQueries: 0,
            repeatQueries: 0,
            avgQueryTime: 50,
            memoryPressure: 0,
            storageType: 'memory',
            isDistributed: false,
            changeFrequency: 0,
            readWriteRatio: 10,
        };
        this.configHistory = [];
        this.lastOptimization = 0;
    }
    /**
     * Auto-detect optimal cache configuration based on current conditions
     * @param storageConfig - Storage options used to detect the storage type
     * @param currentStats - Optional partial stats merged into the internal stats
     * @returns Object with `cacheConfig`, `realtimeConfig` and `reasoning`
     */
    autoDetectOptimalConfig(storageConfig, currentStats) {
        // Update stats with current information
        if (currentStats) {
            this.stats = { ...this.stats, ...currentStats };
        }
        // Detect environment characteristics
        this.detectEnvironment(storageConfig);
        // Generate optimal configuration
        const result = this.generateOptimalConfig();
        // Store for learning
        this.configHistory.push(result);
        this.lastOptimization = Date.now();
        return result;
    }
    /**
     * Dynamically adjust configuration based on runtime performance
     * @param currentConfig - Cache configuration currently in effect
     * @param performanceMetrics - Object with `hitRate`, `externalChangesDetected`,
     *   `avgResponseTime`, `memoryUsage` (bytes) and `timeSinceLastChange` (ms)
     * @returns New configuration with reasoning, or null when nothing should
     *   change or the last optimization was less than a minute ago
     */
    adaptConfiguration(currentConfig, performanceMetrics) {
        // Check if we should update (don't over-optimize)
        if (Date.now() - this.lastOptimization < 60000) {
            return null; // Wait at least 1 minute between optimizations
        }
        const reasoning = [];
        const adaptations = {};
        // Fall back to the defaults when a field is missing, otherwise the
        // arithmetic below would produce NaN and entries would never expire.
        const baseSize = currentConfig.maxSize || 100;
        const baseAge = currentConfig.maxAge ?? 300000;
        // Low hit rate → adjust cache size or TTL
        if (performanceMetrics.hitRate < 0.3) {
            if (performanceMetrics.externalChangesDetected > 5) {
                // Too many external changes → shorter TTL
                adaptations.maxAge = Math.max(60000, baseAge * 0.7);
                reasoning.push('Reduced cache TTL due to frequent external changes');
            }
            else {
                // Expand cache size for better hit rate
                adaptations.maxSize = Math.min(500, Math.round(baseSize * 1.5));
                reasoning.push('Increased cache size due to low hit rate');
            }
        }
        // High hit rate but slow responses → might need cache warming
        // (advice only; does not trigger an update on its own)
        if (performanceMetrics.hitRate > 0.8 && performanceMetrics.avgResponseTime > 100) {
            reasoning.push('High hit rate but slow responses - consider cache warming');
        }
        // Memory pressure → reduce cache size (takes precedence over growth above)
        if (performanceMetrics.memoryUsage > 100 * 1024 * 1024) { // 100MB
            adaptations.maxSize = Math.max(20, Math.round(baseSize * 0.7));
            reasoning.push('Reduced cache size due to memory pressure');
        }
        // Recent external changes → adaptive TTL
        if (performanceMetrics.timeSinceLastChange < 30000) { // 30 seconds
            adaptations.maxAge = Math.max(30000, baseAge * 0.8);
            reasoning.push('Shortened TTL due to recent external changes');
        }
        if (Object.keys(adaptations).length === 0) {
            return null;
        }
        // Remember when we last adapted so the one-minute limit above holds.
        this.lastOptimization = Date.now();
        return {
            cacheConfig: { ...currentConfig, ...adaptations },
            realtimeConfig: this.calculateRealtimeConfig(),
            reasoning
        };
    }
    /**
     * Get recommended configuration for specific use case
     * @param useCase - 'high-consistency', 'balanced' or 'performance-first'
     * @returns Object with `cacheConfig`, `realtimeConfig` and `reasoning`
     * @throws TypeError when the use case is not one of the known presets
     */
    getRecommendedConfig(useCase) {
        const configs = {
            'high-consistency': {
                cache: { maxAge: 120000, maxSize: 50 },
                realtime: { interval: 15000, enabled: true },
                reasoning: ['Optimized for data consistency and real-time updates']
            },
            'balanced': {
                cache: { maxAge: 300000, maxSize: 100 },
                realtime: { interval: 30000, enabled: true },
                reasoning: ['Balanced performance and consistency']
            },
            'performance-first': {
                cache: { maxAge: 600000, maxSize: 200 },
                realtime: { interval: 60000, enabled: true },
                reasoning: ['Optimized for maximum cache performance']
            }
        };
        // Own-property check so names like 'constructor' are rejected too.
        if (!Object.prototype.hasOwnProperty.call(configs, useCase)) {
            throw new TypeError(`Unknown cache use case "${useCase}". Expected one of: ${Object.keys(configs).join(', ')}`);
        }
        const config = configs[useCase];
        return {
            cacheConfig: {
                enabled: true,
                ...config.cache
            },
            realtimeConfig: {
                updateIndex: true,
                updateStatistics: true,
                ...config.realtime
            },
            reasoning: config.reasoning
        };
    }
    /**
     * Learn from usage patterns and improve recommendations
     * @param usageData - Object with `totalQueries`, `cacheHits`, `responseTime`,
     *   `dataChanges` and `timeWindow` (ms covered by the sample)
     */
    learnFromUsage(usageData) {
        // Update internal stats for better future recommendations
        this.stats.totalQueries += usageData.totalQueries;
        this.stats.repeatQueries += usageData.cacheHits;
        this.stats.avgQueryTime = (this.stats.avgQueryTime + usageData.responseTime) / 2;
        // Changes per minute; an empty window carries no rate information.
        const windowMinutes = usageData.timeWindow / 60000;
        this.stats.changeFrequency = windowMinutes > 0
            ? usageData.dataChanges / windowMinutes
            : 0;
        // Calculate read/write ratio
        const writes = usageData.dataChanges;
        const reads = usageData.totalQueries;
        this.stats.readWriteRatio = reads > 0 ? reads / Math.max(writes, 1) : 10;
    }
    /**
     * Record the storage type and distributed flag implied by the storage config.
     * The distributed flag is only ever switched on here, never off.
     * @param storageConfig - Storage options (may be undefined)
     */
    detectEnvironment(storageConfig) {
        if (storageConfig?.s3Storage || storageConfig?.customS3Storage) {
            this.stats.storageType = 's3';
            this.stats.isDistributed = true;
        }
        else if (storageConfig?.forceFileSystemStorage) {
            this.stats.storageType = 'filesystem';
        }
        else if (storageConfig?.forceMemoryStorage) {
            this.stats.storageType = 'memory';
        }
        else {
            // Auto-detect browser vs Node.js
            this.stats.storageType = typeof window !== 'undefined' ? 'opfs' : 'filesystem';
        }
    }
    /**
     * Build a configuration from the current stats, starting from the defaults
     * and applying each applicable adjustment in turn.
     * @returns Object with `cacheConfig`, `realtimeConfig` and `reasoning`
     */
    generateOptimalConfig() {
        const reasoning = [];
        // Base configuration
        const cacheConfig = {
            enabled: true,
            maxSize: 100,
            maxAge: 300000, // 5 minutes
            hitCountWeight: 0.3
        };
        const realtimeConfig = {
            enabled: false,
            interval: 60000,
            updateIndex: true,
            updateStatistics: true
        };
        // Adjust for storage type
        if (this.stats.storageType === 's3' || this.stats.isDistributed) {
            cacheConfig.maxAge = 180000; // 3 minutes for distributed
            realtimeConfig.enabled = true;
            realtimeConfig.interval = 30000; // 30 seconds
            reasoning.push('Distributed storage detected - enabled real-time updates');
            reasoning.push('Reduced cache TTL for distributed consistency');
        }
        // Adjust for read/write patterns
        if (this.stats.readWriteRatio > 20) {
            // Read-heavy workload
            cacheConfig.maxSize = Math.min(300, Math.round(cacheConfig.maxSize * 2));
            cacheConfig.maxAge = Math.min(900000, cacheConfig.maxAge * 1.5); // Up to 15 minutes
            reasoning.push('Read-heavy workload detected - increased cache size and TTL');
        }
        else if (this.stats.readWriteRatio < 5) {
            // Write-heavy workload
            cacheConfig.maxSize = Math.max(50, Math.round(cacheConfig.maxSize * 0.7));
            cacheConfig.maxAge = Math.max(60000, cacheConfig.maxAge * 0.6);
            reasoning.push('Write-heavy workload detected - reduced cache size and TTL');
        }
        // Adjust for change frequency
        if (this.stats.changeFrequency > 10) { // More than 10 changes per minute
            realtimeConfig.interval = Math.max(10000, realtimeConfig.interval * 0.5);
            cacheConfig.maxAge = Math.max(30000, cacheConfig.maxAge * 0.5);
            reasoning.push('High change frequency detected - increased update frequency');
        }
        // Memory constraints
        if (this.detectMemoryConstraints()) {
            cacheConfig.maxSize = Math.max(20, Math.round(cacheConfig.maxSize * 0.6));
            reasoning.push('Memory constraints detected - reduced cache size');
        }
        // Performance optimization
        if (this.stats.avgQueryTime > 200) {
            cacheConfig.maxSize = Math.min(500, Math.round(cacheConfig.maxSize * 1.5));
            reasoning.push('Slow queries detected - increased cache size');
        }
        return {
            cacheConfig,
            realtimeConfig,
            reasoning
        };
    }
    /**
     * Derive the real-time update settings from the current stats.
     */
    calculateRealtimeConfig() {
        return {
            enabled: this.stats.isDistributed || this.stats.changeFrequency > 1,
            interval: this.stats.isDistributed ? 30000 : 60000,
            updateIndex: true,
            updateStatistics: true
        };
    }
    /**
     * Simple heuristic for memory constraints.
     * @returns true when `performance.memory` is available and the used JS heap
     *   exceeds 80% of its limit; false otherwise
     */
    detectMemoryConstraints() {
        try {
            if (typeof performance !== 'undefined' && 'memory' in performance) {
                const memInfo = performance.memory;
                return memInfo.usedJSHeapSize > memInfo.jsHeapSizeLimit * 0.8;
            }
        }
        catch (e) {
            // Best effort only: treat an unreadable heap API as "not constrained".
        }
        // Without heap information, assume memory is not constrained.
        return false;
    }
    /**
     * Get human-readable explanation of current configuration
     * @param config - Result of autoDetectOptimalConfig / adaptConfiguration /
     *   getRecommendedConfig
     * @returns Multi-line summary string
     */
    getConfigExplanation(config) {
        const lines = [
            '🤖 Brainy Auto-Configuration:',
            '',
            `📊 Cache: ${config.cacheConfig.maxSize} queries, ${config.cacheConfig.maxAge / 1000}s TTL`,
            `🔄 Updates: ${config.realtimeConfig.enabled ? `Every ${(config.realtimeConfig.interval || 30000) / 1000}s` : 'Disabled'}`,
            '',
            '🎯 Optimizations applied:'
        ];
        config.reasoning.forEach(reason => {
            lines.push(` • ${reason}`);
        });
        return lines.join('\n');
    }
}
17112
+
15420
17113
  /**
15421
17114
  * BrainyData
15422
17115
  * Main class that provides the vector database functionality
@@ -15470,6 +17163,13 @@ class BrainyData {
15470
17163
  this.remoteServerConfig = null;
15471
17164
  this.serverSearchConduit = null;
15472
17165
  this.serverConnection = null;
17166
+ // Distributed mode properties
17167
+ this.distributedConfig = null;
17168
+ this.configManager = null;
17169
+ this.partitioner = null;
17170
+ this.operationalMode = null;
17171
+ this.domainDetector = null;
17172
+ this.healthMonitor = null;
15473
17173
  // Set dimensions to fixed value of 512 (Universal Sentence Encoder dimension)
15474
17174
  this._dimensions = 512;
15475
17175
  // Set distance function
@@ -15554,6 +17254,39 @@ class BrainyData {
15554
17254
  ...config.cache
15555
17255
  };
15556
17256
  }
17257
+ // Store distributed configuration
17258
+ if (config.distributed) {
17259
+ if (typeof config.distributed === 'boolean') {
17260
+ // Auto-mode enabled
17261
+ this.distributedConfig = {
17262
+ enabled: true
17263
+ };
17264
+ }
17265
+ else {
17266
+ // Explicit configuration
17267
+ this.distributedConfig = config.distributed;
17268
+ }
17269
+ }
17270
+ // Initialize cache auto-configurator first
17271
+ this.cacheAutoConfigurator = new CacheAutoConfigurator();
17272
+ // Auto-detect optimal cache configuration if not explicitly provided
17273
+ let finalSearchCacheConfig = config.searchCache;
17274
+ if (!config.searchCache || Object.keys(config.searchCache).length === 0) {
17275
+ const autoConfig = this.cacheAutoConfigurator.autoDetectOptimalConfig(config.storage);
17276
+ finalSearchCacheConfig = autoConfig.cacheConfig;
17277
+ // Apply auto-detected real-time update configuration if not explicitly set
17278
+ if (!config.realtimeUpdates && autoConfig.realtimeConfig.enabled) {
17279
+ this.realtimeUpdateConfig = {
17280
+ ...this.realtimeUpdateConfig,
17281
+ ...autoConfig.realtimeConfig
17282
+ };
17283
+ }
17284
+ if (this.loggingConfig?.verbose) {
17285
+ console.log(this.cacheAutoConfigurator.getConfigExplanation(autoConfig));
17286
+ }
17287
+ }
17288
+ // Initialize search cache with final configuration
17289
+ this.searchCache = new SearchCache(finalSearchCacheConfig);
15557
17290
  }
15558
17291
  /**
15559
17292
  * Check if the database is in read-only mode and throw an error if it is
@@ -15693,6 +17426,17 @@ class BrainyData {
15693
17426
  await this.applyChangesFromFullScan();
15694
17427
  }
15695
17428
  }
17429
+ // Cleanup expired cache entries (defensive mechanism for distributed scenarios)
17430
+ const expiredCount = this.searchCache.cleanupExpiredEntries();
17431
+ if (expiredCount > 0 && this.loggingConfig?.verbose) {
17432
+ console.log(`Cleaned up ${expiredCount} expired cache entries`);
17433
+ }
17434
+ // Adapt cache configuration based on performance (every few updates)
17435
+ // Only adapt every 5th update to avoid over-optimization
17436
+ const updateCount = Math.floor((Date.now() - (this.lastUpdateTime || 0)) / this.realtimeUpdateConfig.interval);
17437
+ if (updateCount % 5 === 0) {
17438
+ this.adaptCacheConfiguration();
17439
+ }
15696
17440
  // Update the last update time
15697
17441
  this.lastUpdateTime = Date.now();
15698
17442
  if (this.loggingConfig?.verbose) {
@@ -15767,6 +17511,13 @@ class BrainyData {
15767
17511
  (addedCount > 0 || updatedCount > 0 || deletedCount > 0)) {
15768
17512
  console.log(`Real-time update: Added ${addedCount}, updated ${updatedCount}, deleted ${deletedCount} nouns using change log`);
15769
17513
  }
17514
+ // Invalidate search cache if any external changes were detected
17515
+ if (addedCount > 0 || updatedCount > 0 || deletedCount > 0) {
17516
+ this.searchCache.invalidateOnDataChange('update');
17517
+ if (this.loggingConfig?.verbose) {
17518
+ console.log('Search cache invalidated due to external data changes');
17519
+ }
17520
+ }
15770
17521
  // Update the last known noun count
15771
17522
  this.lastKnownNounCount = await this.getNounCount();
15772
17523
  }
@@ -15810,6 +17561,13 @@ class BrainyData {
15810
17561
  }
15811
17562
  // Update the last known noun count
15812
17563
  this.lastKnownNounCount = currentCount;
17564
+ // Invalidate search cache if new nouns were detected
17565
+ if (newNouns.length > 0) {
17566
+ this.searchCache.invalidateOnDataChange('add');
17567
+ if (this.loggingConfig?.verbose) {
17568
+ console.log('Search cache invalidated due to external data changes');
17569
+ }
17570
+ }
15813
17571
  if (this.loggingConfig?.verbose && newNouns.length > 0) {
15814
17572
  console.log(`Real-time update: Added ${newNouns.length} new nouns to index using full scan`);
15815
17573
  }
@@ -15941,6 +17699,10 @@ class BrainyData {
15941
17699
  }
15942
17700
  // Initialize storage
15943
17701
  await this.storage.init();
17702
+ // Initialize distributed mode if configured
17703
+ if (this.distributedConfig) {
17704
+ await this.initializeDistributedMode();
17705
+ }
15944
17706
  // If using optimized index, set the storage adapter
15945
17707
  if (this.useOptimizedIndex && this.index instanceof HNSWIndexOptimized) {
15946
17708
  this.index.setStorage(this.storage);
@@ -16000,6 +17762,97 @@ class BrainyData {
16000
17762
  throw new Error(`Failed to initialize BrainyData: ${error}`);
16001
17763
  }
16002
17764
  }
17765
+ /**
17766
+ * Initialize distributed mode
17767
+ * Sets up configuration management, partitioning, and operational modes
17768
+ */
17769
+ async initializeDistributedMode() {
17770
+ if (!this.storage) {
17771
+ throw new Error('Storage must be initialized before distributed mode');
17772
+ }
17773
+ // Create configuration manager with mode hints
17774
+ this.configManager = new DistributedConfigManager(this.storage, this.distributedConfig || undefined, { readOnly: this.readOnly, writeOnly: this.writeOnly });
17775
+ // Initialize configuration
17776
+ const sharedConfig = await this.configManager.initialize();
17777
+ // Create partitioner based on strategy
17778
+ if (sharedConfig.settings.partitionStrategy === 'hash') {
17779
+ this.partitioner = new HashPartitioner(sharedConfig);
17780
+ }
17781
+ else {
17782
+ // Default to hash partitioner for now
17783
+ this.partitioner = new HashPartitioner(sharedConfig);
17784
+ }
17785
+ // Create operational mode based on role
17786
+ const role = this.configManager.getRole();
17787
+ this.operationalMode = OperationalModeFactory.createMode(role);
17788
+ // Validate that role matches the configured mode
17789
+ // Don't override explicitly set readOnly/writeOnly
17790
+ if (role === 'reader' && !this.readOnly) {
17791
+ console.warn('Distributed role is "reader" but readOnly is not set. Setting readOnly=true for consistency.');
17792
+ this.readOnly = true;
17793
+ this.writeOnly = false;
17794
+ }
17795
+ else if (role === 'writer' && !this.writeOnly) {
17796
+ console.warn('Distributed role is "writer" but writeOnly is not set. Setting writeOnly=true for consistency.');
17797
+ this.readOnly = false;
17798
+ this.writeOnly = true;
17799
+ }
17800
+ else if (role === 'hybrid' && (this.readOnly || this.writeOnly)) {
17801
+ console.warn('Distributed role is "hybrid" but readOnly or writeOnly is set. Clearing both for hybrid mode.');
17802
+ this.readOnly = false;
17803
+ this.writeOnly = false;
17804
+ }
17805
+ // Apply cache configuration from operational mode
17806
+ const modeCache = this.operationalMode.cacheStrategy;
17807
+ if (modeCache) {
17808
+ this.cacheConfig = {
17809
+ ...this.cacheConfig,
17810
+ hotCacheMaxSize: modeCache.hotCacheRatio * 1000000, // Convert ratio to size
17811
+ hotCacheEvictionThreshold: modeCache.hotCacheRatio,
17812
+ warmCacheTTL: modeCache.ttl,
17813
+ batchSize: modeCache.writeBufferSize || 100
17814
+ };
17815
+ // Update storage cache config if it supports it
17816
+ if (this.storage && 'updateCacheConfig' in this.storage) {
17817
+ this.storage.updateCacheConfig(this.cacheConfig);
17818
+ }
17819
+ }
17820
+ // Initialize domain detector
17821
+ this.domainDetector = new DomainDetector();
17822
+ // Initialize health monitor
17823
+ this.healthMonitor = new HealthMonitor(this.configManager);
17824
+ this.healthMonitor.start();
17825
+ // Set up config update listener
17826
+ this.configManager.setOnConfigUpdate((config) => {
17827
+ this.handleDistributedConfigUpdate(config);
17828
+ });
17829
+ if (this.loggingConfig?.verbose) {
17830
+ console.log(`Distributed mode initialized as ${role} with ${sharedConfig.settings.partitionStrategy} partitioning`);
17831
+ }
17832
+ }
17833
+ /**
17834
+ * Handle distributed configuration updates
17835
+ */
17836
+ handleDistributedConfigUpdate(config) {
17837
+ // Update partitioner if needed
17838
+ if (this.partitioner && config.settings) {
17839
+ this.partitioner = new HashPartitioner(config);
17840
+ }
17841
+ // Log configuration update
17842
+ if (this.loggingConfig?.verbose) {
17843
+ console.log('Distributed configuration updated:', config.version);
17844
+ }
17845
+ }
17846
+ /**
17847
+ * Get distributed health status
17848
+ * @returns Health status if distributed mode is enabled
17849
+ */
17850
+ getHealthStatus() {
17851
+ if (this.healthMonitor) {
17852
+ return this.healthMonitor.getHealthEndpointData();
17853
+ }
17854
+ return null;
17855
+ }
16003
17856
  /**
16004
17857
  * Connect to a remote Brainy server for search operations
16005
17858
  * @param serverUrl WebSocket URL of the remote Brainy server
@@ -16216,6 +18069,33 @@ class BrainyData {
16216
18069
  if (metadata && typeof metadata === 'object') {
16217
18070
  // Always make a copy without adding the ID
16218
18071
  metadataToSave = { ...metadata };
18072
+ // Add domain metadata if distributed mode is enabled
18073
+ if (this.domainDetector) {
18074
+ // First check if domain is already in metadata
18075
+ if (metadataToSave.domain) {
18076
+ // Domain already specified, keep it
18077
+ const domainInfo = this.domainDetector.detectDomain(metadataToSave);
18078
+ if (domainInfo.domainMetadata) {
18079
+ metadataToSave.domainMetadata = domainInfo.domainMetadata;
18080
+ }
18081
+ }
18082
+ else {
18083
+ // Try to detect domain from the data
18084
+ const dataToAnalyze = Array.isArray(vectorOrData) ? metadata : vectorOrData;
18085
+ const domainInfo = this.domainDetector.detectDomain(dataToAnalyze);
18086
+ if (domainInfo.domain) {
18087
+ metadataToSave.domain = domainInfo.domain;
18088
+ if (domainInfo.domainMetadata) {
18089
+ metadataToSave.domainMetadata = domainInfo.domainMetadata;
18090
+ }
18091
+ }
18092
+ }
18093
+ }
18094
+ // Add partition information if distributed mode is enabled
18095
+ if (this.partitioner) {
18096
+ const partition = this.partitioner.getPartition(id);
18097
+ metadataToSave.partition = partition;
18098
+ }
16219
18099
  }
16220
18100
  await this.storage.saveMetadata(id, metadataToSave);
16221
18101
  // Track metadata statistics
@@ -16225,6 +18105,11 @@ class BrainyData {
16225
18105
  }
16226
18106
  // Update HNSW index size (excluding verbs)
16227
18107
  await this.storage.updateHnswIndexSize(await this.getNounCount());
18108
+ // Update health metrics if in distributed mode
18109
+ if (this.healthMonitor) {
18110
+ const vectorCount = await this.getNounCount();
18111
+ this.healthMonitor.updateVectorCount(vectorCount);
18112
+ }
16228
18113
  // If addToRemote is true and we're connected to a remote server, add to remote as well
16229
18114
  if (options.addToRemote && this.isConnectedToRemoteServer()) {
16230
18115
  try {
@@ -16234,10 +18119,16 @@ class BrainyData {
16234
18119
  console.warn(`Failed to add to remote server: ${remoteError}. Continuing with local add.`);
16235
18120
  }
16236
18121
  }
18122
+ // Invalidate search cache since data has changed
18123
+ this.searchCache.invalidateOnDataChange('add');
16237
18124
  return id;
16238
18125
  }
16239
18126
  catch (error) {
16240
18127
  console.error('Failed to add vector:', error);
18128
+ // Track error in health monitor
18129
+ if (this.healthMonitor) {
18130
+ this.healthMonitor.recordRequest(0, true);
18131
+ }
16241
18132
  throw new Error(`Failed to add vector: ${error}`);
16242
18133
  }
16243
18134
  }
@@ -16487,11 +18378,16 @@ class BrainyData {
16487
18378
  console.log(`Lazy loading mode: Added ${limitedNouns.length} nodes to index for search`);
16488
18379
  }
16489
18380
  }
16490
- // Search in the index
16491
- const results = await this.index.search(queryVector, k);
18381
+ // When using offset, we need to fetch more results and then slice
18382
+ const offset = options.offset || 0;
18383
+ const totalNeeded = k + offset;
18384
+ // Search in the index for totalNeeded results
18385
+ const results = await this.index.search(queryVector, totalNeeded);
18386
+ // Skip the offset number of results
18387
+ const paginatedResults = results.slice(offset, offset + k);
16492
18388
  // Get metadata for each result
16493
18389
  const searchResults = [];
16494
- for (const [id, score] of results) {
18390
+ for (const [id, score] of paginatedResults) {
16495
18391
  const noun = this.index.getNouns().get(id);
16496
18392
  if (!noun) {
16497
18393
  continue;
@@ -16532,8 +18428,9 @@ class BrainyData {
16532
18428
  }
16533
18429
  // Sort by distance (ascending)
16534
18430
  results.sort((a, b) => a[1] - b[1]);
16535
- // Take top k results
16536
- const topResults = results.slice(0, k);
18431
+ // Apply offset and take k results
18432
+ const offset = options.offset || 0;
18433
+ const topResults = results.slice(offset, offset + k);
16537
18434
  // Get metadata for each result
16538
18435
  const searchResults = [];
16539
18436
  for (const [id, score] of topResults) {
@@ -16574,6 +18471,7 @@ class BrainyData {
16574
18471
  * @returns Array of search results
16575
18472
  */
16576
18473
  async search(queryVectorOrData, k = 10, options = {}) {
18474
+ const startTime = Date.now();
16577
18475
  // Validate input is not null or undefined
16578
18476
  if (queryVectorOrData === null || queryVectorOrData === undefined) {
16579
18477
  throw new Error('Query cannot be null or undefined');
@@ -16625,7 +18523,93 @@ class BrainyData {
16625
18523
  return this.searchCombined(queryVectorOrData, k, options);
16626
18524
  }
16627
18525
  // Default behavior (backward compatible): search locally
16628
- return this.searchLocal(queryVectorOrData, k, options);
18526
+ try {
18527
+ // Check cache first (transparent to user)
18528
+ const cacheKey = this.searchCache.getCacheKey(queryVectorOrData, k, options);
18529
+ const cachedResults = this.searchCache.get(cacheKey);
18530
+ if (cachedResults) {
18531
+ // Track cache hit in health monitor
18532
+ if (this.healthMonitor) {
18533
+ const latency = Date.now() - startTime;
18534
+ this.healthMonitor.recordRequest(latency, false);
18535
+ this.healthMonitor.recordCacheAccess(true);
18536
+ }
18537
+ return cachedResults;
18538
+ }
18539
+ // Cache miss - perform actual search
18540
+ const results = await this.searchLocal(queryVectorOrData, k, options);
18541
+ // Cache results for future queries (unless explicitly disabled)
18542
+ if (!options.skipCache) {
18543
+ this.searchCache.set(cacheKey, results);
18544
+ }
18545
+ // Track successful search in health monitor
18546
+ if (this.healthMonitor) {
18547
+ const latency = Date.now() - startTime;
18548
+ this.healthMonitor.recordRequest(latency, false);
18549
+ this.healthMonitor.recordCacheAccess(false);
18550
+ }
18551
+ return results;
18552
+ }
18553
+ catch (error) {
18554
+ // Track error in health monitor
18555
+ if (this.healthMonitor) {
18556
+ const latency = Date.now() - startTime;
18557
+ this.healthMonitor.recordRequest(latency, true);
18558
+ }
18559
+ throw error;
18560
+ }
18561
+ }
18562
+ /**
18563
+ * Search with cursor-based pagination for better performance on large datasets
18564
+ * @param queryVectorOrData Query vector or data to search for
18565
+ * @param k Number of results to return
18566
+ * @param options Additional options including cursor for pagination
18567
+ * @returns Paginated search results with cursor for next page
18568
+ */
18569
+ async searchWithCursor(queryVectorOrData, k = 10, options = {}) {
18570
+ // For cursor-based search, we need to fetch more results and filter
18571
+ const searchK = options.cursor ? k + 20 : k; // Get extra results for filtering
18572
+ // Perform regular search
18573
+ const allResults = await this.search(queryVectorOrData, searchK, {
18574
+ ...options,
18575
+ skipCache: options.skipCache
18576
+ });
18577
+ let results = allResults;
18578
+ let startIndex = 0;
18579
+ // If cursor provided, find starting position
18580
+ if (options.cursor) {
18581
+ startIndex = allResults.findIndex(r => r.id === options.cursor.lastId &&
18582
+ Math.abs(r.score - options.cursor.lastScore) < 0.0001);
18583
+ if (startIndex >= 0) {
18584
+ startIndex += 1; // Start after the cursor position
18585
+ results = allResults.slice(startIndex, startIndex + k);
18586
+ }
18587
+ else {
18588
+ // Cursor not found, might be stale - return from beginning
18589
+ results = allResults.slice(0, k);
18590
+ startIndex = 0;
18591
+ }
18592
+ }
18593
+ else {
18594
+ results = allResults.slice(0, k);
18595
+ }
18596
+ // Create cursor for next page
18597
+ let nextCursor;
18598
+ const hasMoreResults = (startIndex + results.length) < allResults.length || allResults.length >= searchK;
18599
+ if (results.length > 0 && hasMoreResults) {
18600
+ const lastResult = results[results.length - 1];
18601
+ nextCursor = {
18602
+ lastId: lastResult.id,
18603
+ lastScore: lastResult.score,
18604
+ position: startIndex + results.length
18605
+ };
18606
+ }
18607
+ return {
18608
+ results,
18609
+ cursor: nextCursor,
18610
+ hasMore: !!nextCursor,
18611
+ totalEstimate: allResults.length > searchK ? undefined : allResults.length
18612
+ };
16629
18613
  }
16630
18614
  /**
16631
18615
  * Search the local database for similar vectors
@@ -16682,14 +18666,16 @@ class BrainyData {
16682
18666
  if (options.nounTypes && options.nounTypes.length > 0) {
16683
18667
  searchResults = await this.searchByNounTypes(queryToUse, k, options.nounTypes, {
16684
18668
  forceEmbed: options.forceEmbed,
16685
- service: options.service
18669
+ service: options.service,
18670
+ offset: options.offset
16686
18671
  });
16687
18672
  }
16688
18673
  else {
16689
18674
  // Otherwise, search all GraphNouns
16690
18675
  searchResults = await this.searchByNounTypes(queryToUse, k, null, {
16691
18676
  forceEmbed: options.forceEmbed,
16692
- service: options.service
18677
+ service: options.service,
18678
+ offset: options.offset
16693
18679
  });
16694
18680
  }
16695
18681
  // Filter out placeholder nouns from search results
@@ -16697,7 +18683,15 @@ class BrainyData {
16697
18683
  if (result.metadata && typeof result.metadata === 'object') {
16698
18684
  const metadata = result.metadata;
16699
18685
  // Exclude placeholder nouns from search results
16700
- return !metadata.isPlaceholder;
18686
+ if (metadata.isPlaceholder) {
18687
+ return false;
18688
+ }
18689
+ // Apply domain filter if specified
18690
+ if (options.filter?.domain) {
18691
+ if (metadata.domain !== options.filter.domain) {
18692
+ return false;
18693
+ }
18694
+ }
16701
18695
  }
16702
18696
  return true;
16703
18697
  });
@@ -17015,6 +19009,8 @@ class BrainyData {
17015
19009
  catch (error) {
17016
19010
  // Ignore
17017
19011
  }
19012
+ // Invalidate search cache since data has changed
19013
+ this.searchCache.invalidateOnDataChange('delete');
17018
19014
  return true;
17019
19015
  }
17020
19016
  catch (error) {
@@ -17098,6 +19094,8 @@ class BrainyData {
17098
19094
  // Track metadata statistics
17099
19095
  const service = this.getServiceName(options);
17100
19096
  await this.storage.incrementStatistic('metadata', service);
19097
+ // Invalidate search cache since metadata has changed
19098
+ this.searchCache.invalidateOnDataChange('update');
17101
19099
  return true;
17102
19100
  }
17103
19101
  catch (error) {
@@ -17452,6 +19450,8 @@ class BrainyData {
17452
19450
  await this.storage.incrementStatistic('verb', serviceForStats);
17453
19451
  // Update HNSW index size (excluding verbs)
17454
19452
  await this.storage.updateHnswIndexSize(await this.getNounCount());
19453
+ // Invalidate search cache since verb data has changed
19454
+ this.searchCache.invalidateOnDataChange('add');
17455
19455
  return id;
17456
19456
  }
17457
19457
  catch (error) {
@@ -17674,6 +19674,8 @@ class BrainyData {
17674
19674
  await this.index.clear();
17675
19675
  // Clear storage
17676
19676
  await this.storage.clear();
19677
+ // Clear search cache since all data has been removed
19678
+ this.searchCache.invalidateOnDataChange('delete');
17677
19679
  }
17678
19680
  catch (error) {
17679
19681
  console.error('Failed to clear vector database:', error);
@@ -17686,6 +19688,66 @@ class BrainyData {
17686
19688
  size() {
17687
19689
  return this.index.size();
17688
19690
  }
19691
+ /**
19692
+ * Get search cache statistics for performance monitoring
19693
+ * @returns Cache statistics including hit rate and memory usage
19694
+ */
19695
+ getCacheStats() {
19696
+ return {
19697
+ search: this.searchCache.getStats(),
19698
+ searchMemoryUsage: this.searchCache.getMemoryUsage()
19699
+ };
19700
+ }
19701
+ /**
19702
+ * Clear search cache manually (useful for testing or memory management)
19703
+ */
19704
+ clearCache() {
19705
+ this.searchCache.clear();
19706
+ }
19707
+ /**
19708
+ * Adapt cache configuration based on current performance metrics
19709
+ * This method analyzes usage patterns and automatically optimizes cache settings
19710
+ * @private
19711
+ */
19712
+ adaptCacheConfiguration() {
19713
+ const stats = this.searchCache.getStats();
19714
+ const memoryUsage = this.searchCache.getMemoryUsage();
19715
+ const currentConfig = this.searchCache.getConfig();
19716
+ // Prepare performance metrics for adaptation
19717
+ const performanceMetrics = {
19718
+ hitRate: stats.hitRate,
19719
+ avgResponseTime: 50, // Would be measured in real implementation
19720
+ memoryUsage: memoryUsage,
19721
+ externalChangesDetected: 0, // Would be tracked from real-time updates
19722
+ timeSinceLastChange: Date.now() - this.lastUpdateTime
19723
+ };
19724
+ // Try to adapt configuration
19725
+ const newConfig = this.cacheAutoConfigurator.adaptConfiguration(currentConfig, performanceMetrics);
19726
+ if (newConfig) {
19727
+ // Apply new cache configuration
19728
+ this.searchCache.updateConfig(newConfig.cacheConfig);
19729
+ // Apply new real-time update configuration if needed
19730
+ if (newConfig.realtimeConfig.enabled !== this.realtimeUpdateConfig.enabled ||
19731
+ newConfig.realtimeConfig.interval !== this.realtimeUpdateConfig.interval) {
19732
+ const wasEnabled = this.realtimeUpdateConfig.enabled;
19733
+ this.realtimeUpdateConfig = {
19734
+ ...this.realtimeUpdateConfig,
19735
+ ...newConfig.realtimeConfig
19736
+ };
19737
+ // Restart real-time updates with new configuration
19738
+ if (wasEnabled) {
19739
+ this.stopRealtimeUpdates();
19740
+ }
19741
+ if (this.realtimeUpdateConfig.enabled && this.isInitialized) {
19742
+ this.startRealtimeUpdates();
19743
+ }
19744
+ }
19745
+ if (this.loggingConfig?.verbose) {
19746
+ console.log('🔧 Auto-adapted cache configuration:');
19747
+ console.log(this.cacheAutoConfigurator.getConfigExplanation(newConfig));
19748
+ }
19749
+ }
19750
+ }
17689
19751
  /**
17690
19752
  * Get the number of nouns in the database (excluding verbs)
17691
19753
  * This is used for statistics reporting to match the expected behavior in tests
@@ -18232,12 +20294,17 @@ class BrainyData {
18232
20294
  if (!this.serverSearchConduit || !this.serverConnection) {
18233
20295
  throw new Error('Server search conduit or connection is not initialized');
18234
20296
  }
18235
- // Search the remote server
18236
- const searchResult = await this.serverSearchConduit.searchServer(this.serverConnection.connectionId, query, k);
20297
+ // When using offset, fetch more results and slice
20298
+ const offset = options.offset || 0;
20299
+ const totalNeeded = k + offset;
20300
+ // Search the remote server for totalNeeded results
20301
+ const searchResult = await this.serverSearchConduit.searchServer(this.serverConnection.connectionId, query, totalNeeded);
18237
20302
  if (!searchResult.success) {
18238
20303
  throw new Error(`Remote search failed: ${searchResult.error}`);
18239
20304
  }
18240
- return searchResult.data;
20305
+ // Apply offset to remote results
20306
+ const allResults = searchResult.data;
20307
+ return allResults.slice(offset, offset + k);
18241
20308
  }
18242
20309
  catch (error) {
18243
20310
  console.error('Failed to search remote server:', error);
@@ -18889,6 +20956,26 @@ class BrainyData {
18889
20956
  // Sort by score and limit to k results
18890
20957
  return allResults.sort((a, b) => b.score - a.score).slice(0, k);
18891
20958
  }
20959
+ /**
20960
+ * Cleanup distributed resources
20961
+ * Should be called when shutting down the instance
20962
+ */
20963
+ async cleanup() {
20964
+ // Stop real-time updates
20965
+ if (this.updateTimerId) {
20966
+ clearInterval(this.updateTimerId);
20967
+ this.updateTimerId = null;
20968
+ }
20969
+ // Clean up distributed mode resources
20970
+ if (this.healthMonitor) {
20971
+ this.healthMonitor.stop();
20972
+ }
20973
+ if (this.configManager) {
20974
+ await this.configManager.cleanup();
20975
+ }
20976
+ // Clean up worker pools
20977
+ await cleanupWorkerPools();
20978
+ }
18892
20979
  }
18893
20980
 
18894
20981
  /**