@soulcraft/brainy 0.37.0 → 0.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/unified.js CHANGED
@@ -15417,6 +15417,1190 @@ class AugmentationPipeline {
15417
15417
  // Create and export a default instance of the pipeline
15418
15418
  const augmentationPipeline$1 = new AugmentationPipeline();
15419
15419
 
15420
/**
 * Distributed Configuration Manager
 *
 * Keeps one shared configuration document in sync between Brainy instances.
 * The document is read and written through the storage adapter's metadata API
 * under the id '_distributed_config' and contains cluster-wide settings plus
 * one entry per registered instance (role, status, last heartbeat, metrics).
 *
 * NOTE(review): every write replaces the whole document without a
 * compare-and-swap, so concurrent writers can overwrite each other's instance
 * entries; the heartbeat re-registers this instance if its entry disappears.
 */
class DistributedConfigManager {
    /**
     * @param storage - Adapter exposing async getMetadata(id) and saveMetadata(id, data)
     * @param distributedConfig - Optional overrides: instanceId, configPath, role and the
     *   heartbeatInterval / configCheckInterval / instanceTimeout values in milliseconds
     * @param brainyMode - Optional { readOnly, writeOnly } flags used to infer the role
     *   when distributedConfig.role is not given
     */
    constructor(storage, distributedConfig, brainyMode) {
        this.config = null;
        this.lastConfigVersion = 0;
        this.storage = storage;
        this.instanceId = distributedConfig?.instanceId || `instance-${v4()}`;
        // Stored for callers; this class itself always uses the '_distributed_config' metadata id
        this.configPath = distributedConfig?.configPath || '_brainy/config.json';
        this.heartbeatInterval = distributedConfig?.heartbeatInterval || 30000;
        this.configCheckInterval = distributedConfig?.configCheckInterval || 10000;
        this.instanceTimeout = distributedConfig?.instanceTimeout || 60000;
        if (distributedConfig?.role) {
            // An explicit role always wins
            this.role = distributedConfig.role;
        }
        else if (brainyMode) {
            // Otherwise infer it from Brainy's read/write mode
            if (brainyMode.writeOnly) {
                this.role = 'writer';
            }
            else if (brainyMode.readOnly) {
                this.role = 'reader';
            }
            // Neither flag set: the role is resolved later by determineRole()
        }
    }
    /**
     * Load (or create) the shared config, resolve the role, register this
     * instance and start the heartbeat and config-watch timers.
     * @returns The current shared configuration
     * @throws Error when no role can be determined
     */
    async initialize() {
        this.config = await this.loadOrCreateConfig();
        if (!this.role) {
            this.role = await this.determineRole();
        }
        await this.registerInstance();
        this.startHeartbeat();
        this.startConfigWatch();
        return this.config;
    }
    /**
     * Load the existing config, or create and persist a default one.
     * @returns The loaded or newly created configuration
     */
    async loadOrCreateConfig() {
        try {
            const configData = await this.storage.getMetadata('_distributed_config');
            if (configData) {
                this.lastConfigVersion = configData.version;
                return configData;
            }
        }
        catch (error) {
            // Treated as "config doesn't exist yet".
            // NOTE(review): a transient read failure is indistinguishable from a
            // missing config here and leads to the defaults below being written.
        }
        const newConfig = {
            version: 1,
            updated: new Date().toISOString(),
            settings: {
                partitionStrategy: 'hash',
                partitionCount: 100,
                embeddingModel: 'text-embedding-ada-002',
                dimensions: 1536,
                distanceMetric: 'cosine',
                hnswParams: {
                    M: 16,
                    efConstruction: 200
                }
            },
            instances: {}
        };
        await this.saveConfig(newConfig);
        return newConfig;
    }
    /**
     * Determine the role of this instance.
     * The BRAINY_ROLE environment variable is checked first, then a role that
     * was already set on this manager. Roles are never auto-assigned.
     * @returns 'writer', 'reader' or 'hybrid' (or the previously set role)
     * @throws Error when BRAINY_ROLE is invalid or no role is configured
     */
    async determineRole() {
        if (process.env.BRAINY_ROLE) {
            const role = process.env.BRAINY_ROLE.toLowerCase();
            if (role === 'writer' || role === 'reader' || role === 'hybrid') {
                return role;
            }
            throw new Error(`Invalid BRAINY_ROLE: ${process.env.BRAINY_ROLE}. Must be 'writer', 'reader', or 'hybrid'`);
        }
        if (this.role) {
            return this.role;
        }
        // DO NOT auto-assign roles based on deployment order or existing instances
        // This is dangerous and can lead to data corruption or loss
        throw new Error('Distributed mode requires explicit role configuration. ' +
            'Set BRAINY_ROLE environment variable or pass role in distributed config. ' +
            'Valid roles: "writer", "reader", "hybrid"');
    }
    /**
     * Check whether an instance's last heartbeat is within instanceTimeout.
     * A missing or unparseable lastHeartbeat yields NaN and therefore false.
     * @param instance - Instance entry from the shared config
     * @returns true when the heartbeat is recent enough
     */
    isInstanceAlive(instance) {
        const lastSeen = new Date(instance.lastHeartbeat).getTime();
        return (Date.now() - lastSeen) < this.instanceTimeout;
    }
    /**
     * Write this instance's entry into the shared config and persist it.
     * Does nothing when no config is loaded.
     * @throws Error when the role has not been set
     */
    async registerInstance() {
        if (!this.config)
            return;
        if (!this.role) {
            throw new Error('Cannot register instance without a role');
        }
        const instanceInfo = {
            role: this.role,
            status: 'active',
            lastHeartbeat: new Date().toISOString(),
            metrics: {
                memoryUsage: process.memoryUsage().heapUsed
            }
        };
        if (process.env.SERVICE_ENDPOINT) {
            instanceInfo.endpoint = process.env.SERVICE_ENDPOINT;
        }
        this.config.instances[this.instanceId] = instanceInfo;
        await this.saveConfig(this.config);
    }
    /**
     * Persist a configuration, incrementing its version and 'updated' stamp.
     * The given object is mutated and becomes the current config.
     * @param config - Configuration to save
     */
    async saveConfig(config) {
        config.version++;
        config.updated = new Date().toISOString();
        // Recorded before the write so the config watcher does not report
        // this instance's own save as a remote update
        this.lastConfigVersion = config.version;
        await this.storage.saveMetadata('_distributed_config', config);
        this.config = config;
    }
    /**
     * Start (or restart) the periodic heartbeat.
     */
    startHeartbeat() {
        if (this.heartbeatTimer) {
            // Avoid leaking a previous timer when started more than once
            clearInterval(this.heartbeatTimer);
        }
        this.heartbeatTimer = setInterval(() => {
            // A failed heartbeat must not surface as an unhandled rejection
            this.updateHeartbeat().catch((error) => {
                console.error('Failed to update heartbeat:', error);
            });
        }, this.heartbeatInterval);
    }
    /**
     * Refresh this instance's heartbeat and remove instances that are no
     * longer alive. Re-registers this instance if its entry was removed.
     */
    async updateHeartbeat() {
        if (!this.config)
            return;
        // Reload config to get latest state
        try {
            const latestConfig = await this.loadConfig();
            if (latestConfig) {
                this.config = latestConfig;
            }
        }
        catch (error) {
            console.error('Failed to reload config:', error);
        }
        const self = this.config.instances[this.instanceId];
        if (!self) {
            // Re-register if we were removed
            await this.registerInstance();
            return;
        }
        self.lastHeartbeat = new Date().toISOString();
        self.status = 'active';
        self.metrics = {
            memoryUsage: process.memoryUsage().heapUsed
        };
        // Remove stale instances. Using isInstanceAlive keeps this consistent
        // with getInstancesByRole and also drops entries whose heartbeat is
        // missing or unparseable (those could never be cleaned up before).
        let hasChanges = false;
        for (const [id, instance] of Object.entries(this.config.instances)) {
            if (id === this.instanceId)
                continue;
            if (!this.isInstanceAlive(instance)) {
                delete this.config.instances[id];
                hasChanges = true;
            }
        }
        if (hasChanges) {
            await this.saveConfig(this.config);
        }
        else {
            // Just update our heartbeat without version increment
            await this.storage.saveMetadata('_distributed_config', this.config);
        }
    }
    /**
     * Start (or restart) polling for config changes.
     */
    startConfigWatch() {
        if (this.configWatchTimer) {
            clearInterval(this.configWatchTimer);
        }
        this.configWatchTimer = setInterval(() => {
            this.checkForConfigUpdates().catch((error) => {
                console.error('Failed to check config updates:', error);
            });
        }, this.configCheckInterval);
    }
    /**
     * Adopt a newer stored config (higher version than last seen) and notify
     * the onConfigUpdate callback, if one is set. Errors are logged.
     */
    async checkForConfigUpdates() {
        try {
            const latestConfig = await this.loadConfig();
            if (!latestConfig)
                return;
            if (latestConfig.version > this.lastConfigVersion) {
                this.config = latestConfig;
                this.lastConfigVersion = latestConfig.version;
                if (this.onConfigUpdate) {
                    this.onConfigUpdate(latestConfig);
                }
            }
        }
        catch (error) {
            console.error('Failed to check config updates:', error);
        }
    }
    /**
     * Load configuration from storage.
     * @returns The stored config, or null when absent or when loading failed (logged)
     */
    async loadConfig() {
        try {
            const configData = await this.storage.getMetadata('_distributed_config');
            if (configData) {
                return configData;
            }
        }
        catch (error) {
            console.error('Failed to load config:', error);
        }
        return null;
    }
    /**
     * Get current configuration
     * @returns The in-memory config, or null before initialization
     */
    getConfig() {
        return this.config;
    }
    /**
     * Get instance role
     * @throws Error when the role has not been set yet
     */
    getRole() {
        if (!this.role) {
            throw new Error('Role not initialized');
        }
        return this.role;
    }
    /**
     * Get instance ID
     */
    getInstanceId() {
        return this.instanceId;
    }
    /**
     * Set config update callback
     * @param callback - Called with the new config when a newer version is adopted
     */
    setOnConfigUpdate(callback) {
        this.onConfigUpdate = callback;
    }
    /**
     * Get all active instances of a specific role.
     * An instance counts as active when it has a recent heartbeat and has not
     * been marked 'inactive' (as cleanup() does on shutdown).
     * @param role - Role to filter by
     * @returns Matching instance entries (empty when no config is loaded)
     */
    getInstancesByRole(role) {
        if (!this.config)
            return [];
        return Object.values(this.config.instances)
            .filter((instance) => instance.role === role &&
            instance.status !== 'inactive' &&
            this.isInstanceAlive(instance));
    }
    /**
     * Merge metrics into this instance's entry and persist the config.
     * Does nothing when this instance is not registered.
     * @param metrics - Metric fields to merge over the existing ones
     */
    async updateMetrics(metrics) {
        if (!this.config || !this.config.instances[this.instanceId])
            return;
        this.config.instances[this.instanceId].metrics = {
            ...this.config.instances[this.instanceId].metrics,
            ...metrics
        };
        // Don't increment version for metric updates
        await this.storage.saveMetadata('_distributed_config', this.config);
    }
    /**
     * Stop the timers and mark this instance as inactive in the shared config.
     */
    async cleanup() {
        if (this.heartbeatTimer) {
            clearInterval(this.heartbeatTimer);
            this.heartbeatTimer = undefined;
        }
        if (this.configWatchTimer) {
            clearInterval(this.configWatchTimer);
            this.configWatchTimer = undefined;
        }
        if (this.config && this.config.instances[this.instanceId]) {
            this.config.instances[this.instanceId].status = 'inactive';
            await this.saveConfig(this.config);
        }
    }
}
15740
+
15741
+ /**
15742
+ * Cross-platform crypto utilities
15743
+ * Provides non-cryptographic hashing functions that work in both Node.js and browser environments
15744
+ */
15745
/**
 * Environment-independent string hash in the style of djb2.
 * Starts from 5381 and folds in one UTF-16 code unit per step as
 * `(acc << 5) + acc + unit`. Only the shifted term is truncated to 32 bits
 * by the `<<` operator; the running sum itself is an ordinary number.
 * @param str - String to hash
 * @returns Non-negative integer hash (5381 for the empty string)
 */
function hashString(str) {
    let acc = 5381;
    let idx = 0;
    while (idx < str.length) {
        acc = (acc << 5) + acc + str.charCodeAt(idx);
        idx += 1;
    }
    // Callers use the result with modulo, so strip the sign
    return Math.abs(acc);
}
15760
/**
 * Deterministic hash used to assign items to partitions.
 * Delegates to hashString so the same input maps to the same value in
 * every environment.
 * @param input - Input string to hash
 * @returns Whatever hashString returns for the input (intended for modulo use)
 */
function getPartitionHash(input) {
    const partitionHash = hashString(input);
    return partitionHash;
}
15771
+
15772
/**
 * Hash-based Partitioner
 * Maps vector IDs to partition paths of the form 'vectors/pNNN' by hashing
 * the ID and taking it modulo the partition count.
 */
class HashPartitioner {
    /**
     * @param config - Shared config; `config.settings.partitionCount` selects the
     *   number of partitions. A missing config/settings, or a count that is not a
     *   positive integer, falls back to 100 so every generated path stays parseable
     *   by getPartitionIndex.
     */
    constructor(config) {
        this.partitionPrefix = 'vectors/p';
        const requested = Number(config?.settings?.partitionCount);
        this.partitionCount = Number.isInteger(requested) && requested > 0 ? requested : 100;
    }
    /**
     * Get partition for a given vector ID using deterministic hashing
     * @param vectorId - The unique identifier of the vector
     * @returns The partition path
     */
    getPartition(vectorId) {
        const partitionIndex = this.hashString(vectorId) % this.partitionCount;
        return this.formatPartition(partitionIndex);
    }
    /**
     * Get partition with domain metadata (domain stored as metadata, not in path)
     * @param vectorId - The unique identifier of the vector
     * @param domain - The domain identifier (ignored for partitioning)
     * @returns The partition path
     */
    getPartitionWithDomain(vectorId, domain) {
        // Domain doesn't affect partitioning - it's just metadata
        return this.getPartition(vectorId);
    }
    /**
     * Get all partition paths
     * @returns Array of all partition paths, in index order
     */
    getAllPartitions() {
        return Array.from({ length: this.partitionCount }, (_, index) => this.formatPartition(index));
    }
    /**
     * Get partition index from partition path
     * @param partitionPath - A path ending in 'p' followed by digits
     * @returns The partition index
     * @throws Error when the path does not end in 'p<digits>'
     */
    getPartitionIndex(partitionPath) {
        const match = partitionPath.match(/p(\d+)$/);
        if (!match) {
            throw new Error(`Invalid partition path: ${partitionPath}`);
        }
        return parseInt(match[1], 10);
    }
    /**
     * Hash a string to a number for consistent partitioning
     * @param str - The string to hash
     * @returns The value of getPartitionHash for the string
     */
    hashString(str) {
        return getPartitionHash(str);
    }
    /**
     * Group vector IDs by their target partition
     * @param vectorIds - Iterable of vector IDs
     * @returns Map of partition path to the IDs assigned to it, in input order
     */
    getPartitionsForBatch(vectorIds) {
        const partitionMap = new Map();
        for (const id of vectorIds) {
            const partition = this.getPartition(id);
            const bucket = partitionMap.get(partition);
            if (bucket) {
                bucket.push(id);
            }
            else {
                partitionMap.set(partition, [id]);
            }
        }
        return partitionMap;
    }
    /**
     * Build the path for a partition index, zero-padded to at least 3 digits.
     */
    formatPartition(index) {
        return `${this.partitionPrefix}${index.toString().padStart(3, '0')}`;
    }
}
15851
+
15852
+ /**
15853
+ * Operational Modes for Distributed Brainy
15854
+ * Defines different modes with optimized caching strategies
15855
+ */
15856
/**
 * Common base for operational modes.
 * Subclasses provide the canRead / canWrite / canDelete flags read here.
 */
class BaseOperationalMode {
    /**
     * Throw if the named operation is disabled by this mode's flags.
     * Operation names other than 'read', 'write' and 'delete' are not checked.
     * @param operation - Operation name to validate
     * @throws Error when the matching capability flag is falsy
     */
    validateOperation(operation) {
        if (operation === 'read') {
            if (!this.canRead) {
                throw new Error('Read operations are not allowed in write-only mode');
            }
            return;
        }
        if (operation === 'write') {
            if (!this.canWrite) {
                throw new Error('Write operations are not allowed in read-only mode');
            }
            return;
        }
        if (operation === 'delete' && !this.canDelete) {
            throw new Error('Delete operations are not allowed in this mode');
        }
    }
}
15883
/**
 * Operational mode for read-only instances: reads allowed, writes and
 * deletes disallowed, with a cache strategy weighted toward the read cache.
 */
class ReaderMode extends BaseOperationalMode {
    constructor(...args) {
        super(...args);
        Object.assign(this, {
            canRead: true,
            canWrite: false,
            canDelete: false,
            cacheStrategy: {
                hotCacheRatio: 0.8, // share given to the read cache
                prefetchAggressive: true, // prefetch eagerly
                ttl: 3600000, // 1 hour
                compressionEnabled: true,
                writeBufferSize: 0, // readers do not buffer writes
                batchWrites: false,
                adaptive: true
            }
        });
    }
    /**
     * Cache configuration used by reader instances.
     * @returns A new plain object with the reader cache settings
     */
    getCacheConfig() {
        const readerCacheConfig = {
            hotCacheMaxSize: 1000000,
            hotCacheEvictionThreshold: 0.9,
            warmCacheTTL: 3600000, // 1 hour
            batchSize: 100,
            autoTune: true,
            autoTuneInterval: 60000, // 1 minute
            readOnly: true
        };
        return readerCacheConfig;
    }
}
15917
/**
 * Operational mode for write-only instances: writes and deletes allowed,
 * reads disallowed, with a cache strategy weighted toward the write buffer.
 */
class WriterMode extends BaseOperationalMode {
    constructor(...args) {
        super(...args);
        Object.assign(this, {
            canRead: false,
            canWrite: true,
            canDelete: true,
            cacheStrategy: {
                hotCacheRatio: 0.2, // small cache share; the rest is for buffering writes
                prefetchAggressive: false,
                ttl: 60000, // 1 minute
                compressionEnabled: false,
                writeBufferSize: 10000,
                batchWrites: true,
                adaptive: false
            }
        });
    }
    /**
     * Cache configuration used by writer instances.
     * @returns A new plain object with the writer cache settings
     */
    getCacheConfig() {
        const writerCacheConfig = {
            hotCacheMaxSize: 100000,
            hotCacheEvictionThreshold: 0.5,
            warmCacheTTL: 60000, // 1 minute
            batchSize: 1000,
            autoTune: false,
            writeOnly: true
        };
        return writerCacheConfig;
    }
}
15950
/**
 * Operational mode for instances that read, write and delete.
 * Starts from a balanced cache strategy that updateWorkloadBalance can
 * shift toward reads or writes.
 */
class HybridMode extends BaseOperationalMode {
    constructor(...args) {
        super(...args);
        Object.assign(this, {
            canRead: true,
            canWrite: true,
            canDelete: true,
            cacheStrategy: {
                hotCacheRatio: 0.5,
                prefetchAggressive: false,
                ttl: 600000, // 10 minutes
                compressionEnabled: true,
                writeBufferSize: 5000,
                batchWrites: true,
                adaptive: true
            },
            readWriteRatio: 0.5 // share of reads among recent operations
        });
    }
    /**
     * Cache configuration used by hybrid instances.
     * @returns A new plain object with the hybrid cache settings
     */
    getCacheConfig() {
        const hybridCacheConfig = {
            hotCacheMaxSize: 500000,
            hotCacheEvictionThreshold: 0.7,
            warmCacheTTL: 600000, // 10 minutes
            batchSize: 500,
            autoTune: true,
            autoTuneInterval: 300000 // 5 minutes
        };
        return hybridCacheConfig;
    }
    /**
     * Re-tune the cache strategy from recent operation counts.
     * Leaves everything untouched when both counts sum to zero.
     * @param readCount - Number of recent reads
     * @param writeCount - Number of recent writes
     */
    updateWorkloadBalance(readCount, writeCount) {
        const operations = readCount + writeCount;
        if (operations === 0) {
            return;
        }
        this.readWriteRatio = readCount / operations;
        let tuning;
        if (this.readWriteRatio > 0.8) {
            // Mostly reads
            tuning = { hotCacheRatio: 0.7, prefetchAggressive: true, writeBufferSize: 2000 };
        }
        else if (this.readWriteRatio < 0.2) {
            // Mostly writes
            tuning = { hotCacheRatio: 0.3, prefetchAggressive: false, writeBufferSize: 8000 };
        }
        else {
            // Mixed
            tuning = { hotCacheRatio: 0.5, prefetchAggressive: false, writeBufferSize: 5000 };
        }
        Object.assign(this.cacheStrategy, tuning);
    }
}
16014
/**
 * Builds operational mode objects from an instance role.
 */
class OperationalModeFactory {
    /**
     * Create the mode object for a role.
     * @param role - 'reader', 'writer' or 'hybrid'
     * @returns A WriterMode or HybridMode for those roles; a ReaderMode for
     *   'reader' and for any other value (the safe fallback)
     */
    static createMode(role) {
        if (role === 'writer') {
            return new WriterMode();
        }
        if (role === 'hybrid') {
            return new HybridMode();
        }
        // 'reader', and anything unrecognised, gets the read-only mode
        return new ReaderMode();
    }
    /**
     * Create a mode and overlay custom cache strategy values on its defaults.
     * @param role - The instance role
     * @param customStrategy - Cache strategy fields that override the defaults
     * @returns The mode with the merged cache strategy
     */
    static createModeWithStrategy(role, customStrategy) {
        const mode = this.createMode(role);
        const mergedStrategy = { ...mode.cacheStrategy, ...customStrategy };
        mode.cacheStrategy = mergedStrategy;
        return mode;
    }
}
16052
+
16053
/**
 * Domain Detector
 * Classifies data objects into a domain ('medical', 'legal', ... or 'general')
 * by scoring them against built-in and custom patterns, and keeps a count of
 * how often each domain was detected.
 */
class DomainDetector {
    constructor() {
        // Built-in patterns. 'fields' are matched against property names,
        // 'keywords' against the JSON text of the whole object.
        this.domainPatterns = [
            {
                domain: 'medical',
                patterns: {
                    fields: ['symptoms', 'diagnosis', 'treatment', 'medication', 'patient'],
                    keywords: ['medical', 'health', 'disease', 'symptom', 'treatment', 'doctor', 'patient']
                },
                priority: 1
            },
            {
                domain: 'legal',
                patterns: {
                    fields: ['contract', 'clause', 'litigation', 'statute', 'jurisdiction'],
                    keywords: ['legal', 'law', 'contract', 'court', 'attorney', 'litigation', 'statute']
                },
                priority: 1
            },
            {
                domain: 'product',
                patterns: {
                    fields: ['price', 'sku', 'inventory', 'category', 'brand'],
                    keywords: ['product', 'price', 'sale', 'inventory', 'catalog', 'item', 'sku']
                },
                priority: 1
            },
            {
                domain: 'customer',
                patterns: {
                    fields: ['customerId', 'email', 'phone', 'address', 'orders'],
                    keywords: ['customer', 'client', 'user', 'account', 'profile', 'contact']
                },
                priority: 1
            },
            {
                domain: 'financial',
                patterns: {
                    fields: ['amount', 'currency', 'transaction', 'balance', 'account'],
                    keywords: ['financial', 'money', 'payment', 'transaction', 'bank', 'credit', 'debit']
                },
                priority: 1
            },
            {
                domain: 'technical',
                patterns: {
                    fields: ['code', 'function', 'error', 'stack', 'api'],
                    keywords: ['code', 'software', 'api', 'error', 'debug', 'function', 'class', 'method']
                },
                priority: 2
            }
        ];
        this.customPatterns = [];
        this.domainStats = new Map();
    }
    /**
     * Detect domain from data object
     * @param data - The data object to analyze
     * @returns `{ domain }` alone for non-object input; otherwise
     *   `{ domain, domainMetadata }`. A non-empty string `data.domain` is
     *   used as-is; otherwise the highest-scoring pattern wins, with
     *   'general' when nothing scores.
     */
    detectDomain(data) {
        if (!data || typeof data !== 'object') {
            return { domain: 'general' };
        }
        // An explicit domain field short-circuits detection
        if (data.domain && typeof data.domain === 'string') {
            this.updateStats(data.domain);
            return {
                domain: data.domain,
                domainMetadata: this.extractDomainMetadata(data, data.domain)
            };
        }
        const scores = new Map();
        // Custom patterns are scored first; a built-in pattern for the same
        // domain adds to the custom score
        for (const pattern of this.customPatterns) {
            const score = this.scorePattern(data, pattern);
            if (score > 0) {
                scores.set(pattern.domain, score * (pattern.priority || 1));
            }
        }
        for (const pattern of this.domainPatterns) {
            const score = this.scorePattern(data, pattern);
            if (score > 0) {
                const currentScore = scores.get(pattern.domain) || 0;
                scores.set(pattern.domain, currentScore + score * (pattern.priority || 1));
            }
        }
        // Highest score wins; the first domain reaching it is kept on ties
        let bestDomain = 'general';
        let bestScore = 0;
        for (const [domain, score] of scores.entries()) {
            if (score > bestScore) {
                bestDomain = domain;
                bestScore = score;
            }
        }
        this.updateStats(bestDomain);
        return {
            domain: bestDomain,
            domainMetadata: this.extractDomainMetadata(data, bestDomain)
        };
    }
    /**
     * Score a data object against a domain pattern.
     * +2 per pattern field contained in some property name, +1 per keyword
     * found in the JSON text of the object (property names included), +3 when
     * the pattern's regex matches that text. All matching is case-insensitive
     * except the regex, which uses its own flags.
     * @param data - The data object
     * @param pattern - Pattern with optional `patterns.fields`, `.keywords`, `.regex`
     * @returns The unweighted score (priority is applied by the caller)
     */
    scorePattern(data, pattern) {
        const rules = pattern.patterns ?? {};
        let score = 0;
        if (rules.fields) {
            const loweredKeys = Object.keys(data).map((key) => key.toLowerCase());
            for (const field of rules.fields) {
                const wanted = field.toLowerCase();
                if (loweredKeys.some((key) => key.includes(wanted))) {
                    score += 2; // Field match is strong signal
                }
            }
        }
        if (rules.keywords || rules.regex) {
            // Serialize once and share between keyword and regex matching
            const serialized = this.serializeForMatching(data);
            if (rules.keywords) {
                const loweredText = serialized.toLowerCase();
                for (const keyword of rules.keywords) {
                    if (loweredText.includes(keyword.toLowerCase())) {
                        score += 1;
                    }
                }
            }
            if (rules.regex) {
                // Global/sticky regexes remember lastIndex between test() calls;
                // reset it so repeated scoring gives the same answer every time
                rules.regex.lastIndex = 0;
                if (rules.regex.test(serialized)) {
                    score += 3; // Regex match is very specific
                }
                rules.regex.lastIndex = 0;
            }
        }
        return score;
    }
    /**
     * JSON text of the data for keyword/regex matching.
     * @returns The JSON string, or '' when the data cannot be serialized
     *   (circular references, BigInt values, toJSON yielding undefined)
     */
    serializeForMatching(data) {
        try {
            const json = JSON.stringify(data);
            return typeof json === 'string' ? json : '';
        }
        catch (error) {
            // Deliberate: unserializable data simply gets no text-based score;
            // field-name scoring still applies
            return '';
        }
    }
    /**
     * Extract domain-specific metadata
     * Copies a fixed set of fields per known domain, derives range labels for
     * price / lifetime_value / amount (a value of 0 is included), and always
     * adds `detectionConfidence`.
     * @param data - The data object
     * @param domain - The detected or explicit domain
     * @returns Metadata object for the domain
     */
    extractDomainMetadata(data, domain) {
        const metadata = {};
        // Truthy values, plus a legitimate numeric zero
        const isPresent = (value) => Boolean(value) || value === 0;
        switch (domain) {
            case 'medical':
                if (data.patientId)
                    metadata.patientId = data.patientId;
                if (data.condition)
                    metadata.condition = data.condition;
                if (data.severity)
                    metadata.severity = data.severity;
                break;
            case 'legal':
                if (data.caseId)
                    metadata.caseId = data.caseId;
                if (data.jurisdiction)
                    metadata.jurisdiction = data.jurisdiction;
                if (data.documentType)
                    metadata.documentType = data.documentType;
                break;
            case 'product':
                if (data.sku)
                    metadata.sku = data.sku;
                if (data.category)
                    metadata.category = data.category;
                if (data.brand)
                    metadata.brand = data.brand;
                if (isPresent(data.price))
                    metadata.priceRange = this.getPriceRange(data.price);
                break;
            case 'customer':
                if (data.customerId)
                    metadata.customerId = data.customerId;
                if (data.segment)
                    metadata.segment = data.segment;
                if (isPresent(data.lifetime_value))
                    metadata.valueCategory = this.getValueCategory(data.lifetime_value);
                break;
            case 'financial':
                if (data.accountId)
                    metadata.accountId = data.accountId;
                if (data.transactionType)
                    metadata.transactionType = data.transactionType;
                if (isPresent(data.amount))
                    metadata.amountRange = this.getAmountRange(data.amount);
                break;
            case 'technical':
                if (data.service)
                    metadata.service = data.service;
                if (data.environment)
                    metadata.environment = data.environment;
                if (data.severity)
                    metadata.severity = data.severity;
                break;
        }
        metadata.detectionConfidence = this.calculateConfidence(data, domain);
        return metadata;
    }
    /**
     * Calculate detection confidence
     * @returns 'high' when `data.domain` equals the domain or the pattern score
     *   is at least 5, 'medium' for a score of at least 2, otherwise 'low'
     *   (also 'low' when no pattern exists for the domain). When a custom and a
     *   built-in pattern share a domain, the custom one is used.
     */
    calculateConfidence(data, domain) {
        if (data.domain === domain)
            return 'high';
        const pattern = [...this.customPatterns, ...this.domainPatterns]
            .find((candidate) => candidate.domain === domain);
        if (!pattern)
            return 'low';
        const score = this.scorePattern(data, pattern);
        if (score >= 5)
            return 'high';
        if (score >= 2)
            return 'medium';
        return 'low';
    }
    /**
     * Categorize price ranges
     * @returns 'low' (< 10), 'medium' (< 100), 'high' (< 1000), else 'premium'
     */
    getPriceRange(price) {
        if (price < 10)
            return 'low';
        if (price < 100)
            return 'medium';
        if (price < 1000)
            return 'high';
        return 'premium';
    }
    /**
     * Categorize customer value
     * @returns 'low' (< 100), 'medium' (< 1000), 'high' (< 10000), else 'vip'
     */
    getValueCategory(value) {
        if (value < 100)
            return 'low';
        if (value < 1000)
            return 'medium';
        if (value < 10000)
            return 'high';
        return 'vip';
    }
    /**
     * Categorize amount ranges
     * @returns 'micro' (< 100), 'small' (< 1000), 'medium' (< 10000),
     *   'large' (< 100000), else 'enterprise'
     */
    getAmountRange(amount) {
        if (amount < 100)
            return 'micro';
        if (amount < 1000)
            return 'small';
        if (amount < 10000)
            return 'medium';
        if (amount < 100000)
            return 'large';
        return 'enterprise';
    }
    /**
     * Add custom domain pattern, replacing any existing custom pattern for the
     * same domain.
     * @param pattern - Custom domain pattern to add
     */
    addCustomPattern(pattern) {
        this.customPatterns = this.customPatterns.filter((existing) => existing.domain !== pattern.domain);
        this.customPatterns.push(pattern);
    }
    /**
     * Remove custom domain pattern
     * @param domain - Domain to remove pattern for
     */
    removeCustomPattern(domain) {
        this.customPatterns = this.customPatterns.filter((existing) => existing.domain !== domain);
    }
    /**
     * Increment the detection counter for a domain.
     */
    updateStats(domain) {
        const count = this.domainStats.get(domain) || 0;
        this.domainStats.set(domain, count + 1);
    }
    /**
     * Get domain statistics
     * @returns A copy of the domain-to-count map
     */
    getDomainStats() {
        return new Map(this.domainStats);
    }
    /**
     * Clear domain statistics
     */
    clearStats() {
        this.domainStats.clear();
    }
    /**
     * Get all configured domains
     * @returns Sorted array of distinct domain names (built-in and custom)
     */
    getConfiguredDomains() {
        const domains = new Set();
        for (const pattern of [...this.domainPatterns, ...this.customPatterns]) {
            domains.add(pattern.domain);
        }
        return Array.from(domains).sort();
    }
}
16359
+
16360
+ /**
16361
+ * Health Monitor
16362
+ * Monitors and reports instance health in distributed deployments
16363
+ */
16364
class HealthMonitor {
    /**
     * @param configManager - Object used to publish metrics and identify this
     *   instance; this class calls its updateMetrics(), getInstanceId() and
     *   getRole() methods
     */
    constructor(configManager) {
        this.requestCount = 0;
        this.errorCount = 0;
        this.totalLatency = 0; // Not read by this class; kept so existing consumers still find it
        this.cacheHits = 0;
        this.cacheMisses = 0;
        this.vectorCount = 0;
        this.checkInterval = 30000; // 30 seconds
        this.metricsWindow = []; // Request timestamps (ms) for RPS calculation
        this.latencyWindow = []; // Recent request latencies (ms)
        this.windowSize = 60000; // 1 minute window
        this.configManager = configManager;
        this.startTime = Date.now();
        // CPU baseline taken at construction so getCPUUsage() measures only the
        // CPU time consumed while this monitor has existed.
        this.startCpuUsage = HealthMonitor.hasProcessApi('cpuUsage') ? process.cpuUsage() : null;
    }
    /**
     * Check whether the runtime exposes a given function on `process`
     * (it does not in browser builds).
     * @param name - Name of the process method to look for
     * @returns true when process[name] is callable
     */
    static hasProcessApi(name) {
        return typeof process !== 'undefined' && process !== null && typeof process[name] === 'function';
    }
    /**
     * Start health monitoring.
     * Publishes once immediately, then every `checkInterval` milliseconds.
     * Calling start() while already running is a no-op so that no timer leaks.
     */
    start() {
        if (this.healthCheckTimer) {
            return;
        }
        // updateHealth() is async; a failed publish must not turn into an
        // unhandled promise rejection, so failures are logged here instead.
        const publish = () => {
            this.updateHealth().catch((error) => {
                console.error('Health monitor failed to publish metrics:', error);
            });
        };
        publish();
        this.healthCheckTimer = setInterval(publish, this.checkInterval);
    }
    /**
     * Stop health monitoring
     */
    stop() {
        if (this.healthCheckTimer) {
            clearInterval(this.healthCheckTimer);
            this.healthCheckTimer = undefined;
        }
    }
    /**
     * Update health status and metrics.
     * Rejects if the config manager fails to store the metrics; the sliding
     * windows are trimmed either way.
     */
    async updateHealth() {
        const metrics = this.collectMetrics();
        try {
            // Update config with latest metrics
            await this.configManager.updateMetrics({
                vectorCount: metrics.vectorCount,
                cacheHitRate: metrics.cacheHitRate,
                memoryUsage: metrics.memoryUsage,
                cpuUsage: metrics.cpuUsage
            });
        }
        finally {
            // Trim even when publishing failed so the windows stay bounded
            this.cleanWindows();
        }
    }
    /**
     * Collect current metrics
     * @returns Snapshot of vector count, cache hit rate, memory, CPU, RPS,
     *   average latency and error rate
     */
    collectMetrics() {
        return {
            vectorCount: this.vectorCount,
            cacheHitRate: this.calculateCacheHitRate(),
            memoryUsage: this.getMemoryUsage(),
            cpuUsage: this.getCPUUsage(),
            requestsPerSecond: this.calculateRPS(),
            averageLatency: this.calculateAverageLatency(),
            errorRate: this.calculateErrorRate()
        };
    }
    /**
     * Get heap usage in bytes
     * @returns process.memoryUsage().heapUsed, or 0 when that API is unavailable
     */
    getMemoryUsage() {
        if (!HealthMonitor.hasProcessApi('memoryUsage')) {
            return 0;
        }
        return process.memoryUsage().heapUsed;
    }
    /**
     * Calculate cache hit rate
     * @returns Hits divided by total accesses, or 0 when nothing was recorded
     */
    calculateCacheHitRate() {
        const total = this.cacheHits + this.cacheMisses;
        if (total === 0)
            return 0;
        return this.cacheHits / total;
    }
    /**
     * Calculate requests per second
     * @returns Requests recorded within the last `windowSize` ms, per second
     */
    calculateRPS() {
        const now = Date.now();
        const recentRequests = this.metricsWindow.filter(timestamp => now - timestamp < this.windowSize);
        return recentRequests.length / (this.windowSize / 1000);
    }
    /**
     * Calculate average latency
     * @returns Mean of the retained latency samples in ms, or 0 when empty
     */
    calculateAverageLatency() {
        if (this.latencyWindow.length === 0)
            return 0;
        const sum = this.latencyWindow.reduce((a, b) => a + b, 0);
        return sum / this.latencyWindow.length;
    }
    /**
     * Calculate error rate
     * @returns Errors divided by total requests, or 0 when no requests were recorded
     */
    calculateErrorRate() {
        if (this.requestCount === 0)
            return 0;
        return this.errorCount / this.requestCount;
    }
    /**
     * Get CPU usage (simplified)
     * @returns CPU time used since construction as a percentage of elapsed
     *   wall-clock time, capped at 100; 0 when it cannot be measured
     */
    getCPUUsage() {
        if (!this.startCpuUsage || !HealthMonitor.hasProcessApi('cpuUsage')) {
            return 0;
        }
        const seconds = (Date.now() - this.startTime) / 1000;
        // No elapsed time yet: avoid dividing by zero (NaN / Infinity)
        if (seconds <= 0) {
            return 0;
        }
        // Passing the baseline yields the delta since construction, in microseconds
        const usage = process.cpuUsage(this.startCpuUsage);
        const total = usage.user + usage.system;
        return Math.min(100, (total / 1000000 / seconds) * 100);
    }
    /**
     * Clean old entries from sliding windows
     */
    cleanWindows() {
        const now = Date.now();
        const cutoff = now - this.windowSize;
        this.metricsWindow = this.metricsWindow.filter(t => t > cutoff);
        // Keep only the 100 most recent latency measurements
        if (this.latencyWindow.length > 100) {
            this.latencyWindow = this.latencyWindow.slice(-100);
        }
    }
    /**
     * Record a request
     * @param latency - Request latency in milliseconds
     * @param error - Whether the request resulted in an error
     */
    recordRequest(latency, error = false) {
        this.requestCount++;
        this.metricsWindow.push(Date.now());
        this.latencyWindow.push(latency);
        if (error) {
            this.errorCount++;
        }
    }
    /**
     * Record cache access
     * @param hit - Whether it was a cache hit
     */
    recordCacheAccess(hit) {
        if (hit) {
            this.cacheHits++;
        }
        else {
            this.cacheMisses++;
        }
    }
    /**
     * Update vector count
     * @param count - New vector count
     */
    updateVectorCount(count) {
        this.vectorCount = count;
    }
    /**
     * Get current health status
     * @returns Health status object; status is 'unhealthy' when any error
     *   condition holds, 'degraded' when only warnings hold, else 'healthy'
     */
    getHealthStatus() {
        const metrics = this.collectMetrics();
        const uptime = Date.now() - this.startTime;
        const warnings = [];
        const errors = [];
        // Check for warnings
        if (metrics.memoryUsage > 1024 * 1024 * 1024) { // > 1GB
            warnings.push('High memory usage detected');
        }
        // A hit rate of 0 with no recorded accesses means "no data", not a
        // poorly performing cache, so only warn once there is cache traffic.
        const cacheAccesses = this.cacheHits + this.cacheMisses;
        if (cacheAccesses > 0 && metrics.cacheHitRate < 0.5) {
            warnings.push('Low cache hit rate');
        }
        if (metrics.errorRate && metrics.errorRate > 0.05) {
            warnings.push('High error rate detected');
        }
        if (metrics.averageLatency && metrics.averageLatency > 1000) {
            warnings.push('High latency detected');
        }
        // Check for errors
        if (metrics.memoryUsage > 2 * 1024 * 1024 * 1024) { // > 2GB
            errors.push('Critical memory usage');
        }
        if (metrics.errorRate && metrics.errorRate > 0.2) {
            errors.push('Critical error rate');
        }
        // Determine overall status
        let status = 'healthy';
        if (errors.length > 0) {
            status = 'unhealthy';
        }
        else if (warnings.length > 0) {
            status = 'degraded';
        }
        return {
            status,
            instanceId: this.configManager.getInstanceId(),
            role: this.configManager.getRole(),
            uptime,
            lastCheck: new Date().toISOString(),
            metrics,
            warnings: warnings.length > 0 ? warnings : undefined,
            errors: errors.length > 0 ? errors : undefined
        };
    }
    /**
     * Get health check endpoint data
     * @returns JSON-serializable health data with rounded metrics
     */
    getHealthEndpointData() {
        const status = this.getHealthStatus();
        return {
            status: status.status,
            instanceId: status.instanceId,
            role: status.role,
            uptime: Math.floor(status.uptime / 1000), // Convert to seconds
            lastCheck: status.lastCheck,
            metrics: {
                vectorCount: status.metrics.vectorCount,
                cacheHitRate: Math.round(status.metrics.cacheHitRate * 100) / 100,
                memoryUsageMB: Math.round(status.metrics.memoryUsage / 1024 / 1024),
                cpuUsagePercent: Math.round(status.metrics.cpuUsage || 0),
                requestsPerSecond: Math.round(status.metrics.requestsPerSecond || 0),
                averageLatencyMs: Math.round(status.metrics.averageLatency || 0),
                errorRate: Math.round((status.metrics.errorRate || 0) * 100) / 100
            },
            warnings: status.warnings,
            errors: status.errors
        };
    }
    /**
     * Reset metrics (useful for testing).
     * Clears request, error, latency and cache counters and both sliding
     * windows; the vector count is left as is.
     */
    resetMetrics() {
        this.requestCount = 0;
        this.errorCount = 0;
        this.totalLatency = 0;
        this.cacheHits = 0;
        this.cacheMisses = 0;
        this.metricsWindow = [];
        this.latencyWindow = [];
    }
}
16603
+
15420
16604
  /**
15421
16605
  * BrainyData
15422
16606
  * Main class that provides the vector database functionality
@@ -15470,6 +16654,13 @@ class BrainyData {
15470
16654
  this.remoteServerConfig = null;
15471
16655
  this.serverSearchConduit = null;
15472
16656
  this.serverConnection = null;
16657
+ // Distributed mode properties
16658
+ this.distributedConfig = null;
16659
+ this.configManager = null;
16660
+ this.partitioner = null;
16661
+ this.operationalMode = null;
16662
+ this.domainDetector = null;
16663
+ this.healthMonitor = null;
15473
16664
  // Set dimensions to fixed value of 512 (Universal Sentence Encoder dimension)
15474
16665
  this._dimensions = 512;
15475
16666
  // Set distance function
@@ -15554,6 +16745,19 @@ class BrainyData {
15554
16745
  ...config.cache
15555
16746
  };
15556
16747
  }
16748
+ // Store distributed configuration
16749
+ if (config.distributed) {
16750
+ if (typeof config.distributed === 'boolean') {
16751
+ // Auto-mode enabled
16752
+ this.distributedConfig = {
16753
+ enabled: true
16754
+ };
16755
+ }
16756
+ else {
16757
+ // Explicit configuration
16758
+ this.distributedConfig = config.distributed;
16759
+ }
16760
+ }
15557
16761
  }
15558
16762
  /**
15559
16763
  * Check if the database is in read-only mode and throw an error if it is
@@ -15941,6 +17145,10 @@ class BrainyData {
15941
17145
  }
15942
17146
  // Initialize storage
15943
17147
  await this.storage.init();
17148
+ // Initialize distributed mode if configured
17149
+ if (this.distributedConfig) {
17150
+ await this.initializeDistributedMode();
17151
+ }
15944
17152
  // If using optimized index, set the storage adapter
15945
17153
  if (this.useOptimizedIndex && this.index instanceof HNSWIndexOptimized) {
15946
17154
  this.index.setStorage(this.storage);
@@ -16000,6 +17208,97 @@ class BrainyData {
16000
17208
  throw new Error(`Failed to initialize BrainyData: ${error}`);
16001
17209
  }
16002
17210
  }
17211
+ /**
17212
+ * Initialize distributed mode
17213
+ * Sets up configuration management, partitioning, and operational modes
17214
+ */
17215
+ async initializeDistributedMode() {
17216
+ if (!this.storage) {
17217
+ throw new Error('Storage must be initialized before distributed mode');
17218
+ }
17219
+ // Create configuration manager with mode hints
17220
+ this.configManager = new DistributedConfigManager(this.storage, this.distributedConfig || undefined, { readOnly: this.readOnly, writeOnly: this.writeOnly });
17221
+ // Initialize configuration
17222
+ const sharedConfig = await this.configManager.initialize();
17223
+ // Create partitioner based on strategy
17224
+ if (sharedConfig.settings.partitionStrategy === 'hash') {
17225
+ this.partitioner = new HashPartitioner(sharedConfig);
17226
+ }
17227
+ else {
17228
+ // Default to hash partitioner for now
17229
+ this.partitioner = new HashPartitioner(sharedConfig);
17230
+ }
17231
+ // Create operational mode based on role
17232
+ const role = this.configManager.getRole();
17233
+ this.operationalMode = OperationalModeFactory.createMode(role);
17234
+ // Validate that role matches the configured mode
17235
+ // Don't override explicitly set readOnly/writeOnly
17236
+ if (role === 'reader' && !this.readOnly) {
17237
+ console.warn('Distributed role is "reader" but readOnly is not set. Setting readOnly=true for consistency.');
17238
+ this.readOnly = true;
17239
+ this.writeOnly = false;
17240
+ }
17241
+ else if (role === 'writer' && !this.writeOnly) {
17242
+ console.warn('Distributed role is "writer" but writeOnly is not set. Setting writeOnly=true for consistency.');
17243
+ this.readOnly = false;
17244
+ this.writeOnly = true;
17245
+ }
17246
+ else if (role === 'hybrid' && (this.readOnly || this.writeOnly)) {
17247
+ console.warn('Distributed role is "hybrid" but readOnly or writeOnly is set. Clearing both for hybrid mode.');
17248
+ this.readOnly = false;
17249
+ this.writeOnly = false;
17250
+ }
17251
+ // Apply cache configuration from operational mode
17252
+ const modeCache = this.operationalMode.cacheStrategy;
17253
+ if (modeCache) {
17254
+ this.cacheConfig = {
17255
+ ...this.cacheConfig,
17256
+ hotCacheMaxSize: modeCache.hotCacheRatio * 1000000, // Convert ratio to size
17257
+ hotCacheEvictionThreshold: modeCache.hotCacheRatio,
17258
+ warmCacheTTL: modeCache.ttl,
17259
+ batchSize: modeCache.writeBufferSize || 100
17260
+ };
17261
+ // Update storage cache config if it supports it
17262
+ if (this.storage && 'updateCacheConfig' in this.storage) {
17263
+ this.storage.updateCacheConfig(this.cacheConfig);
17264
+ }
17265
+ }
17266
+ // Initialize domain detector
17267
+ this.domainDetector = new DomainDetector();
17268
+ // Initialize health monitor
17269
+ this.healthMonitor = new HealthMonitor(this.configManager);
17270
+ this.healthMonitor.start();
17271
+ // Set up config update listener
17272
+ this.configManager.setOnConfigUpdate((config) => {
17273
+ this.handleDistributedConfigUpdate(config);
17274
+ });
17275
+ if (this.loggingConfig?.verbose) {
17276
+ console.log(`Distributed mode initialized as ${role} with ${sharedConfig.settings.partitionStrategy} partitioning`);
17277
+ }
17278
+ }
17279
+ /**
17280
+ * Handle distributed configuration updates
17281
+ */
17282
+ handleDistributedConfigUpdate(config) {
17283
+ // Update partitioner if needed
17284
+ if (this.partitioner && config.settings) {
17285
+ this.partitioner = new HashPartitioner(config);
17286
+ }
17287
+ // Log configuration update
17288
+ if (this.loggingConfig?.verbose) {
17289
+ console.log('Distributed configuration updated:', config.version);
17290
+ }
17291
+ }
17292
+ /**
17293
+ * Get distributed health status
17294
+ * @returns Health status if distributed mode is enabled
17295
+ */
17296
+ getHealthStatus() {
17297
+ if (this.healthMonitor) {
17298
+ return this.healthMonitor.getHealthEndpointData();
17299
+ }
17300
+ return null;
17301
+ }
16003
17302
  /**
16004
17303
  * Connect to a remote Brainy server for search operations
16005
17304
  * @param serverUrl WebSocket URL of the remote Brainy server
@@ -16216,6 +17515,33 @@ class BrainyData {
16216
17515
  if (metadata && typeof metadata === 'object') {
16217
17516
  // Always make a copy without adding the ID
16218
17517
  metadataToSave = { ...metadata };
17518
+ // Add domain metadata if distributed mode is enabled
17519
+ if (this.domainDetector) {
17520
+ // First check if domain is already in metadata
17521
+ if (metadataToSave.domain) {
17522
+ // Domain already specified, keep it
17523
+ const domainInfo = this.domainDetector.detectDomain(metadataToSave);
17524
+ if (domainInfo.domainMetadata) {
17525
+ metadataToSave.domainMetadata = domainInfo.domainMetadata;
17526
+ }
17527
+ }
17528
+ else {
17529
+ // Try to detect domain from the data
17530
+ const dataToAnalyze = Array.isArray(vectorOrData) ? metadata : vectorOrData;
17531
+ const domainInfo = this.domainDetector.detectDomain(dataToAnalyze);
17532
+ if (domainInfo.domain) {
17533
+ metadataToSave.domain = domainInfo.domain;
17534
+ if (domainInfo.domainMetadata) {
17535
+ metadataToSave.domainMetadata = domainInfo.domainMetadata;
17536
+ }
17537
+ }
17538
+ }
17539
+ }
17540
+ // Add partition information if distributed mode is enabled
17541
+ if (this.partitioner) {
17542
+ const partition = this.partitioner.getPartition(id);
17543
+ metadataToSave.partition = partition;
17544
+ }
16219
17545
  }
16220
17546
  await this.storage.saveMetadata(id, metadataToSave);
16221
17547
  // Track metadata statistics
@@ -16225,6 +17551,11 @@ class BrainyData {
16225
17551
  }
16226
17552
  // Update HNSW index size (excluding verbs)
16227
17553
  await this.storage.updateHnswIndexSize(await this.getNounCount());
17554
+ // Update health metrics if in distributed mode
17555
+ if (this.healthMonitor) {
17556
+ const vectorCount = await this.getNounCount();
17557
+ this.healthMonitor.updateVectorCount(vectorCount);
17558
+ }
16228
17559
  // If addToRemote is true and we're connected to a remote server, add to remote as well
16229
17560
  if (options.addToRemote && this.isConnectedToRemoteServer()) {
16230
17561
  try {
@@ -16238,6 +17569,10 @@ class BrainyData {
16238
17569
  }
16239
17570
  catch (error) {
16240
17571
  console.error('Failed to add vector:', error);
17572
+ // Track error in health monitor
17573
+ if (this.healthMonitor) {
17574
+ this.healthMonitor.recordRequest(0, true);
17575
+ }
16241
17576
  throw new Error(`Failed to add vector: ${error}`);
16242
17577
  }
16243
17578
  }
@@ -16574,6 +17909,7 @@ class BrainyData {
16574
17909
  * @returns Array of search results
16575
17910
  */
16576
17911
  async search(queryVectorOrData, k = 10, options = {}) {
17912
+ const startTime = Date.now();
16577
17913
  // Validate input is not null or undefined
16578
17914
  if (queryVectorOrData === null || queryVectorOrData === undefined) {
16579
17915
  throw new Error('Query cannot be null or undefined');
@@ -16625,7 +17961,24 @@ class BrainyData {
16625
17961
  return this.searchCombined(queryVectorOrData, k, options);
16626
17962
  }
16627
17963
  // Default behavior (backward compatible): search locally
16628
- return this.searchLocal(queryVectorOrData, k, options);
17964
+ try {
17965
+ const results = await this.searchLocal(queryVectorOrData, k, options);
17966
+ // Track successful search in health monitor
17967
+ if (this.healthMonitor) {
17968
+ const latency = Date.now() - startTime;
17969
+ this.healthMonitor.recordRequest(latency, false);
17970
+ this.healthMonitor.recordCacheAccess(results.length > 0);
17971
+ }
17972
+ return results;
17973
+ }
17974
+ catch (error) {
17975
+ // Track error in health monitor
17976
+ if (this.healthMonitor) {
17977
+ const latency = Date.now() - startTime;
17978
+ this.healthMonitor.recordRequest(latency, true);
17979
+ }
17980
+ throw error;
17981
+ }
16629
17982
  }
16630
17983
  /**
16631
17984
  * Search the local database for similar vectors
@@ -16697,7 +18050,15 @@ class BrainyData {
16697
18050
  if (result.metadata && typeof result.metadata === 'object') {
16698
18051
  const metadata = result.metadata;
16699
18052
  // Exclude placeholder nouns from search results
16700
- return !metadata.isPlaceholder;
18053
+ if (metadata.isPlaceholder) {
18054
+ return false;
18055
+ }
18056
+ // Apply domain filter if specified
18057
+ if (options.filter?.domain) {
18058
+ if (metadata.domain !== options.filter.domain) {
18059
+ return false;
18060
+ }
18061
+ }
16701
18062
  }
16702
18063
  return true;
16703
18064
  });
@@ -18889,6 +20250,26 @@ class BrainyData {
18889
20250
  // Sort by score and limit to k results
18890
20251
  return allResults.sort((a, b) => b.score - a.score).slice(0, k);
18891
20252
  }
20253
+ /**
20254
+ * Cleanup distributed resources
20255
+ * Should be called when shutting down the instance
20256
+ */
20257
+ async cleanup() {
20258
+ // Stop real-time updates
20259
+ if (this.updateTimerId) {
20260
+ clearInterval(this.updateTimerId);
20261
+ this.updateTimerId = null;
20262
+ }
20263
+ // Clean up distributed mode resources
20264
+ if (this.healthMonitor) {
20265
+ this.healthMonitor.stop();
20266
+ }
20267
+ if (this.configManager) {
20268
+ await this.configManager.cleanup();
20269
+ }
20270
+ // Clean up worker pools
20271
+ await cleanupWorkerPools();
20272
+ }
18892
20273
  }
18893
20274
 
18894
20275
  /**