@soulcraft/brainy 0.32.0 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/unified.js CHANGED
@@ -4432,6 +4432,275 @@ async function getStatistics(instance, options = {}) {
  }
  }
 
+ /**
+ * Utility functions for processing JSON documents for vectorization and search
+ */
+ /**
+ * Extracts text from a JSON object for vectorization
+ * This function recursively processes the JSON object and extracts text from all fields
+ * It can also prioritize specific fields if provided
+ *
+ * @param jsonObject The JSON object to extract text from
+ * @param options Configuration options for text extraction
+ * @returns A string containing the extracted text
+ */
+ function extractTextFromJson(jsonObject, options = {}) {
+ // Set default options
+ const { priorityFields = [], excludeFields = [], includeFieldNames = true, maxDepth = 5, currentDepth = 0, fieldPath = [] } = options;
+ // If input is not an object or array, or we've reached max depth, return as string
+ if (jsonObject === null ||
+ jsonObject === undefined ||
+ typeof jsonObject !== 'object' ||
+ currentDepth >= maxDepth) {
+ return String(jsonObject || '');
+ }
+ const extractedText = [];
+ const priorityText = [];
+ // Process arrays
+ if (Array.isArray(jsonObject)) {
+ for (let i = 0; i < jsonObject.length; i++) {
+ const value = jsonObject[i];
+ const newPath = [...fieldPath, i.toString()];
+ // Recursively extract text from array items
+ const itemText = extractTextFromJson(value, {
+ priorityFields,
+ excludeFields,
+ includeFieldNames,
+ maxDepth,
+ currentDepth: currentDepth + 1,
+ fieldPath: newPath
+ });
+ if (itemText) {
+ extractedText.push(itemText);
+ }
+ }
+ }
+ // Process objects
+ else {
+ for (const [key, value] of Object.entries(jsonObject)) {
+ // Skip excluded fields
+ if (excludeFields.includes(key)) {
+ continue;
+ }
+ const newPath = [...fieldPath, key];
+ const fullPath = newPath.join('.');
+ // Check if this is a priority field
+ const isPriority = priorityFields.some(field => {
+ // Exact match
+ if (field === key)
+ return true;
+ // Path match
+ if (field === fullPath)
+ return true;
+ // Wildcard match (e.g., "user.*" matches "user.name", "user.email", etc.)
+ if (field.endsWith('.*') && fullPath.startsWith(field.slice(0, -2)))
+ return true;
+ return false;
+ });
+ // Get the field value as text
+ let fieldText;
+ if (typeof value === 'object' && value !== null) {
+ // Recursively extract text from nested objects
+ fieldText = extractTextFromJson(value, {
+ priorityFields,
+ excludeFields,
+ includeFieldNames,
+ maxDepth,
+ currentDepth: currentDepth + 1,
+ fieldPath: newPath
+ });
+ }
+ else {
+ fieldText = String(value || '');
+ }
+ // Add field name if requested
+ if (includeFieldNames && fieldText) {
+ fieldText = `${key}: ${fieldText}`;
+ }
+ // Add to appropriate collection
+ if (fieldText) {
+ if (isPriority) {
+ priorityText.push(fieldText);
+ }
+ else {
+ extractedText.push(fieldText);
+ }
+ }
+ }
+ }
+ // Combine priority text (repeated for emphasis) and regular text
+ return [...priorityText, ...priorityText, ...extractedText].join(' ');
+ }
+ /**
+ * Prepares a JSON document for vectorization
+ * This function extracts text from the JSON document and formats it for optimal vectorization
+ *
+ * @param jsonDocument The JSON document to prepare
+ * @param options Configuration options for preparation
+ * @returns A string ready for vectorization
+ */
+ function prepareJsonForVectorization(jsonDocument, options = {}) {
+ // If input is a string, try to parse it as JSON
+ let document = jsonDocument;
+ if (typeof jsonDocument === 'string') {
+ try {
+ document = JSON.parse(jsonDocument);
+ }
+ catch (e) {
+ // If parsing fails, treat it as a plain string
+ return jsonDocument;
+ }
+ }
+ // If not an object after parsing, return as is
+ if (typeof document !== 'object' || document === null) {
+ return String(document || '');
+ }
+ // Extract text from the document
+ return extractTextFromJson(document, options);
+ }
+ /**
+ * Extracts text from a specific field in a JSON document
+ * This is useful for searching within specific fields
+ *
+ * @param jsonDocument The JSON document to extract from
+ * @param fieldPath The path to the field (e.g., "user.name" or "addresses[0].city")
+ * @returns The extracted text or empty string if field not found
+ */
+ function extractFieldFromJson(jsonDocument, fieldPath) {
+ // If input is a string, try to parse it as JSON
+ let document = jsonDocument;
+ if (typeof jsonDocument === 'string') {
+ try {
+ document = JSON.parse(jsonDocument);
+ }
+ catch (e) {
+ // If parsing fails, return empty string
+ return '';
+ }
+ }
+ // If not an object after parsing, return empty string
+ if (typeof document !== 'object' || document === null) {
+ return '';
+ }
+ // Parse the field path
+ const parts = fieldPath.split('.');
+ let current = document;
+ // Navigate through the path
+ for (const part of parts) {
+ // Handle array indexing (e.g., "addresses[0]")
+ const match = part.match(/^([^[]+)(?:\[(\d+)\])?$/);
+ if (!match) {
+ return '';
+ }
+ const [, key, indexStr] = match;
+ // Move to the next level
+ current = current[key];
+ // If we have an array index, access that element
+ if (indexStr !== undefined && Array.isArray(current)) {
+ const index = parseInt(indexStr, 10);
+ current = current[index];
+ }
+ // If we've reached a null or undefined value, return empty string
+ if (current === null || current === undefined) {
+ return '';
+ }
+ }
+ // Convert the final value to string
+ return typeof current === 'object'
+ ? JSON.stringify(current)
+ : String(current);
+ }
+
+ /**
+ * Utility functions for tracking and managing field names in JSON documents
+ */
+ /**
+ * Extracts field names from a JSON document
+ * @param jsonObject The JSON object to extract field names from
+ * @param options Configuration options
+ * @returns An array of field paths (e.g., "user.name", "addresses[0].city")
+ */
+ function extractFieldNamesFromJson(jsonObject, options = {}) {
+ const { maxDepth = 5, currentDepth = 0, currentPath = '', fieldNames = new Set() } = options;
+ if (jsonObject === null ||
+ jsonObject === undefined ||
+ typeof jsonObject !== 'object' ||
+ currentDepth >= maxDepth) {
+ return Array.from(fieldNames);
+ }
+ if (Array.isArray(jsonObject)) {
+ // For arrays, we'll just check the first item to avoid explosion of paths
+ if (jsonObject.length > 0) {
+ const arrayPath = currentPath ? `${currentPath}[0]` : '[0]';
+ extractFieldNamesFromJson(jsonObject[0], {
+ maxDepth,
+ currentDepth: currentDepth + 1,
+ currentPath: arrayPath,
+ fieldNames
+ });
+ }
+ }
+ else {
+ // For objects, process each property
+ for (const key of Object.keys(jsonObject)) {
+ const value = jsonObject[key];
+ const fieldPath = currentPath ? `${currentPath}.${key}` : key;
+ // Add this field path
+ fieldNames.add(fieldPath);
+ // Recursively process nested objects
+ if (typeof value === 'object' && value !== null) {
+ extractFieldNamesFromJson(value, {
+ maxDepth,
+ currentDepth: currentDepth + 1,
+ currentPath: fieldPath,
+ fieldNames
+ });
+ }
+ }
+ }
+ return Array.from(fieldNames);
+ }
+ /**
+ * Maps field names to standard field names based on common patterns
+ * @param fieldName The field name to map
+ * @returns The standard field name if a match is found, or null if no match
+ */
+ function mapToStandardField(fieldName) {
+ // Standard field mappings
+ const standardMappings = {
+ 'title': ['title', 'name', 'headline', 'subject'],
+ 'description': ['description', 'summary', 'content', 'text', 'body'],
+ 'author': ['author', 'creator', 'user', 'owner', 'by'],
+ 'date': ['date', 'created', 'createdAt', 'timestamp', 'published'],
+ 'url': ['url', 'link', 'href', 'source'],
+ 'image': ['image', 'thumbnail', 'photo', 'picture'],
+ 'tags': ['tags', 'categories', 'keywords', 'topics']
+ };
+ // Check for matches
+ for (const [standardField, possibleMatches] of Object.entries(standardMappings)) {
+ // Exact match
+ if (possibleMatches.includes(fieldName)) {
+ return standardField;
+ }
+ // Path match (e.g., "user.name" matches "name")
+ const parts = fieldName.split('.');
+ const lastPart = parts[parts.length - 1];
+ if (possibleMatches.includes(lastPart)) {
+ return standardField;
+ }
+ // Array match (e.g., "items[0].name" matches "name")
+ if (fieldName.includes('[')) {
+ for (const part of parts) {
+ const cleanPart = part.split('[')[0];
+ if (possibleMatches.includes(cleanPart)) {
+ return standardField;
+ }
+ }
+ }
+ }
+ return null;
+ }
+
  /**
  * HNSW (Hierarchical Navigable Small World) Index implementation
  * Based on the paper: "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs"
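A minimal usage sketch of the JSON helpers added above; the outputs follow directly from the code as written (the helpers live at module scope in unified.js, so the bare calls assume access to that scope):

    const doc = {
      user: { name: 'Ada', email: 'ada@example.com' },
      tags: ['vector', 'search'],
      internal: { id: 42 }
    }

    // Priority matching happens per recursion level, so "name: Ada" is
    // repeated inside the nested call before "user: " is prefixed once.
    extractTextFromJson(doc, { priorityFields: ['user.name'], excludeFields: ['internal'] })
    // => "user: name: Ada name: Ada email: ada@example.com tags: vector search"

    // Strings are parsed as JSON when possible, otherwise returned unchanged.
    prepareJsonForVectorization('{"title":"Hello"}') // => "title: Hello"

    // Dot paths with optional [index] segments navigate nested values.
    extractFieldFromJson({ addresses: [{ city: 'Oslo' }] }, 'addresses[0].city') // => "Oslo"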
@@ -5708,6 +5977,107 @@ class BaseStorageAdapter {
  // Call the protected flushStatistics method to immediately write to storage
  await this.flushStatistics();
  }
+ /**
+ * Track field names from a JSON document
+ * @param jsonDocument The JSON document to extract field names from
+ * @param service The service that inserted the data
+ */
+ async trackFieldNames(jsonDocument, service) {
+ // Skip if not a JSON object
+ if (typeof jsonDocument !== 'object' || jsonDocument === null || Array.isArray(jsonDocument)) {
+ return;
+ }
+ // Get current statistics from cache or storage
+ let statistics = this.statisticsCache;
+ if (!statistics) {
+ statistics = await this.getStatisticsData();
+ if (!statistics) {
+ statistics = this.createDefaultStatistics();
+ }
+ // Update the cache
+ this.statisticsCache = {
+ ...statistics,
+ nounCount: { ...statistics.nounCount },
+ verbCount: { ...statistics.verbCount },
+ metadataCount: { ...statistics.metadataCount },
+ fieldNames: { ...statistics.fieldNames },
+ standardFieldMappings: { ...statistics.standardFieldMappings }
+ };
+ }
+ // Ensure fieldNames exists
+ if (!this.statisticsCache.fieldNames) {
+ this.statisticsCache.fieldNames = {};
+ }
+ // Ensure standardFieldMappings exists
+ if (!this.statisticsCache.standardFieldMappings) {
+ this.statisticsCache.standardFieldMappings = {};
+ }
+ // Extract field names from the JSON document
+ const fieldNames = extractFieldNamesFromJson(jsonDocument);
+ // Initialize service entry if it doesn't exist
+ if (!this.statisticsCache.fieldNames[service]) {
+ this.statisticsCache.fieldNames[service] = [];
+ }
+ // Add new field names to the service's list
+ for (const fieldName of fieldNames) {
+ if (!this.statisticsCache.fieldNames[service].includes(fieldName)) {
+ this.statisticsCache.fieldNames[service].push(fieldName);
+ }
+ // Map to standard field if possible
+ const standardField = mapToStandardField(fieldName);
+ if (standardField) {
+ // Initialize standard field entry if it doesn't exist
+ if (!this.statisticsCache.standardFieldMappings[standardField]) {
+ this.statisticsCache.standardFieldMappings[standardField] = {};
+ }
+ // Initialize service entry if it doesn't exist
+ if (!this.statisticsCache.standardFieldMappings[standardField][service]) {
+ this.statisticsCache.standardFieldMappings[standardField][service] = [];
+ }
+ // Add field name to standard field mapping if not already there
+ if (!this.statisticsCache.standardFieldMappings[standardField][service].includes(fieldName)) {
+ this.statisticsCache.standardFieldMappings[standardField][service].push(fieldName);
+ }
+ }
+ }
+ // Update timestamp
+ this.statisticsCache.lastUpdated = new Date().toISOString();
+ // Schedule a batch update
+ this.statisticsModified = true;
+ this.scheduleBatchUpdate();
+ }
+ /**
+ * Get available field names by service
+ * @returns Record of field names by service
+ */
+ async getAvailableFieldNames() {
+ // Get current statistics from cache or storage
+ let statistics = this.statisticsCache;
+ if (!statistics) {
+ statistics = await this.getStatisticsData();
+ if (!statistics) {
+ return {};
+ }
+ }
+ // Return field names by service
+ return statistics.fieldNames || {};
+ }
+ /**
+ * Get standard field mappings
+ * @returns Record of standard field mappings
+ */
+ async getStandardFieldMappings() {
+ // Get current statistics from cache or storage
+ let statistics = this.statisticsCache;
+ if (!statistics) {
+ statistics = await this.getStatisticsData();
+ if (!statistics) {
+ return {};
+ }
+ }
+ // Return standard field mappings
+ return statistics.standardFieldMappings || {};
+ }
  /**
  * Create default statistics data
  * @returns Default statistics data
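A sketch of what the new tracking methods record, assuming `adapter` is an instance of a BaseStorageAdapter subclass (a hypothetical stand-in) and using the mapping table from mapToStandardField above:

    await adapter.trackFieldNames({ headline: 'Launch', author: { by: 'Kim' } }, 'news-feed')

    await adapter.getAvailableFieldNames()
    // => { 'news-feed': ['headline', 'author', 'author.by'] }

    // mapToStandardField also matches on the last path segment, so
    // "author.by" folds into the standard "author" field:
    await adapter.getStandardFieldMappings()
    // => { title: { 'news-feed': ['headline'] },
    //      author: { 'news-feed': ['author', 'author.by'] } }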
@@ -5718,6 +6088,8 @@ class BaseStorageAdapter {
  verbCount: {},
  metadataCount: {},
  hnswIndexSize: 0,
+ fieldNames: {},
+ standardFieldMappings: {},
  lastUpdated: new Date().toISOString()
  };
  }
@@ -8055,6 +8427,7 @@ var StorageType;
  StorageType[StorageType["OPFS"] = 1] = "OPFS";
  StorageType[StorageType["FILESYSTEM"] = 2] = "FILESYSTEM";
  StorageType[StorageType["S3"] = 3] = "S3";
+ StorageType[StorageType["REMOTE_API"] = 4] = "REMOTE_API";
  })(StorageType || (StorageType = {}));
  /**
  * Multi-level cache manager for efficient data access
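This follows the usual TypeScript enum compilation, so the new member is addressable in both directions:

    StorageType.REMOTE_API // => 4
    StorageType[4]         // => 'REMOTE_API'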
@@ -8078,6 +8451,8 @@ class CacheManager {
  this.lastAutoTuneTime = 0;
  this.autoTuneInterval = 5 * 60 * 1000; // 5 minutes
  this.storageStatistics = null;
+ // Store options for later reference
+ this.options = options;
  // Detect environment
  this.environment = this.detectEnvironment();
  // Set storage types based on environment
@@ -8127,13 +8502,26 @@ class CacheManager {
  }
  }
  /**
- * Detect the optimal cache size based on available memory
+ * Detect the optimal cache size based on available memory and operating mode
+ *
+ * Enhanced to better handle large datasets in S3 or other storage:
+ * - Increases cache size for read-only mode
+ * - Adjusts based on total dataset size when available
+ * - Provides more aggressive caching for large datasets
+ * - Optimizes memory usage based on environment
  */
  detectOptimalCacheSize() {
  try {
  // Default to a conservative value
  const defaultSize = 1000;
- // In Node.js, use available system memory
+ // Get the total dataset size if available
+ const totalItems = this.storageStatistics ?
+ (this.storageStatistics.totalNodes || 0) + (this.storageStatistics.totalEdges || 0) : 0;
+ // Determine if we're dealing with a large dataset (>100K items)
+ const isLargeDataset = totalItems > 100000;
+ // Check if we're in read-only mode (from parent BrainyData instance)
+ const isReadOnly = this.options?.readOnly || false;
+ // In Node.js, use available system memory with enhanced allocation
  if (this.environment === Environment$1.NODE) {
  try {
  // Use dynamic import to avoid ESLint warning
@@ -8142,12 +8530,36 @@ class CacheManager {
  return require('os');
  };
  const os = getOS();
+ const totalMemory = os.totalmem();
  const freeMemory = os.freemem();
  // Estimate average entry size (in bytes)
  // This is a conservative estimate for complex objects with vectors
  const ESTIMATED_BYTES_PER_ENTRY = 1024; // 1KB per entry
- // Use 10% of free memory, with a minimum of 1000 entries
- const optimalSize = Math.max(Math.floor(freeMemory * 0.1 / ESTIMATED_BYTES_PER_ENTRY), 1000);
+ // Base memory percentage - 10% by default
+ let memoryPercentage = 0.1;
+ // Adjust based on operating mode and dataset size
+ if (isReadOnly) {
+ // In read-only mode, we can use more memory for caching
+ memoryPercentage = 0.25; // 25% of free memory
+ // For large datasets in read-only mode, be even more aggressive
+ if (isLargeDataset) {
+ memoryPercentage = 0.4; // 40% of free memory
+ }
+ }
+ else if (isLargeDataset) {
+ // For large datasets in normal mode, increase slightly
+ memoryPercentage = 0.15; // 15% of free memory
+ }
+ // Calculate optimal size based on adjusted percentage
+ const optimalSize = Math.max(Math.floor(freeMemory * memoryPercentage / ESTIMATED_BYTES_PER_ENTRY), 1000);
+ // If we know the total dataset size, cap at a reasonable percentage
+ if (totalItems > 0) {
+ // In read-only mode, we can cache a larger percentage
+ const maxPercentage = isReadOnly ? 0.5 : 0.3;
+ const maxItems = Math.ceil(totalItems * maxPercentage);
+ // Return the smaller of the two to avoid excessive memory usage
+ return Math.min(optimalSize, maxItems);
+ }
  return optimalSize;
  }
  catch (error) {
@@ -8155,10 +8567,36 @@ class CacheManager {
  return defaultSize;
  }
  }
- // In browser, use navigator.deviceMemory if available
+ // In browser, use navigator.deviceMemory with enhanced allocation
  if (this.environment === Environment$1.BROWSER && navigator.deviceMemory) {
- // deviceMemory is in GB, scale accordingly
- return Math.max(navigator.deviceMemory * 500, 1000);
+ // Base entries per GB
+ let entriesPerGB = 500;
+ // Adjust based on operating mode and dataset size
+ if (isReadOnly) {
+ entriesPerGB = 800; // More aggressive caching in read-only mode
+ if (isLargeDataset) {
+ entriesPerGB = 1000; // Even more aggressive for large datasets
+ }
+ }
+ else if (isLargeDataset) {
+ entriesPerGB = 600; // Slightly more aggressive for large datasets
+ }
+ // Calculate based on device memory
+ const browserCacheSize = Math.max(navigator.deviceMemory * entriesPerGB, 1000);
+ // If we know the total dataset size, cap at a reasonable percentage
+ if (totalItems > 0) {
+ // In read-only mode, we can cache a larger percentage
+ const maxPercentage = isReadOnly ? 0.4 : 0.25;
+ const maxItems = Math.ceil(totalItems * maxPercentage);
+ // Return the smaller of the two to avoid excessive memory usage
+ return Math.min(browserCacheSize, maxItems);
+ }
+ return browserCacheSize;
+ }
+ // For worker environments or when memory detection fails
+ if (this.environment === Environment$1.WORKER) {
+ // Workers typically have limited memory, be conservative
+ return isReadOnly ? 2000 : 1000;
  }
  return defaultSize;
  }
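A worked example of the Node.js branch under assumed conditions (read-only mode, 8 GB of free memory, 500,000 stored items; all values illustrative):

    const freeMemory = 8e9, totalItems = 500000
    const memoryPercentage = 0.4 // read-only + large dataset (>100K items)
    const optimalSize = Math.max(Math.floor(freeMemory * memoryPercentage / 1024), 1000)
    // => 3,125,000 entries before capping
    const maxItems = Math.ceil(totalItems * 0.5) // read-only cap: 50% of items
    Math.min(optimalSize, maxItems) // => 250,000 cache entries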
@@ -8220,30 +8658,57 @@ class CacheManager {
  }
  }
  /**
- * Tune hot cache size based on statistics and environment
+ * Tune hot cache size based on statistics, environment, and operating mode
  *
  * The hot cache size is tuned based on:
  * 1. Available memory in the current environment
  * 2. Total number of nodes and edges in the system
  * 3. Cache hit/miss ratio
+ * 4. Operating mode (read-only vs. read-write)
+ * 5. Storage type (S3, filesystem, memory)
  *
- * Algorithm:
- * - Start with a size based on available memory
- * - If storage statistics are available, consider caching a percentage of total items
- * - If hit ratio is low, increase the cache size to improve performance
- * - Ensure a reasonable minimum size to maintain basic functionality
+ * Enhanced algorithm:
+ * - Start with a size based on available memory and operating mode
+ * - For large datasets in S3 or other remote storage, use more aggressive caching
+ * - Adjust based on access patterns (read-heavy vs. write-heavy)
+ * - For read-only mode, prioritize cache size over eviction speed
+ * - Dynamically adjust based on hit/miss ratio and query patterns
  */
  tuneHotCacheSize() {
  // Start with the base size from environment detection
  let optimalSize = this.detectOptimalCacheSize();
+ // Check if we're in read-only mode
+ const isReadOnly = this.options?.readOnly || false;
+ // Check if we're using S3 or other remote storage
+ const isRemoteStorage = this.coldStorageType === StorageType.S3 ||
+ this.coldStorageType === StorageType.REMOTE_API;
  // If we have storage statistics, adjust based on total nodes/edges
  if (this.storageStatistics) {
  const totalItems = (this.storageStatistics.totalNodes || 0) +
  (this.storageStatistics.totalEdges || 0);
  // If total items is significant, adjust cache size
  if (totalItems > 0) {
- // Use a percentage of total items, with a cap based on memory
- const percentageToCache = 0.2; // Cache 20% of items by default
+ // Base percentage to cache - adjusted based on mode and storage
+ let percentageToCache = 0.2; // Cache 20% of items by default
+ // For read-only mode, increase cache percentage
+ if (isReadOnly) {
+ percentageToCache = 0.3; // 30% for read-only mode
+ // For remote storage in read-only mode, be even more aggressive
+ if (isRemoteStorage) {
+ percentageToCache = 0.4; // 40% for remote storage in read-only mode
+ }
+ }
+ // For remote storage in normal mode, increase slightly
+ else if (isRemoteStorage) {
+ percentageToCache = 0.25; // 25% for remote storage
+ }
+ // For large datasets, cap the percentage to avoid excessive memory usage
+ if (totalItems > 1000000) { // Over 1 million items
+ percentageToCache = Math.min(percentageToCache, 0.15);
+ }
+ else if (totalItems > 100000) { // Over 100K items
+ percentageToCache = Math.min(percentageToCache, 0.25);
+ }
  const statisticsBasedSize = Math.ceil(totalItems * percentageToCache);
  // Use the smaller of the two to avoid memory issues
  optimalSize = Math.min(optimalSize, statisticsBasedSize);
@@ -8253,16 +8718,57 @@ class CacheManager {
  const totalAccesses = this.stats.hits + this.stats.misses;
  if (totalAccesses > 100) {
  const hitRatio = this.stats.hits / totalAccesses;
- // If hit ratio is high, we might have a good cache size already
+ // Base adjustment factor
+ let hitRatioFactor = 1.0;
  // If hit ratio is low, we might need a larger cache
  if (hitRatio < 0.5) {
- // Increase cache size by up to 50% if hit ratio is low
- const hitRatioFactor = 1 + (0.5 - hitRatio);
+ // Calculate adjustment factor based on hit ratio
+ const baseAdjustment = 0.5 - hitRatio;
+ // For read-only mode or remote storage, be more aggressive
+ if (isReadOnly || isRemoteStorage) {
+ hitRatioFactor = 1 + (baseAdjustment * 1.5); // Up to 75% increase
+ }
+ else {
+ hitRatioFactor = 1 + baseAdjustment; // Up to 50% increase
+ }
+ optimalSize = Math.ceil(optimalSize * hitRatioFactor);
+ }
+ // If hit ratio is very high, we might be able to reduce cache size slightly
+ else if (hitRatio > 0.9 && !isReadOnly && !isRemoteStorage) {
+ // Only reduce cache size in normal mode with local storage
+ // and only if hit ratio is very high
+ hitRatioFactor = 0.9; // 10% reduction
  optimalSize = Math.ceil(optimalSize * hitRatioFactor);
  }
  }
- // Ensure we have a reasonable minimum size
- optimalSize = Math.max(optimalSize, 1000);
+ // Check for operation patterns if available
+ if (this.storageStatistics?.operations) {
+ const ops = this.storageStatistics.operations;
+ const totalOps = ops.total || 1;
+ // Calculate read/write ratio
+ const readOps = (ops.search || 0) + (ops.get || 0);
+ const writeOps = (ops.add || 0) + (ops.update || 0) + (ops.delete || 0);
+ if (totalOps > 100) {
+ const readRatio = readOps / totalOps;
+ // For read-heavy workloads, increase cache size
+ if (readRatio > 0.8) {
+ // More aggressive for remote storage
+ const readAdjustment = isRemoteStorage ? 1.3 : 1.2;
+ optimalSize = Math.ceil(optimalSize * readAdjustment);
+ }
+ }
+ }
+ // Ensure we have a reasonable minimum size based on environment and mode
+ let minSize = 1000; // Default minimum
+ // For read-only mode, use a higher minimum
+ if (isReadOnly) {
+ minSize = 2000;
+ }
+ // For remote storage, use an even higher minimum
+ if (isRemoteStorage) {
+ minSize = isReadOnly ? 3000 : 2000;
+ }
+ optimalSize = Math.max(optimalSize, minSize);
  // Update the hot cache max size
  this.hotCacheMaxSize = optimalSize;
  this.stats.maxSize = optimalSize;
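Continuing the assumed scenario above (read-only, S3 cold storage, 500,000 items, detectOptimalCacheSize() returning 250,000, hit ratio 0.4, no operation stats yet), the tuning steps work out as:

    // percentageToCache: 0.4 (read-only + remote), capped to 0.25 (>100K items)
    // statisticsBasedSize = Math.ceil(500000 * 0.25) = 125,000
    // optimalSize = Math.min(250000, 125000) = 125,000
    // hitRatio 0.4 < 0.5: factor = 1 + (0.5 - 0.4) * 1.5 = 1.15
    // optimalSize = Math.ceil(125000 * 1.15) = 143,750
    // read-only remote minimum is 3,000, so hotCacheMaxSize = 143,750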
@@ -8363,7 +8869,7 @@ class CacheManager {
  this.warmCacheTTL = ttl;
  }
  /**
- * Tune batch size based on statistics and environment
+ * Tune batch size based on environment, statistics, and operating mode
  *
  * The batch size determines how many items are processed in a single batch
  * for operations like prefetching. It is tuned based on:
@@ -8371,42 +8877,93 @@ class CacheManager {
  * 2. Available memory
  * 3. Operation patterns
  * 4. Cache hit/miss ratio
+ * 5. Operating mode (read-only vs. read-write)
+ * 6. Storage type (S3, filesystem, memory)
+ * 7. Dataset size
  *
- * Algorithm:
+ * Enhanced algorithm:
  * - Start with a default based on the environment
- * - Adjust based on available memory in browsers
- * - For bulk-heavy workloads, use a larger batch size
- * - For high hit ratios, use smaller batches (items likely in cache)
- * - For low hit ratios, use larger batches (need to fetch more items)
+ * - For large datasets in S3 or other remote storage, use larger batches
+ * - For read-only mode, use larger batches to improve throughput
+ * - Dynamically adjust based on network latency and throughput
+ * - Balance between memory usage and performance
  */
  tuneBatchSize() {
  // Default batch size
  let batchSize = 10;
- // Adjust based on environment
+ // Check if we're in read-only mode
+ const isReadOnly = this.options?.readOnly || false;
+ // Check if we're using S3 or other remote storage
+ const isRemoteStorage = this.coldStorageType === StorageType.S3 ||
+ this.coldStorageType === StorageType.REMOTE_API;
+ // Get the total dataset size if available
+ const totalItems = this.storageStatistics ?
+ (this.storageStatistics.totalNodes || 0) + (this.storageStatistics.totalEdges || 0) : 0;
+ // Determine if we're dealing with a large dataset
+ const isLargeDataset = totalItems > 100000;
+ const isVeryLargeDataset = totalItems > 1000000;
+ // Base batch size adjustment based on environment
  if (this.environment === Environment$1.NODE) {
  // Node.js can handle larger batches
- batchSize = 20;
+ batchSize = isReadOnly ? 30 : 20;
+ // For remote storage, increase batch size
+ if (isRemoteStorage) {
+ batchSize = isReadOnly ? 50 : 30;
+ }
+ // For large datasets, adjust batch size
+ if (isLargeDataset) {
+ batchSize = Math.min(100, batchSize * 1.5);
+ }
+ // For very large datasets, adjust even more
+ if (isVeryLargeDataset) {
+ batchSize = Math.min(200, batchSize * 2);
+ }
  }
  else if (this.environment === Environment$1.BROWSER) {
  // Browsers might need smaller batches
- batchSize = 10;
+ batchSize = isReadOnly ? 15 : 10;
  // If we have memory information, adjust accordingly
  if (navigator.deviceMemory) {
  // Scale batch size with available memory
- batchSize = Math.max(5, Math.min(20, Math.floor(navigator.deviceMemory * 2)));
+ const memoryFactor = isReadOnly ? 3 : 2;
+ batchSize = Math.max(5, Math.min(30, Math.floor(navigator.deviceMemory * memoryFactor)));
+ // For large datasets, adjust based on memory
+ if (isLargeDataset && navigator.deviceMemory > 4) {
+ batchSize = Math.min(50, batchSize * 1.5);
+ }
  }
  }
+ else if (this.environment === Environment$1.WORKER) {
+ // Workers can handle moderate batch sizes
+ batchSize = isReadOnly ? 20 : 15;
+ }
  // If we have storage statistics with operation counts, adjust based on operation patterns
  if (this.storageStatistics && this.storageStatistics.operations) {
  const ops = this.storageStatistics.operations;
  const totalOps = ops.total || 1;
- const bulkOps = (ops.search || 0);
+ const searchOps = (ops.search || 0);
+ const getOps = (ops.get || 0);
  if (totalOps > 100) {
- const bulkRatio = bulkOps / totalOps;
- // For bulk-heavy workloads, use larger batch size
- if (bulkRatio > 0.7) {
- // Bulk-heavy, increase batch size (up to 2x)
- batchSize = Math.min(50, Math.ceil(batchSize * 1.5));
+ // Calculate search and get ratios
+ const searchRatio = searchOps / totalOps;
+ const getRatio = getOps / totalOps;
+ // For search-heavy workloads, use larger batch size
+ if (searchRatio > 0.6) {
+ // Search-heavy, increase batch size
+ const searchFactor = isRemoteStorage ? 1.8 : 1.5;
+ batchSize = Math.min(isRemoteStorage ? 200 : 100, Math.ceil(batchSize * searchFactor));
+ }
+ // For get-heavy workloads, adjust batch size
+ if (getRatio > 0.6) {
+ // Get-heavy, adjust batch size based on storage type
+ if (isRemoteStorage) {
+ // For remote storage, larger batches reduce network overhead
+ batchSize = Math.min(150, Math.ceil(batchSize * 1.5));
+ }
+ else {
+ // For local storage, smaller batches might be more efficient
+ batchSize = Math.max(10, Math.ceil(batchSize * 0.9));
+ }
  }
  }
  }
@@ -8414,17 +8971,46 @@ class CacheManager {
  const totalAccesses = this.stats.hits + this.stats.misses;
  if (totalAccesses > 100) {
  const hitRatio = this.stats.hits / totalAccesses;
+ // Base adjustment factors
+ let increaseFactorForLowHitRatio = isRemoteStorage ? 1.5 : 1.2;
+ let decreaseFactorForHighHitRatio = 0.8;
+ // In read-only mode, be more aggressive with batch size adjustments
+ if (isReadOnly) {
+ increaseFactorForLowHitRatio = isRemoteStorage ? 2.0 : 1.5;
+ decreaseFactorForHighHitRatio = 0.9; // Less reduction in read-only mode
+ }
  // If hit ratio is high, we can use smaller batches
- // If hit ratio is low, we might need larger batches
- if (hitRatio > 0.8) {
+ if (hitRatio > 0.8 && !isVeryLargeDataset) {
  // High hit ratio, decrease batch size slightly
- batchSize = Math.max(5, Math.floor(batchSize * 0.8));
+ // But don't decrease too much for large datasets or remote storage
+ if (!(isLargeDataset && isRemoteStorage)) {
+ batchSize = Math.max(isReadOnly ? 10 : 5, Math.floor(batchSize * decreaseFactorForHighHitRatio));
+ }
  }
+ // If hit ratio is low, we need larger batches
  else if (hitRatio < 0.5) {
  // Low hit ratio, increase batch size
- batchSize = Math.min(50, Math.ceil(batchSize * 1.2));
- }
- }
+ const maxBatchSize = isRemoteStorage ?
+ (isVeryLargeDataset ? 300 : 200) :
+ (isVeryLargeDataset ? 150 : 100);
+ batchSize = Math.min(maxBatchSize, Math.ceil(batchSize * increaseFactorForLowHitRatio));
+ }
+ }
+ // Set minimum batch sizes based on storage type and mode
+ let minBatchSize = 5;
+ if (isRemoteStorage) {
+ minBatchSize = isReadOnly ? 20 : 10;
+ }
+ else if (isReadOnly) {
+ minBatchSize = 10;
+ }
+ // Ensure batch size is within reasonable limits
+ batchSize = Math.max(minBatchSize, batchSize);
+ // Cap maximum batch size based on environment and storage
+ const maxBatchSize = isRemoteStorage ?
+ (this.environment === Environment$1.NODE ? 300 : 150) :
+ (this.environment === Environment$1.NODE ? 150 : 75);
+ batchSize = Math.min(maxBatchSize, batchSize);
  // Update the batch size
  this.batchSize = batchSize;
  }
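A worked example under the same assumptions (Node.js, read-only, S3, 500,000 items), adding a search-heavy history (70% searches) and a 0.4 hit ratio:

    // environment base: 30 (Node, read-only), remote storage => 50
    // large dataset (>100K): Math.min(100, 50 * 1.5) = 75
    // search-heavy, remote: Math.min(200, Math.ceil(75 * 1.8)) = 135
    // low hit ratio, read-only remote: Math.min(200, Math.ceil(135 * 2.0)) = 200
    // floor Math.max(20, 200) and cap Math.min(300, 200) => batchSize = 200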
@@ -11493,7 +12079,8 @@ async function createStorage(options = {}) {
  secretAccessKey: options.s3Storage.secretAccessKey,
  sessionToken: options.s3Storage.sessionToken,
  serviceType: 's3',
- operationConfig: options.operationConfig
+ operationConfig: options.operationConfig,
+ cacheConfig: options.cacheConfig
  });
  }
  else {
@@ -11508,7 +12095,8 @@ async function createStorage(options = {}) {
  accountId: options.r2Storage.accountId,
  accessKeyId: options.r2Storage.accessKeyId,
  secretAccessKey: options.r2Storage.secretAccessKey,
- serviceType: 'r2'
+ serviceType: 'r2',
+ cacheConfig: options.cacheConfig
  });
  }
  else {
@@ -11524,7 +12112,8 @@ async function createStorage(options = {}) {
  endpoint: options.gcsStorage.endpoint || 'https://storage.googleapis.com',
  accessKeyId: options.gcsStorage.accessKeyId,
  secretAccessKey: options.gcsStorage.secretAccessKey,
- serviceType: 'gcs'
+ serviceType: 'gcs',
+ cacheConfig: options.cacheConfig
  });
  }
  else {
@@ -11545,7 +12134,8 @@ async function createStorage(options = {}) {
  endpoint: options.customS3Storage.endpoint,
  accessKeyId: options.customS3Storage.accessKeyId,
  secretAccessKey: options.customS3Storage.secretAccessKey,
- serviceType: options.customS3Storage.serviceType || 'custom'
+ serviceType: options.customS3Storage.serviceType || 'custom',
+ cacheConfig: options.cacheConfig
  });
  }
  // If R2 storage is specified, use it
@@ -11556,7 +12146,8 @@ async function createStorage(options = {}) {
  accountId: options.r2Storage.accountId,
  accessKeyId: options.r2Storage.accessKeyId,
  secretAccessKey: options.r2Storage.secretAccessKey,
- serviceType: 'r2'
+ serviceType: 'r2',
+ cacheConfig: options.cacheConfig
  });
  }
  // If S3 storage is specified, use it
@@ -11568,7 +12159,8 @@ async function createStorage(options = {}) {
  accessKeyId: options.s3Storage.accessKeyId,
  secretAccessKey: options.s3Storage.secretAccessKey,
  sessionToken: options.s3Storage.sessionToken,
- serviceType: 's3'
+ serviceType: 's3',
+ cacheConfig: options.cacheConfig
  });
  }
  // If GCS storage is specified, use it
@@ -11580,7 +12172,8 @@ async function createStorage(options = {}) {
  endpoint: options.gcsStorage.endpoint || 'https://storage.googleapis.com',
  accessKeyId: options.gcsStorage.accessKeyId,
  secretAccessKey: options.gcsStorage.secretAccessKey,
- serviceType: 'gcs'
+ serviceType: 'gcs',
+ cacheConfig: options.cacheConfig
  });
  }
  // Auto-detect the best storage adapter based on the environment
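The practical effect of this hunk and the six like it above: a cacheConfig passed to createStorage() now reaches every S3-compatible adapter. A hedged sketch (only option names visible in this diff are shown; the remaining required s3Storage fields are elided):

    const storage = await createStorage({
      s3Storage: {
        accessKeyId: process.env.AWS_ACCESS_KEY_ID,
        secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY
        // ...other required s3Storage fields
      },
      cacheConfig: { autoTune: true, autoTuneInterval: 60000 } // now forwarded
    })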
@@ -14138,6 +14731,27 @@ class BrainyData {
  ...config.realtimeUpdates
  };
  }
+ // Initialize cache configuration with intelligent defaults
+ // These defaults are automatically tuned based on environment and dataset size
+ this.cacheConfig = {
+ // Enable auto-tuning by default for optimal performance
+ autoTune: true,
+ // Set auto-tune interval to 1 minute for faster initial optimization
+ // This is especially important for large datasets
+ autoTuneInterval: 60000, // 1 minute
+ // Read-only mode specific optimizations
+ readOnlyMode: {
+ // Use aggressive prefetching in read-only mode for better performance
+ prefetchStrategy: 'aggressive'
+ }
+ };
+ // Override defaults with user-provided configuration if available
+ if (config.cache) {
+ this.cacheConfig = {
+ ...this.cacheConfig,
+ ...config.cache
+ };
+ }
  }
  /**
  * Check if the database is in read-only mode and throw an error if it is
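Since the user config is merged with a shallow spread, a partial override replaces nested defaults wholesale; a sketch (option values illustrative):

    const db = new BrainyData({
      cache: {
        autoTuneInterval: 5 * 60 * 1000, // replaces the 1-minute default
        // supplying readOnlyMode replaces the whole default object,
        // including prefetchStrategy: 'aggressive'
        readOnlyMode: { prefetchStrategy: 'conservative' }
      }
    })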
@@ -14430,6 +15044,18 @@ class BrainyData {
  return 'default';
  }
  }
+ /**
+ * Get the service name from options or fallback to current augmentation
+ * This provides a consistent way to handle service names across all methods
+ * @param options Options object that may contain a service property
+ * @returns The service name to use for operations
+ */
+ getServiceName(options) {
+ if (options?.service) {
+ return options.service;
+ }
+ return this.getCurrentAugmentation();
+ }
  /**
  * Initialize the database
  * Loads existing data from storage if available
@@ -14482,6 +15108,14 @@ class BrainyData {
  ...this.storageConfig,
  requestPersistentStorage: this.requestPersistentStorage
  };
+ // Add cache configuration if provided
+ if (this.cacheConfig) {
+ storageOptions.cacheConfig = {
+ ...this.cacheConfig,
+ // Pass read-only flag to optimize cache behavior
+ readOnly: this.readOnly
+ };
+ }
  // Ensure s3Storage has all required fields if it's provided
  if (storageOptions.s3Storage) {
  // Only include s3Storage if all required fields are present
@@ -14619,7 +15253,33 @@ class BrainyData {
  else {
  // Input needs to be vectorized
  try {
- vector = await this.embeddingFunction(vectorOrData);
+ // Check if input is a JSON object and process it specially
+ if (typeof vectorOrData === 'object' &&
+ vectorOrData !== null &&
+ !Array.isArray(vectorOrData)) {
+ // Process JSON object for better vectorization
+ const preparedText = prepareJsonForVectorization(vectorOrData, {
+ // Prioritize common name/title fields if they exist
+ priorityFields: [
+ 'name',
+ 'title',
+ 'company',
+ 'organization',
+ 'description',
+ 'summary'
+ ]
+ });
+ vector = await this.embeddingFunction(preparedText);
+ // Track field names for this JSON document
+ const service = this.getServiceName(options);
+ if (this.storage) {
+ await this.storage.trackFieldNames(vectorOrData, service);
+ }
+ }
+ else {
+ // Use standard embedding for non-JSON data
+ vector = await this.embeddingFunction(vectorOrData);
+ }
  }
  catch (embedError) {
  throw new Error(`Failed to vectorize data: ${embedError}`);
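A sketch of the new add() path, assuming the add(data, metadata, options) call shape used elsewhere in this file:

    // Plain objects are flattened with name/title/company/organization/
    // description/summary boosted, then embedded; their field names are
    // tracked under the given service.
    await db.add(
      { name: 'Acme Corp', description: 'Industrial anvils' },
      { category: 'company' },  // metadata (illustrative)
      { service: 'crm-import' } // drives field-name tracking
    )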
@@ -14648,7 +15308,7 @@ class BrainyData {
  // Save noun to storage
  await this.storage.saveNoun(noun);
  // Track noun statistics
- const service = options.service || this.getCurrentAugmentation();
+ const service = this.getServiceName(options);
  await this.storage.incrementStatistic('noun', service);
  // Save metadata if provided and not empty
  if (metadata !== undefined) {
@@ -14701,7 +15361,7 @@ class BrainyData {
  }
  await this.storage.saveMetadata(id, metadataToSave);
  // Track metadata statistics
- const metadataService = options.service || this.getCurrentAugmentation();
+ const metadataService = this.getServiceName(options);
  await this.storage.incrementStatistic('metadata', metadataService);
  }
  }
@@ -15122,12 +15782,43 @@ class BrainyData {
  }
  // Check if database is in write-only mode
  this.checkWriteOnly();
- // If input is a string and not a vector, automatically vectorize it
+ // Process the query input for vectorization
  let queryToUse = queryVectorOrData;
+ // Handle string queries
  if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
  queryToUse = await this.embed(queryVectorOrData);
  options.forceEmbed = false; // Already embedded, don't force again
  }
+ // Handle JSON object queries with special processing
+ else if (typeof queryVectorOrData === 'object' &&
+ queryVectorOrData !== null &&
+ !Array.isArray(queryVectorOrData) &&
+ !options.forceEmbed) {
+ // If searching within a specific field
+ if (options.searchField) {
+ // Extract text from the specific field
+ const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
+ if (fieldText) {
+ queryToUse = await this.embeddingFunction(fieldText);
+ options.forceEmbed = false; // Already embedded, don't force again
+ }
+ }
+ // Otherwise process the entire object with priority fields
+ else {
+ const preparedText = prepareJsonForVectorization(queryVectorOrData, {
+ priorityFields: options.priorityFields || [
+ 'name',
+ 'title',
+ 'company',
+ 'organization',
+ 'description',
+ 'summary'
+ ]
+ });
+ queryToUse = await this.embeddingFunction(preparedText);
+ options.forceEmbed = false; // Already embedded, don't force again
+ }
+ }
  // If noun types are specified, use searchByNounTypes
  let searchResults;
  if (options.nounTypes && options.nounTypes.length > 0) {
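A sketch of the new query path (the search(query, k, options) shape matches the call made by searchByStandardField below):

    // Embed only the query object's "company" field:
    const hits = await db.search(
      { company: 'Acme Corp', notes: 'ignored for matching' },
      10,
      { searchField: 'company' }
    )
    // Without searchField, the whole object is prepared using
    // options.priorityFields or the default name/title/... list.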
@@ -15424,7 +16115,7 @@ class BrainyData {
  // Remove from storage
  await this.storage.deleteNoun(actualId);
  // Track deletion statistics
- const service = options.service || 'default';
+ const service = this.getServiceName(options);
  await this.storage.decrementStatistic('noun', service);
  // Try to remove metadata (ignore errors)
  try {
@@ -15751,7 +16442,7 @@ class BrainyData {
  // Save verb to storage
  await this.storage.saveVerb(verb);
  // Track verb statistics
- const serviceForStats = options.service || 'default';
+ const serviceForStats = this.getServiceName(options);
  await this.storage.incrementStatistic('verb', serviceForStats);
  // Update HNSW index size (excluding verbs)
  await this.storage.updateHnswIndexSize(await this.getNounCount());
@@ -15899,7 +16590,7 @@ class BrainyData {
  // Remove from storage
  await this.storage.deleteVerb(id);
  // Track deletion statistics
- const service = options.service || 'default';
+ const service = this.getServiceName(options);
  await this.storage.decrementStatistic('verb', service);
  return true;
  }
@@ -16986,6 +17677,82 @@ class BrainyData {
  throw new Error(`Failed to generate random graph: ${error}`);
  }
  }
+ /**
+ * Get available field names by service
+ * This helps users understand what fields are available for searching from different data sources
+ * @returns Record of field names by service
+ */
+ async getAvailableFieldNames() {
+ await this.ensureInitialized();
+ if (!this.storage) {
+ return {};
+ }
+ return this.storage.getAvailableFieldNames();
+ }
+ /**
+ * Get standard field mappings
+ * This helps users understand how fields from different services map to standard field names
+ * @returns Record of standard field mappings
+ */
+ async getStandardFieldMappings() {
+ await this.ensureInitialized();
+ if (!this.storage) {
+ return {};
+ }
+ return this.storage.getStandardFieldMappings();
+ }
+ /**
+ * Search using a standard field name
+ * This allows searching across multiple services using a standardized field name
+ * @param standardField The standard field name to search in
+ * @param searchTerm The term to search for
+ * @param k Number of results to return
+ * @param options Additional search options
+ * @returns Array of search results
+ */
+ async searchByStandardField(standardField, searchTerm, k = 10, options = {}) {
+ await this.ensureInitialized();
+ // Check if database is in write-only mode
+ this.checkWriteOnly();
+ // Get standard field mappings
+ const standardFieldMappings = await this.getStandardFieldMappings();
+ // If the standard field doesn't exist, return empty results
+ if (!standardFieldMappings[standardField]) {
+ return [];
+ }
+ // Filter by services if specified
+ let serviceFieldMappings = standardFieldMappings[standardField];
+ if (options.services && options.services.length > 0) {
+ const filteredMappings = {};
+ for (const service of options.services) {
+ if (serviceFieldMappings[service]) {
+ filteredMappings[service] = serviceFieldMappings[service];
+ }
+ }
+ serviceFieldMappings = filteredMappings;
+ }
+ // If no mappings after filtering, return empty results
+ if (Object.keys(serviceFieldMappings).length === 0) {
+ return [];
+ }
+ // Search in each service's fields and combine results
+ const allResults = [];
+ for (const [service, fieldNames] of Object.entries(serviceFieldMappings)) {
+ for (const fieldName of fieldNames) {
+ // Search using the specific field name for this service
+ const results = await this.search(searchTerm, k, {
+ searchField: fieldName,
+ service,
+ includeVerbs: options.includeVerbs,
+ searchMode: options.searchMode
+ });
+ // Add results to the combined list
+ allResults.push(...results);
+ }
+ }
+ // Sort by score and limit to k results
+ return allResults.sort((a, b) => b.score - a.score).slice(0, k);
+ }
  }
 
  /**
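A usage sketch of searchByStandardField(), tying the pieces together (service name and data are illustrative):

    // "title" fans out to each service's concrete title-like fields
    // (e.g. "headline" for a news service): one search per field,
    // then results are merged, sorted by score, and cut to k.
    const results = await db.searchByStandardField('title', 'quarterly report', 5, {
      services: ['news-feed'] // optional service filter
    })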