@soulcraft/brainy 0.32.0 → 0.33.0
This diff shows the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- package/README.md +495 -400
- package/dist/brainyData.d.ts +115 -0
- package/dist/coreTypes.d.ts +26 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts +16 -0
- package/dist/storage/adapters/baseStorageAdapter.d.ts.map +1 -1
- package/dist/storage/cacheManager.d.ts +27 -13
- package/dist/storage/cacheManager.d.ts.map +1 -1
- package/dist/storage/storageFactory.d.ts +44 -0
- package/dist/storage/storageFactory.d.ts.map +1 -1
- package/dist/unified.js +823 -56
- package/dist/unified.min.js +747 -747
- package/dist/utils/fieldNameTracking.d.ts +21 -0
- package/dist/utils/fieldNameTracking.d.ts.map +1 -0
- package/dist/utils/index.d.ts +2 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/jsonProcessing.d.ts +43 -0
- package/dist/utils/jsonProcessing.d.ts.map +1 -0
- package/package.json +1 -1
package/dist/unified.js
CHANGED
@@ -4432,6 +4432,275 @@ async function getStatistics(instance, options = {}) {
     }
 }
 
+/**
+ * Utility functions for processing JSON documents for vectorization and search
+ */
+/**
+ * Extracts text from a JSON object for vectorization
+ * This function recursively processes the JSON object and extracts text from all fields
+ * It can also prioritize specific fields if provided
+ *
+ * @param jsonObject The JSON object to extract text from
+ * @param options Configuration options for text extraction
+ * @returns A string containing the extracted text
+ */
+function extractTextFromJson(jsonObject, options = {}) {
+    // Set default options
+    const { priorityFields = [], excludeFields = [], includeFieldNames = true, maxDepth = 5, currentDepth = 0, fieldPath = [] } = options;
+    // If input is not an object or array, or we've reached max depth, return as string
+    if (jsonObject === null ||
+        jsonObject === undefined ||
+        typeof jsonObject !== 'object' ||
+        currentDepth >= maxDepth) {
+        return String(jsonObject || '');
+    }
+    const extractedText = [];
+    const priorityText = [];
+    // Process arrays
+    if (Array.isArray(jsonObject)) {
+        for (let i = 0; i < jsonObject.length; i++) {
+            const value = jsonObject[i];
+            const newPath = [...fieldPath, i.toString()];
+            // Recursively extract text from array items
+            const itemText = extractTextFromJson(value, {
+                priorityFields,
+                excludeFields,
+                includeFieldNames,
+                maxDepth,
+                currentDepth: currentDepth + 1,
+                fieldPath: newPath
+            });
+            if (itemText) {
+                extractedText.push(itemText);
+            }
+        }
+    }
+    // Process objects
+    else {
+        for (const [key, value] of Object.entries(jsonObject)) {
+            // Skip excluded fields
+            if (excludeFields.includes(key)) {
+                continue;
+            }
+            const newPath = [...fieldPath, key];
+            const fullPath = newPath.join('.');
+            // Check if this is a priority field
+            const isPriority = priorityFields.some(field => {
+                // Exact match
+                if (field === key)
+                    return true;
+                // Path match
+                if (field === fullPath)
+                    return true;
+                // Wildcard match (e.g., "user.*" matches "user.name", "user.email", etc.)
+                if (field.endsWith('.*') && fullPath.startsWith(field.slice(0, -2)))
+                    return true;
+                return false;
+            });
+            // Get the field value as text
+            let fieldText;
+            if (typeof value === 'object' && value !== null) {
+                // Recursively extract text from nested objects
+                fieldText = extractTextFromJson(value, {
+                    priorityFields,
+                    excludeFields,
+                    includeFieldNames,
+                    maxDepth,
+                    currentDepth: currentDepth + 1,
+                    fieldPath: newPath
+                });
+            }
+            else {
+                fieldText = String(value || '');
+            }
+            // Add field name if requested
+            if (includeFieldNames && fieldText) {
+                fieldText = `${key}: ${fieldText}`;
+            }
+            // Add to appropriate collection
+            if (fieldText) {
+                if (isPriority) {
+                    priorityText.push(fieldText);
+                }
+                else {
+                    extractedText.push(fieldText);
+                }
+            }
+        }
+    }
+    // Combine priority text (repeated for emphasis) and regular text
+    return [...priorityText, ...priorityText, ...extractedText].join(' ');
+}
+/**
+ * Prepares a JSON document for vectorization
+ * This function extracts text from the JSON document and formats it for optimal vectorization
+ *
+ * @param jsonDocument The JSON document to prepare
+ * @param options Configuration options for preparation
+ * @returns A string ready for vectorization
+ */
+function prepareJsonForVectorization(jsonDocument, options = {}) {
+    // If input is a string, try to parse it as JSON
+    let document = jsonDocument;
+    if (typeof jsonDocument === 'string') {
+        try {
+            document = JSON.parse(jsonDocument);
+        }
+        catch (e) {
+            // If parsing fails, treat it as a plain string
+            return jsonDocument;
+        }
+    }
+    // If not an object after parsing, return as is
+    if (typeof document !== 'object' || document === null) {
+        return String(document || '');
+    }
+    // Extract text from the document
+    return extractTextFromJson(document, options);
+}
+/**
+ * Extracts text from a specific field in a JSON document
+ * This is useful for searching within specific fields
+ *
+ * @param jsonDocument The JSON document to extract from
+ * @param fieldPath The path to the field (e.g., "user.name" or "addresses[0].city")
+ * @returns The extracted text or empty string if field not found
+ */
+function extractFieldFromJson(jsonDocument, fieldPath) {
+    // If input is a string, try to parse it as JSON
+    let document = jsonDocument;
+    if (typeof jsonDocument === 'string') {
+        try {
+            document = JSON.parse(jsonDocument);
+        }
+        catch (e) {
+            // If parsing fails, return empty string
+            return '';
+        }
+    }
+    // If not an object after parsing, return empty string
+    if (typeof document !== 'object' || document === null) {
+        return '';
+    }
+    // Parse the field path
+    const parts = fieldPath.split('.');
+    let current = document;
+    // Navigate through the path
+    for (const part of parts) {
+        // Handle array indexing (e.g., "addresses[0]")
+        const match = part.match(/^([^[]+)(?:\[(\d+)\])?$/);
+        if (!match) {
+            return '';
+        }
+        const [, key, indexStr] = match;
+        // Move to the next level
+        current = current[key];
+        // If we have an array index, access that element
+        if (indexStr !== undefined && Array.isArray(current)) {
+            const index = parseInt(indexStr, 10);
+            current = current[index];
+        }
+        // If we've reached a null or undefined value, return empty string
+        if (current === null || current === undefined) {
+            return '';
+        }
+    }
+    // Convert the final value to string
+    return typeof current === 'object'
+        ? JSON.stringify(current)
+        : String(current);
+}
+
+/**
+ * Utility functions for tracking and managing field names in JSON documents
+ */
+/**
+ * Extracts field names from a JSON document
+ * @param jsonObject The JSON object to extract field names from
+ * @param options Configuration options
+ * @returns An array of field paths (e.g., "user.name", "addresses[0].city")
+ */
+function extractFieldNamesFromJson(jsonObject, options = {}) {
+    const { maxDepth = 5, currentDepth = 0, currentPath = '', fieldNames = new Set() } = options;
+    if (jsonObject === null ||
+        jsonObject === undefined ||
+        typeof jsonObject !== 'object' ||
+        currentDepth >= maxDepth) {
+        return Array.from(fieldNames);
+    }
+    if (Array.isArray(jsonObject)) {
+        // For arrays, we'll just check the first item to avoid explosion of paths
+        if (jsonObject.length > 0) {
+            const arrayPath = currentPath ? `${currentPath}[0]` : '[0]';
+            extractFieldNamesFromJson(jsonObject[0], {
+                maxDepth,
+                currentDepth: currentDepth + 1,
+                currentPath: arrayPath,
+                fieldNames
+            });
+        }
+    }
+    else {
+        // For objects, process each property
+        for (const key of Object.keys(jsonObject)) {
+            const value = jsonObject[key];
+            const fieldPath = currentPath ? `${currentPath}.${key}` : key;
+            // Add this field path
+            fieldNames.add(fieldPath);
+            // Recursively process nested objects
+            if (typeof value === 'object' && value !== null) {
+                extractFieldNamesFromJson(value, {
+                    maxDepth,
+                    currentDepth: currentDepth + 1,
+                    currentPath: fieldPath,
+                    fieldNames
+                });
+            }
+        }
+    }
+    return Array.from(fieldNames);
+}
+/**
+ * Maps field names to standard field names based on common patterns
+ * @param fieldName The field name to map
+ * @returns The standard field name if a match is found, or null if no match
+ */
+function mapToStandardField(fieldName) {
+    // Standard field mappings
+    const standardMappings = {
+        'title': ['title', 'name', 'headline', 'subject'],
+        'description': ['description', 'summary', 'content', 'text', 'body'],
+        'author': ['author', 'creator', 'user', 'owner', 'by'],
+        'date': ['date', 'created', 'createdAt', 'timestamp', 'published'],
+        'url': ['url', 'link', 'href', 'source'],
+        'image': ['image', 'thumbnail', 'photo', 'picture'],
+        'tags': ['tags', 'categories', 'keywords', 'topics']
+    };
+    // Check for matches
+    for (const [standardField, possibleMatches] of Object.entries(standardMappings)) {
+        // Exact match
+        if (possibleMatches.includes(fieldName)) {
+            return standardField;
+        }
+        // Path match (e.g., "user.name" matches "name")
+        const parts = fieldName.split('.');
+        const lastPart = parts[parts.length - 1];
+        if (possibleMatches.includes(lastPart)) {
+            return standardField;
+        }
+        // Array match (e.g., "items[0].name" matches "name")
+        if (fieldName.includes('[')) {
+            for (const part of parts) {
+                const cleanPart = part.split('[')[0];
+                if (possibleMatches.includes(cleanPart)) {
+                    return standardField;
+                }
+            }
+        }
+    }
+    return null;
+}
+
 /**
  * HNSW (Hierarchical Navigable Small World) Index implementation
  * Based on the paper: "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs"
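Taken together, these helpers flatten a JSON document into a weighted text string for embedding and normalize its field names. The following is a minimal behavioral sketch; the functions are module-internal to `unified.js`, so these direct calls are illustrative rather than a public API:

```js
// Hypothetical usage sketch of the internal helpers shown above.
const doc = {
  name: 'Ada Lovelace',
  profile: { bio: 'Mathematician', links: { homepage: 'https://example.com' } },
  tags: ['math', 'computing']
}

// Priority fields are emitted twice, so "name" dominates the embedded text:
// "name: Ada Lovelace name: Ada Lovelace profile: bio: Mathematician
//  links: homepage: https://example.com tags: math computing"
const text = extractTextFromJson(doc, { priorityFields: ['name'] })

// Field paths are dot-joined; only object properties produce paths, and
// arrays are sampled at index [0] to avoid a path explosion:
// ['name', 'profile', 'profile.bio', 'profile.links', 'profile.links.homepage', 'tags']
const paths = extractFieldNamesFromJson(doc)

mapToStandardField('name')        // 'title' ('name' is an alias in the title list)
mapToStandardField('profile.bio') // null   ('bio' appears in no alias list)
```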
@@ -5708,6 +5977,107 @@ class BaseStorageAdapter {
         // Call the protected flushStatistics method to immediately write to storage
         await this.flushStatistics();
     }
+    /**
+     * Track field names from a JSON document
+     * @param jsonDocument The JSON document to extract field names from
+     * @param service The service that inserted the data
+     */
+    async trackFieldNames(jsonDocument, service) {
+        // Skip if not a JSON object
+        if (typeof jsonDocument !== 'object' || jsonDocument === null || Array.isArray(jsonDocument)) {
+            return;
+        }
+        // Get current statistics from cache or storage
+        let statistics = this.statisticsCache;
+        if (!statistics) {
+            statistics = await this.getStatisticsData();
+            if (!statistics) {
+                statistics = this.createDefaultStatistics();
+            }
+            // Update the cache
+            this.statisticsCache = {
+                ...statistics,
+                nounCount: { ...statistics.nounCount },
+                verbCount: { ...statistics.verbCount },
+                metadataCount: { ...statistics.metadataCount },
+                fieldNames: { ...statistics.fieldNames },
+                standardFieldMappings: { ...statistics.standardFieldMappings }
+            };
+        }
+        // Ensure fieldNames exists
+        if (!this.statisticsCache.fieldNames) {
+            this.statisticsCache.fieldNames = {};
+        }
+        // Ensure standardFieldMappings exists
+        if (!this.statisticsCache.standardFieldMappings) {
+            this.statisticsCache.standardFieldMappings = {};
+        }
+        // Extract field names from the JSON document
+        const fieldNames = extractFieldNamesFromJson(jsonDocument);
+        // Initialize service entry if it doesn't exist
+        if (!this.statisticsCache.fieldNames[service]) {
+            this.statisticsCache.fieldNames[service] = [];
+        }
+        // Add new field names to the service's list
+        for (const fieldName of fieldNames) {
+            if (!this.statisticsCache.fieldNames[service].includes(fieldName)) {
+                this.statisticsCache.fieldNames[service].push(fieldName);
+            }
+            // Map to standard field if possible
+            const standardField = mapToStandardField(fieldName);
+            if (standardField) {
+                // Initialize standard field entry if it doesn't exist
+                if (!this.statisticsCache.standardFieldMappings[standardField]) {
+                    this.statisticsCache.standardFieldMappings[standardField] = {};
+                }
+                // Initialize service entry if it doesn't exist
+                if (!this.statisticsCache.standardFieldMappings[standardField][service]) {
+                    this.statisticsCache.standardFieldMappings[standardField][service] = [];
+                }
+                // Add field name to standard field mapping if not already there
+                if (!this.statisticsCache.standardFieldMappings[standardField][service].includes(fieldName)) {
+                    this.statisticsCache.standardFieldMappings[standardField][service].push(fieldName);
+                }
+            }
+        }
+        // Update timestamp
+        this.statisticsCache.lastUpdated = new Date().toISOString();
+        // Schedule a batch update
+        this.statisticsModified = true;
+        this.scheduleBatchUpdate();
+    }
+    /**
+     * Get available field names by service
+     * @returns Record of field names by service
+     */
+    async getAvailableFieldNames() {
+        // Get current statistics from cache or storage
+        let statistics = this.statisticsCache;
+        if (!statistics) {
+            statistics = await this.getStatisticsData();
+            if (!statistics) {
+                return {};
+            }
+        }
+        // Return field names by service
+        return statistics.fieldNames || {};
+    }
+    /**
+     * Get standard field mappings
+     * @returns Record of standard field mappings
+     */
+    async getStandardFieldMappings() {
+        // Get current statistics from cache or storage
+        let statistics = this.statisticsCache;
+        if (!statistics) {
+            statistics = await this.getStatisticsData();
+            if (!statistics) {
+                return {};
+            }
+        }
+        // Return standard field mappings
+        return statistics.standardFieldMappings || {};
+    }
     /**
      * Create default statistics data
      * @returns Default statistics data
@@ -5718,6 +6088,8 @@ class BaseStorageAdapter {
             verbCount: {},
             metadataCount: {},
             hnswIndexSize: 0,
+            fieldNames: {},
+            standardFieldMappings: {},
             lastUpdated: new Date().toISOString()
         };
     }
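Both new buckets are nested records keyed first by service, then by field name. A sketch of the persisted shape with made-up values, assuming one document has been tracked by a hypothetical service named `crm-import`:

```js
// Illustrative only: a statistics record after trackFieldNames(doc, 'crm-import').
// The surrounding fields mirror createDefaultStatistics() above.
const statistics = {
  nounCount: {},
  verbCount: {},
  metadataCount: {},
  hnswIndexSize: 0,
  fieldNames: {
    // every dot-path seen in documents inserted by this service
    'crm-import': ['name', 'company', 'contact', 'contact.email']
  },
  standardFieldMappings: {
    // only 'name' matches an alias list ('name' -> standard 'title'),
    // so it is the only entry recorded here
    title: { 'crm-import': ['name'] }
  },
  lastUpdated: '2025-01-01T00:00:00.000Z'
}
```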
@@ -8055,6 +8427,7 @@ var StorageType;
     StorageType[StorageType["OPFS"] = 1] = "OPFS";
     StorageType[StorageType["FILESYSTEM"] = 2] = "FILESYSTEM";
     StorageType[StorageType["S3"] = 3] = "S3";
+    StorageType[StorageType["REMOTE_API"] = 4] = "REMOTE_API";
 })(StorageType || (StorageType = {}));
 /**
  * Multi-level cache manager for efficient data access
@@ -8078,6 +8451,8 @@ class CacheManager {
         this.lastAutoTuneTime = 0;
         this.autoTuneInterval = 5 * 60 * 1000; // 5 minutes
         this.storageStatistics = null;
+        // Store options for later reference
+        this.options = options;
         // Detect environment
         this.environment = this.detectEnvironment();
         // Set storage types based on environment
@@ -8127,13 +8502,26 @@ class CacheManager {
         }
     }
     /**
-     * Detect the optimal cache size based on available memory
+     * Detect the optimal cache size based on available memory and operating mode
+     *
+     * Enhanced to better handle large datasets in S3 or other storage:
+     * - Increases cache size for read-only mode
+     * - Adjusts based on total dataset size when available
+     * - Provides more aggressive caching for large datasets
+     * - Optimizes memory usage based on environment
      */
     detectOptimalCacheSize() {
         try {
             // Default to a conservative value
             const defaultSize = 1000;
-            //
+            // Get the total dataset size if available
+            const totalItems = this.storageStatistics ?
+                (this.storageStatistics.totalNodes || 0) + (this.storageStatistics.totalEdges || 0) : 0;
+            // Determine if we're dealing with a large dataset (>100K items)
+            const isLargeDataset = totalItems > 100000;
+            // Check if we're in read-only mode (from parent BrainyData instance)
+            const isReadOnly = this.options?.readOnly || false;
+            // In Node.js, use available system memory with enhanced allocation
             if (this.environment === Environment$1.NODE) {
                 try {
                     // Use dynamic import to avoid ESLint warning
@@ -8142,12 +8530,36 @@ class CacheManager {
                         return require('os');
                     };
                     const os = getOS();
+                    const totalMemory = os.totalmem();
                     const freeMemory = os.freemem();
                     // Estimate average entry size (in bytes)
                     // This is a conservative estimate for complex objects with vectors
                     const ESTIMATED_BYTES_PER_ENTRY = 1024; // 1KB per entry
-                    //
-
+                    // Base memory percentage - 10% by default
+                    let memoryPercentage = 0.1;
+                    // Adjust based on operating mode and dataset size
+                    if (isReadOnly) {
+                        // In read-only mode, we can use more memory for caching
+                        memoryPercentage = 0.25; // 25% of free memory
+                        // For large datasets in read-only mode, be even more aggressive
+                        if (isLargeDataset) {
+                            memoryPercentage = 0.4; // 40% of free memory
+                        }
+                    }
+                    else if (isLargeDataset) {
+                        // For large datasets in normal mode, increase slightly
+                        memoryPercentage = 0.15; // 15% of free memory
+                    }
+                    // Calculate optimal size based on adjusted percentage
+                    const optimalSize = Math.max(Math.floor(freeMemory * memoryPercentage / ESTIMATED_BYTES_PER_ENTRY), 1000);
+                    // If we know the total dataset size, cap at a reasonable percentage
+                    if (totalItems > 0) {
+                        // In read-only mode, we can cache a larger percentage
+                        const maxPercentage = isReadOnly ? 0.5 : 0.3;
+                        const maxItems = Math.ceil(totalItems * maxPercentage);
+                        // Return the smaller of the two to avoid excessive memory usage
+                        return Math.min(optimalSize, maxItems);
+                    }
                     return optimalSize;
                 }
                 catch (error) {
@@ -8155,10 +8567,36 @@ class CacheManager {
                     return defaultSize;
                 }
             }
-            // In browser, use navigator.deviceMemory
+            // In browser, use navigator.deviceMemory with enhanced allocation
             if (this.environment === Environment$1.BROWSER && navigator.deviceMemory) {
-                //
-
+                // Base entries per GB
+                let entriesPerGB = 500;
+                // Adjust based on operating mode and dataset size
+                if (isReadOnly) {
+                    entriesPerGB = 800; // More aggressive caching in read-only mode
+                    if (isLargeDataset) {
+                        entriesPerGB = 1000; // Even more aggressive for large datasets
+                    }
+                }
+                else if (isLargeDataset) {
+                    entriesPerGB = 600; // Slightly more aggressive for large datasets
+                }
+                // Calculate based on device memory
+                const browserCacheSize = Math.max(navigator.deviceMemory * entriesPerGB, 1000);
+                // If we know the total dataset size, cap at a reasonable percentage
+                if (totalItems > 0) {
+                    // In read-only mode, we can cache a larger percentage
+                    const maxPercentage = isReadOnly ? 0.4 : 0.25;
+                    const maxItems = Math.ceil(totalItems * maxPercentage);
+                    // Return the smaller of the two to avoid excessive memory usage
+                    return Math.min(browserCacheSize, maxItems);
+                }
+                return browserCacheSize;
+            }
+            // For worker environments or when memory detection fails
+            if (this.environment === Environment$1.WORKER) {
+                // Workers typically have limited memory, be conservative
+                return isReadOnly ? 2000 : 1000;
             }
             return defaultSize;
         }
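To make the Node.js branch concrete, here is one pass through the arithmetic under assumed numbers (8 GiB of free memory, read-only mode, a 500,000-item dataset):

```js
// Worked example with assumed inputs; mirrors the Node.js branch above.
const freeMemory = 8 * 1024 ** 3              // 8 GiB free
const memoryPercentage = 0.4                  // read-only + large dataset branch
const ESTIMATED_BYTES_PER_ENTRY = 1024
const optimalSize = Math.max(
  Math.floor(freeMemory * memoryPercentage / ESTIMATED_BYTES_PER_ENTRY),
  1000
)                                             // 3,355,443 entries by memory alone
const maxItems = Math.ceil(500000 * 0.5)      // read-only cap: 50% of the dataset
Math.min(optimalSize, maxItems)               // => 250,000: the dataset cap wins
```

With plentiful memory the dataset-percentage cap is what actually bounds the cache, which is why the cap exists at all.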
@@ -8220,30 +8658,57 @@ class CacheManager {
         }
     }
     /**
-     * Tune hot cache size based on statistics and
+     * Tune hot cache size based on statistics, environment, and operating mode
      *
      * The hot cache size is tuned based on:
      * 1. Available memory in the current environment
      * 2. Total number of nodes and edges in the system
      * 3. Cache hit/miss ratio
+     * 4. Operating mode (read-only vs. read-write)
+     * 5. Storage type (S3, filesystem, memory)
      *
-     *
-     * - Start with a size based on available memory
-     * -
-     * -
-     * -
+     * Enhanced algorithm:
+     * - Start with a size based on available memory and operating mode
+     * - For large datasets in S3 or other remote storage, use more aggressive caching
+     * - Adjust based on access patterns (read-heavy vs. write-heavy)
+     * - For read-only mode, prioritize cache size over eviction speed
+     * - Dynamically adjust based on hit/miss ratio and query patterns
      */
     tuneHotCacheSize() {
         // Start with the base size from environment detection
         let optimalSize = this.detectOptimalCacheSize();
+        // Check if we're in read-only mode
+        const isReadOnly = this.options?.readOnly || false;
+        // Check if we're using S3 or other remote storage
+        const isRemoteStorage = this.coldStorageType === StorageType.S3 ||
+            this.coldStorageType === StorageType.REMOTE_API;
         // If we have storage statistics, adjust based on total nodes/edges
         if (this.storageStatistics) {
             const totalItems = (this.storageStatistics.totalNodes || 0) +
                 (this.storageStatistics.totalEdges || 0);
             // If total items is significant, adjust cache size
             if (totalItems > 0) {
-                //
-
+                // Base percentage to cache - adjusted based on mode and storage
+                let percentageToCache = 0.2; // Cache 20% of items by default
+                // For read-only mode, increase cache percentage
+                if (isReadOnly) {
+                    percentageToCache = 0.3; // 30% for read-only mode
+                    // For remote storage in read-only mode, be even more aggressive
+                    if (isRemoteStorage) {
+                        percentageToCache = 0.4; // 40% for remote storage in read-only mode
+                    }
+                }
+                // For remote storage in normal mode, increase slightly
+                else if (isRemoteStorage) {
+                    percentageToCache = 0.25; // 25% for remote storage
+                }
+                // For large datasets, cap the percentage to avoid excessive memory usage
+                if (totalItems > 1000000) { // Over 1 million items
+                    percentageToCache = Math.min(percentageToCache, 0.15);
+                }
+                else if (totalItems > 100000) { // Over 100K items
+                    percentageToCache = Math.min(percentageToCache, 0.25);
+                }
                 const statisticsBasedSize = Math.ceil(totalItems * percentageToCache);
                 // Use the smaller of the two to avoid memory issues
                 optimalSize = Math.min(optimalSize, statisticsBasedSize);
@@ -8253,16 +8718,57 @@ class CacheManager {
         const totalAccesses = this.stats.hits + this.stats.misses;
         if (totalAccesses > 100) {
             const hitRatio = this.stats.hits / totalAccesses;
-            //
+            // Base adjustment factor
+            let hitRatioFactor = 1.0;
             // If hit ratio is low, we might need a larger cache
             if (hitRatio < 0.5) {
-                //
-                const
+                // Calculate adjustment factor based on hit ratio
+                const baseAdjustment = 0.5 - hitRatio;
+                // For read-only mode or remote storage, be more aggressive
+                if (isReadOnly || isRemoteStorage) {
+                    hitRatioFactor = 1 + (baseAdjustment * 1.5); // Up to 75% increase
+                }
+                else {
+                    hitRatioFactor = 1 + baseAdjustment; // Up to 50% increase
+                }
+                optimalSize = Math.ceil(optimalSize * hitRatioFactor);
+            }
+            // If hit ratio is very high, we might be able to reduce cache size slightly
+            else if (hitRatio > 0.9 && !isReadOnly && !isRemoteStorage) {
+                // Only reduce cache size in normal mode with local storage
+                // and only if hit ratio is very high
+                hitRatioFactor = 0.9; // 10% reduction
                 optimalSize = Math.ceil(optimalSize * hitRatioFactor);
             }
         }
-        //
-
+        // Check for operation patterns if available
+        if (this.storageStatistics?.operations) {
+            const ops = this.storageStatistics.operations;
+            const totalOps = ops.total || 1;
+            // Calculate read/write ratio
+            const readOps = (ops.search || 0) + (ops.get || 0);
+            (ops.add || 0) + (ops.update || 0) + (ops.delete || 0);
+            if (totalOps > 100) {
+                const readRatio = readOps / totalOps;
+                // For read-heavy workloads, increase cache size
+                if (readRatio > 0.8) {
+                    // More aggressive for remote storage
+                    const readAdjustment = isRemoteStorage ? 1.3 : 1.2;
+                    optimalSize = Math.ceil(optimalSize * readAdjustment);
+                }
+            }
+        }
+        // Ensure we have a reasonable minimum size based on environment and mode
+        let minSize = 1000; // Default minimum
+        // For read-only mode, use a higher minimum
+        if (isReadOnly) {
+            minSize = 2000;
+        }
+        // For remote storage, use an even higher minimum
+        if (isRemoteStorage) {
+            minSize = isReadOnly ? 3000 : 2000;
+        }
+        optimalSize = Math.max(optimalSize, minSize);
         // Update the hot cache max size
         this.hotCacheMaxSize = optimalSize;
         this.stats.maxSize = optimalSize;
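One detail worth noting when reading the tuning math: the line after `readOps` sums the add/update/delete counts as a bare expression statement, so it is never assigned anywhere and only search and get operations actually feed `readRatio`. A worked pass through the sizing under assumed conditions (read-only instance over S3, 500,000 items, a cold cache with a 30% hit ratio):

```js
// Worked example with assumed inputs; mirrors tuneHotCacheSize() above.
let percentageToCache = 0.4                           // read-only + remote storage
percentageToCache = Math.min(percentageToCache, 0.25) // >100K-item cap applies
let optimalSize = Math.min(
  250000,                                             // from detectOptimalCacheSize()
  Math.ceil(500000 * percentageToCache)               // 125,000
)
// Hit ratio 0.3 < 0.5, read-only/remote branch: 1 + (0.5 - 0.3) * 1.5 = 1.3
optimalSize = Math.ceil(optimalSize * 1.3)            // 162,500
Math.max(optimalSize, 3000)                           // read-only remote floor: still 162,500
```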
@@ -8363,7 +8869,7 @@ class CacheManager {
         this.warmCacheTTL = ttl;
     }
     /**
-     * Tune batch size based on statistics and
+     * Tune batch size based on environment, statistics, and operating mode
      *
      * The batch size determines how many items are processed in a single batch
      * for operations like prefetching. It is tuned based on:
@@ -8371,42 +8877,93 @@ class CacheManager {
      * 2. Available memory
      * 3. Operation patterns
      * 4. Cache hit/miss ratio
+     * 5. Operating mode (read-only vs. read-write)
+     * 6. Storage type (S3, filesystem, memory)
+     * 7. Dataset size
      *
-     *
+     * Enhanced algorithm:
      * - Start with a default based on the environment
-     * -
-     * - For
-     * -
-     * -
+     * - For large datasets in S3 or other remote storage, use larger batches
+     * - For read-only mode, use larger batches to improve throughput
+     * - Dynamically adjust based on network latency and throughput
+     * - Balance between memory usage and performance
      */
     tuneBatchSize() {
         // Default batch size
         let batchSize = 10;
-        //
+        // Check if we're in read-only mode
+        const isReadOnly = this.options?.readOnly || false;
+        // Check if we're using S3 or other remote storage
+        const isRemoteStorage = this.coldStorageType === StorageType.S3 ||
+            this.coldStorageType === StorageType.REMOTE_API;
+        // Get the total dataset size if available
+        const totalItems = this.storageStatistics ?
+            (this.storageStatistics.totalNodes || 0) + (this.storageStatistics.totalEdges || 0) : 0;
+        // Determine if we're dealing with a large dataset
+        const isLargeDataset = totalItems > 100000;
+        const isVeryLargeDataset = totalItems > 1000000;
+        // Base batch size adjustment based on environment
         if (this.environment === Environment$1.NODE) {
             // Node.js can handle larger batches
-            batchSize = 20;
+            batchSize = isReadOnly ? 30 : 20;
+            // For remote storage, increase batch size
+            if (isRemoteStorage) {
+                batchSize = isReadOnly ? 50 : 30;
+            }
+            // For large datasets, adjust batch size
+            if (isLargeDataset) {
+                batchSize = Math.min(100, batchSize * 1.5);
+            }
+            // For very large datasets, adjust even more
+            if (isVeryLargeDataset) {
+                batchSize = Math.min(200, batchSize * 2);
+            }
         }
         else if (this.environment === Environment$1.BROWSER) {
             // Browsers might need smaller batches
-            batchSize = 10;
+            batchSize = isReadOnly ? 15 : 10;
             // If we have memory information, adjust accordingly
             if (navigator.deviceMemory) {
                 // Scale batch size with available memory
-
+                const memoryFactor = isReadOnly ? 3 : 2;
+                batchSize = Math.max(5, Math.min(30, Math.floor(navigator.deviceMemory * memoryFactor)));
+                // For large datasets, adjust based on memory
+                if (isLargeDataset && navigator.deviceMemory > 4) {
+                    batchSize = Math.min(50, batchSize * 1.5);
+                }
             }
         }
+        else if (this.environment === Environment$1.WORKER) {
+            // Workers can handle moderate batch sizes
+            batchSize = isReadOnly ? 20 : 15;
+        }
         // If we have storage statistics with operation counts, adjust based on operation patterns
         if (this.storageStatistics && this.storageStatistics.operations) {
             const ops = this.storageStatistics.operations;
             const totalOps = ops.total || 1;
-            const
+            const searchOps = (ops.search || 0);
+            const getOps = (ops.get || 0);
             if (totalOps > 100) {
-
-
-
-
-
+                // Calculate search and get ratios
+                const searchRatio = searchOps / totalOps;
+                const getRatio = getOps / totalOps;
+                // For search-heavy workloads, use larger batch size
+                if (searchRatio > 0.6) {
+                    // Search-heavy, increase batch size
+                    const searchFactor = isRemoteStorage ? 1.8 : 1.5;
+                    batchSize = Math.min(isRemoteStorage ? 200 : 100, Math.ceil(batchSize * searchFactor));
+                }
+                // For get-heavy workloads, adjust batch size
+                if (getRatio > 0.6) {
+                    // Get-heavy, adjust batch size based on storage type
+                    if (isRemoteStorage) {
+                        // For remote storage, larger batches reduce network overhead
+                        batchSize = Math.min(150, Math.ceil(batchSize * 1.5));
+                    }
+                    else {
+                        // For local storage, smaller batches might be more efficient
+                        batchSize = Math.max(10, Math.ceil(batchSize * 0.9));
+                    }
                 }
             }
         }
@@ -8414,17 +8971,46 @@ class CacheManager {
         const totalAccesses = this.stats.hits + this.stats.misses;
         if (totalAccesses > 100) {
             const hitRatio = this.stats.hits / totalAccesses;
+            // Base adjustment factors
+            let increaseFactorForLowHitRatio = isRemoteStorage ? 1.5 : 1.2;
+            let decreaseFactorForHighHitRatio = 0.8;
+            // In read-only mode, be more aggressive with batch size adjustments
+            if (isReadOnly) {
+                increaseFactorForLowHitRatio = isRemoteStorage ? 2.0 : 1.5;
+                decreaseFactorForHighHitRatio = 0.9; // Less reduction in read-only mode
+            }
             // If hit ratio is high, we can use smaller batches
-
-            if (hitRatio > 0.8) {
+            if (hitRatio > 0.8 && !isVeryLargeDataset) {
                 // High hit ratio, decrease batch size slightly
-
+                // But don't decrease too much for large datasets or remote storage
+                if (!(isLargeDataset && isRemoteStorage)) {
+                    batchSize = Math.max(isReadOnly ? 10 : 5, Math.floor(batchSize * decreaseFactorForHighHitRatio));
+                }
             }
+            // If hit ratio is low, we need larger batches
             else if (hitRatio < 0.5) {
                 // Low hit ratio, increase batch size
-
-
-
+                const maxBatchSize = isRemoteStorage ?
+                    (isVeryLargeDataset ? 300 : 200) :
+                    (isVeryLargeDataset ? 150 : 100);
+                batchSize = Math.min(maxBatchSize, Math.ceil(batchSize * increaseFactorForLowHitRatio));
+            }
+        }
+        // Set minimum batch sizes based on storage type and mode
+        let minBatchSize = 5;
+        if (isRemoteStorage) {
+            minBatchSize = isReadOnly ? 20 : 10;
+        }
+        else if (isReadOnly) {
+            minBatchSize = 10;
+        }
+        // Ensure batch size is within reasonable limits
+        batchSize = Math.max(minBatchSize, batchSize);
+        // Cap maximum batch size based on environment and storage
+        const maxBatchSize = isRemoteStorage ?
+            (this.environment === Environment$1.NODE ? 300 : 150) :
+            (this.environment === Environment$1.NODE ? 150 : 75);
+        batchSize = Math.min(maxBatchSize, batchSize);
         // Update the batch size
         this.batchSize = batchSize;
     }
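A worked pass through the batch sizing under assumed conditions (Node.js, S3 cold storage, read-only, 2 million items, a search-heavy operation history, and too few cache accesses for the hit-ratio branch to fire):

```js
// Worked example with assumed inputs; mirrors tuneBatchSize() above.
let batchSize = 50                          // Node + remote + read-only base
batchSize = Math.min(100, batchSize * 1.5)  // large dataset: 75
batchSize = Math.min(200, batchSize * 2)    // very large dataset: 150
batchSize = Math.min(200, Math.ceil(batchSize * 1.8)) // search-heavy remote: 200
batchSize = Math.max(20, batchSize)         // remote read-only floor: 200
batchSize = Math.min(300, batchSize)        // remote Node ceiling: settles at 200
```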
@@ -11493,7 +12079,8 @@ async function createStorage(options = {}) {
             secretAccessKey: options.s3Storage.secretAccessKey,
             sessionToken: options.s3Storage.sessionToken,
             serviceType: 's3',
-            operationConfig: options.operationConfig
+            operationConfig: options.operationConfig,
+            cacheConfig: options.cacheConfig
         });
     }
     else {
@@ -11508,7 +12095,8 @@ async function createStorage(options = {}) {
             accountId: options.r2Storage.accountId,
             accessKeyId: options.r2Storage.accessKeyId,
             secretAccessKey: options.r2Storage.secretAccessKey,
-            serviceType: 'r2'
+            serviceType: 'r2',
+            cacheConfig: options.cacheConfig
         });
     }
     else {
@@ -11524,7 +12112,8 @@ async function createStorage(options = {}) {
             endpoint: options.gcsStorage.endpoint || 'https://storage.googleapis.com',
             accessKeyId: options.gcsStorage.accessKeyId,
             secretAccessKey: options.gcsStorage.secretAccessKey,
-            serviceType: 'gcs'
+            serviceType: 'gcs',
+            cacheConfig: options.cacheConfig
         });
     }
     else {
@@ -11545,7 +12134,8 @@ async function createStorage(options = {}) {
             endpoint: options.customS3Storage.endpoint,
             accessKeyId: options.customS3Storage.accessKeyId,
             secretAccessKey: options.customS3Storage.secretAccessKey,
-            serviceType: options.customS3Storage.serviceType || 'custom'
+            serviceType: options.customS3Storage.serviceType || 'custom',
+            cacheConfig: options.cacheConfig
         });
     }
     // If R2 storage is specified, use it
@@ -11556,7 +12146,8 @@ async function createStorage(options = {}) {
             accountId: options.r2Storage.accountId,
             accessKeyId: options.r2Storage.accessKeyId,
             secretAccessKey: options.r2Storage.secretAccessKey,
-            serviceType: 'r2'
+            serviceType: 'r2',
+            cacheConfig: options.cacheConfig
         });
     }
     // If S3 storage is specified, use it
@@ -11568,7 +12159,8 @@ async function createStorage(options = {}) {
             accessKeyId: options.s3Storage.accessKeyId,
             secretAccessKey: options.s3Storage.secretAccessKey,
             sessionToken: options.s3Storage.sessionToken,
-            serviceType: 's3'
+            serviceType: 's3',
+            cacheConfig: options.cacheConfig
        });
     }
     // If GCS storage is specified, use it
@@ -11580,7 +12172,8 @@ async function createStorage(options = {}) {
             endpoint: options.gcsStorage.endpoint || 'https://storage.googleapis.com',
             accessKeyId: options.gcsStorage.accessKeyId,
             secretAccessKey: options.gcsStorage.secretAccessKey,
-            serviceType: 'gcs'
+            serviceType: 'gcs',
+            cacheConfig: options.cacheConfig
         });
     }
     // Auto-detect the best storage adapter based on the environment
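All of these hunks make the same change: `cacheConfig` now flows from the `createStorage()` options into every S3-compatible adapter (S3, R2, GCS, custom). A hedged sketch of what that wiring enables; the `bucketName` and `region` field names are assumed here, since the diff only shows the credential fields:

```js
// Sketch under assumptions: bucketName/region are illustrative option names.
const storage = await createStorage({
  s3Storage: {
    bucketName: 'my-brainy-data',                       // assumed
    region: 'us-east-1',                                // assumed
    accessKeyId: process.env.AWS_ACCESS_KEY_ID,
    secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY
  },
  // Forwarded verbatim to the adapter, per the hunks above
  cacheConfig: { autoTune: true, readOnly: true }
})
```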
@@ -14138,6 +14731,27 @@ class BrainyData {
                 ...config.realtimeUpdates
             };
         }
+        // Initialize cache configuration with intelligent defaults
+        // These defaults are automatically tuned based on environment and dataset size
+        this.cacheConfig = {
+            // Enable auto-tuning by default for optimal performance
+            autoTune: true,
+            // Set auto-tune interval to 1 minute for faster initial optimization
+            // This is especially important for large datasets
+            autoTuneInterval: 60000, // 1 minute
+            // Read-only mode specific optimizations
+            readOnlyMode: {
+                // Use aggressive prefetching in read-only mode for better performance
+                prefetchStrategy: 'aggressive'
+            }
+        };
+        // Override defaults with user-provided configuration if available
+        if (config.cache) {
+            this.cacheConfig = {
+                ...this.cacheConfig,
+                ...config.cache
+            };
+        }
     }
     /**
      * Check if the database is in read-only mode and throw an error if it is
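Because the user's `config.cache` is shallow-merged over the defaults, any key can be overridden at construction time. A minimal sketch, using only the keys that appear in the defaults above:

```js
// Sketch: overriding the new cache defaults. Keys not listed in the
// defaults would simply pass through the spread untouched.
const db = new BrainyData({
  cache: {
    autoTuneInterval: 5 * 60 * 1000, // tune every 5 minutes instead of 1
    readOnlyMode: { prefetchStrategy: 'aggressive' }
  }
})
```

Note that the merge is shallow, so supplying `readOnlyMode` replaces the whole nested object rather than merging into it.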
@@ -14430,6 +15044,18 @@ class BrainyData {
             return 'default';
         }
     }
+    /**
+     * Get the service name from options or fallback to current augmentation
+     * This provides a consistent way to handle service names across all methods
+     * @param options Options object that may contain a service property
+     * @returns The service name to use for operations
+     */
+    getServiceName(options) {
+        if (options?.service) {
+            return options.service;
+        }
+        return this.getCurrentAugmentation();
+    }
     /**
      * Initialize the database
      * Loads existing data from storage if available
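This helper centralizes the attribution logic that the later hunks switch over to: an explicit `options.service` wins, otherwise the operation is charged to whatever augmentation is currently running. A two-line sketch of the fallback order:

```js
// Sketch of the fallback order implemented by getServiceName() above.
db.getServiceName({ service: 'csv-import' }) // 'csv-import' (explicit wins)
db.getServiceName({})                        // falls back to getCurrentAugmentation()
```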
@@ -14482,6 +15108,14 @@ class BrainyData {
             ...this.storageConfig,
             requestPersistentStorage: this.requestPersistentStorage
         };
+        // Add cache configuration if provided
+        if (this.cacheConfig) {
+            storageOptions.cacheConfig = {
+                ...this.cacheConfig,
+                // Pass read-only flag to optimize cache behavior
+                readOnly: this.readOnly
+            };
+        }
         // Ensure s3Storage has all required fields if it's provided
         if (storageOptions.s3Storage) {
             // Only include s3Storage if all required fields are present
@@ -14619,7 +15253,33 @@ class BrainyData {
         else {
             // Input needs to be vectorized
             try {
-
+                // Check if input is a JSON object and process it specially
+                if (typeof vectorOrData === 'object' &&
+                    vectorOrData !== null &&
+                    !Array.isArray(vectorOrData)) {
+                    // Process JSON object for better vectorization
+                    const preparedText = prepareJsonForVectorization(vectorOrData, {
+                        // Prioritize common name/title fields if they exist
+                        priorityFields: [
+                            'name',
+                            'title',
+                            'company',
+                            'organization',
+                            'description',
+                            'summary'
+                        ]
+                    });
+                    vector = await this.embeddingFunction(preparedText);
+                    // Track field names for this JSON document
+                    const service = this.getServiceName(options);
+                    if (this.storage) {
+                        await this.storage.trackFieldNames(vectorOrData, service);
+                    }
+                }
+                else {
+                    // Use standard embedding for non-JSON data
+                    vector = await this.embeddingFunction(vectorOrData);
+                }
             }
             catch (embedError) {
                 throw new Error(`Failed to vectorize data: ${embedError}`);
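With this hunk, inserting a plain object both shapes the embedded text and registers the object's field names. A hedged sketch, assuming the `(data, metadata, options)` argument ordering visible in the surrounding code:

```js
// Sketch: the metadata shape and argument ordering are assumptions
// based on the variable names in the diff.
await db.add(
  { name: 'Acme Corp', description: 'Industrial widgets', founded: 1947 },
  { industry: 'manufacturing' },
  { service: 'crm-import' }
)
// Embedded text ~ "name: Acme Corp description: Industrial widgets
// name: Acme Corp description: Industrial widgets founded: 1947"
// (name/description are priority fields, so both are repeated for emphasis),
// and 'name', 'description', 'founded' are recorded under 'crm-import'.
```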
@@ -14648,7 +15308,7 @@ class BrainyData {
         // Save noun to storage
         await this.storage.saveNoun(noun);
         // Track noun statistics
-        const service =
+        const service = this.getServiceName(options);
         await this.storage.incrementStatistic('noun', service);
         // Save metadata if provided and not empty
         if (metadata !== undefined) {
@@ -14701,7 +15361,7 @@ class BrainyData {
         }
         await this.storage.saveMetadata(id, metadataToSave);
         // Track metadata statistics
-        const metadataService =
+        const metadataService = this.getServiceName(options);
         await this.storage.incrementStatistic('metadata', metadataService);
     }
 }
@@ -15122,12 +15782,43 @@ class BrainyData {
         }
         // Check if database is in write-only mode
         this.checkWriteOnly();
-        //
+        // Process the query input for vectorization
         let queryToUse = queryVectorOrData;
+        // Handle string queries
         if (typeof queryVectorOrData === 'string' && !options.forceEmbed) {
             queryToUse = await this.embed(queryVectorOrData);
             options.forceEmbed = false; // Already embedded, don't force again
         }
+        // Handle JSON object queries with special processing
+        else if (typeof queryVectorOrData === 'object' &&
+            queryVectorOrData !== null &&
+            !Array.isArray(queryVectorOrData) &&
+            !options.forceEmbed) {
+            // If searching within a specific field
+            if (options.searchField) {
+                // Extract text from the specific field
+                const fieldText = extractFieldFromJson(queryVectorOrData, options.searchField);
+                if (fieldText) {
+                    queryToUse = await this.embeddingFunction(fieldText);
+                    options.forceEmbed = false; // Already embedded, don't force again
+                }
+            }
+            // Otherwise process the entire object with priority fields
+            else {
+                const preparedText = prepareJsonForVectorization(queryVectorOrData, {
+                    priorityFields: options.priorityFields || [
+                        'name',
+                        'title',
+                        'company',
+                        'organization',
+                        'description',
+                        'summary'
+                    ]
+                });
+                queryToUse = await this.embeddingFunction(preparedText);
+                options.forceEmbed = false; // Already embedded, don't force again
+            }
+        }
+        // If noun types are specified, use searchByNounTypes
         let searchResults;
         if (options.nounTypes && options.nounTypes.length > 0) {
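JSON query objects now mirror the insert-time handling: either a single field is embedded via `options.searchField`, or the whole object is flattened with priority-field weighting. A minimal sketch of both modes, with the `(query, k, options)` signature taken from the `searchByStandardField` call site later in this diff:

```js
// Sketch: both calls assume db.search(query, k, options).
const byCity = await db.search(
  { customer: { address: { city: 'Berlin' } } },
  10,
  { searchField: 'customer.address.city' } // embeds just "Berlin"
)

const byWhole = await db.search(
  { name: 'Acme', summary: 'widgets' },
  10,
  { priorityFields: ['name'] }             // "name: Acme" weighted double
)
```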
@@ -15424,7 +16115,7 @@ class BrainyData {
         // Remove from storage
         await this.storage.deleteNoun(actualId);
         // Track deletion statistics
-        const service = options
+        const service = this.getServiceName(options);
         await this.storage.decrementStatistic('noun', service);
         // Try to remove metadata (ignore errors)
         try {
@@ -15751,7 +16442,7 @@ class BrainyData {
         // Save verb to storage
         await this.storage.saveVerb(verb);
         // Track verb statistics
-        const serviceForStats = options
+        const serviceForStats = this.getServiceName(options);
         await this.storage.incrementStatistic('verb', serviceForStats);
         // Update HNSW index size (excluding verbs)
         await this.storage.updateHnswIndexSize(await this.getNounCount());
@@ -15899,7 +16590,7 @@ class BrainyData {
         // Remove from storage
         await this.storage.deleteVerb(id);
         // Track deletion statistics
-        const service = options
+        const service = this.getServiceName(options);
         await this.storage.decrementStatistic('verb', service);
         return true;
     }
@@ -16986,6 +17677,82 @@ class BrainyData {
             throw new Error(`Failed to generate random graph: ${error}`);
         }
     }
+    /**
+     * Get available field names by service
+     * This helps users understand what fields are available for searching from different data sources
+     * @returns Record of field names by service
+     */
+    async getAvailableFieldNames() {
+        await this.ensureInitialized();
+        if (!this.storage) {
+            return {};
+        }
+        return this.storage.getAvailableFieldNames();
+    }
+    /**
+     * Get standard field mappings
+     * This helps users understand how fields from different services map to standard field names
+     * @returns Record of standard field mappings
+     */
+    async getStandardFieldMappings() {
+        await this.ensureInitialized();
+        if (!this.storage) {
+            return {};
+        }
+        return this.storage.getStandardFieldMappings();
+    }
+    /**
+     * Search using a standard field name
+     * This allows searching across multiple services using a standardized field name
+     * @param standardField The standard field name to search in
+     * @param searchTerm The term to search for
+     * @param k Number of results to return
+     * @param options Additional search options
+     * @returns Array of search results
+     */
+    async searchByStandardField(standardField, searchTerm, k = 10, options = {}) {
+        await this.ensureInitialized();
+        // Check if database is in write-only mode
+        this.checkWriteOnly();
+        // Get standard field mappings
+        const standardFieldMappings = await this.getStandardFieldMappings();
+        // If the standard field doesn't exist, return empty results
+        if (!standardFieldMappings[standardField]) {
+            return [];
+        }
+        // Filter by services if specified
+        let serviceFieldMappings = standardFieldMappings[standardField];
+        if (options.services && options.services.length > 0) {
+            const filteredMappings = {};
+            for (const service of options.services) {
+                if (serviceFieldMappings[service]) {
+                    filteredMappings[service] = serviceFieldMappings[service];
+                }
+            }
+            serviceFieldMappings = filteredMappings;
+        }
+        // If no mappings after filtering, return empty results
+        if (Object.keys(serviceFieldMappings).length === 0) {
+            return [];
+        }
+        // Search in each service's fields and combine results
+        const allResults = [];
+        for (const [service, fieldNames] of Object.entries(serviceFieldMappings)) {
+            for (const fieldName of fieldNames) {
+                // Search using the specific field name for this service
+                const results = await this.search(searchTerm, k, {
+                    searchField: fieldName,
+                    service,
+                    includeVerbs: options.includeVerbs,
+                    searchMode: options.searchMode
+                });
+                // Add results to the combined list
+                allResults.push(...results);
+            }
+        }
+        // Sort by score and limit to k results
+        return allResults.sort((a, b) => b.score - a.score).slice(0, k);
+    }
 }
 
 /**