npm - @soulcraft/brainy - Versions diffs - 3.41.0 → 3.42.0 - Mend

@soulcraft/brainy 3.41.0 → 3.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/CHANGELOG.md +7 -0
package/dist/utils/metadataIndex.d.ts +30 -63
package/dist/utils/metadataIndex.js +339 -576
package/dist/utils/metadataIndexChunking.d.ts +322 -0
package/dist/utils/metadataIndexChunking.js +709 -0
package/package.json +1 -1

package/dist/utils/metadataIndex.js CHANGED Viewed

@@ -6,18 +6,14 @@
 import { MetadataIndexCache } from './metadataIndexCache.js';
 import { prodLog } from './logger.js';
 import { getGlobalCache } from './unifiedCache.js';
+import { SparseIndex, ChunkManager, AdaptiveChunkingStrategy } from './metadataIndexChunking.js';
 export class MetadataIndexManager {
     constructor(storage, config = {}) {
-        this.indexCache = new Map();
-        this.dirtyEntries = new Set();
         this.isRebuilding = false;
         this.fieldIndexes = new Map();
         this.dirtyFields = new Set();
         this.lastFlushTime = Date.now();
         this.autoFlushThreshold = 10; // Start with 10 for more frequent non-blocking flushes
-        // Sorted indices for range queries (only for numeric/date fields)
-        this.sortedIndices = new Map();
-        this.numericFields = new Set(); // Track which fields are numeric
         // Cardinality and field statistics tracking
         this.fieldStats = new Map();
         this.cardinalityUpdateInterval = 100; // Update cardinality every N operations
@@ -33,6 +29,10 @@ export class MetadataIndexManager {
         this.activeLocks = new Map();
         this.lockPromises = new Map();
         this.lockTimers = new Map(); // Track timers for cleanup
+        // Adaptive Chunked Sparse Indexing (v3.42.0)
+        // Reduces file count from 560k → 89 files (630x reduction)
+        // ALL fields now use chunking - no more flat files
+        this.sparseIndices = new Map(); // field -> sparse index
         this.storage = storage;
         this.config = {
             maxIndexSize: config.maxIndexSize ?? 10000,
@@ -67,6 +67,9 @@ export class MetadataIndexManager {
         });
         // Get global unified cache for coordinated memory management
         this.unifiedCache = getGlobalCache();
+        // Initialize chunking system (v3.42.0)
+        this.chunkManager = new ChunkManager(storage);
+        this.chunkingStrategy = new AdaptiveChunkingStrategy();
         // Lazy load counts from storage statistics on first access
         this.lazyLoadCounts();
     }
@@ -148,220 +151,6 @@ export class MetadataIndexManager {
             // This maintains zero-configuration principle
         }
     }
-    /**
-     * Get index key for field and value
-     */
-    getIndexKey(field, value) {
-        const normalizedValue = this.normalizeValue(value, field); // Pass field for bucketing!
-        return `${field}:${normalizedValue}`;
-    }
-    /**
-     * Ensure sorted index exists for a field (for range queries)
-     */
-    async ensureSortedIndex(field) {
-        if (!this.sortedIndices.has(field)) {
-            // Try to load from storage first
-            const loaded = await this.loadSortedIndex(field);
-            if (loaded) {
-                this.sortedIndices.set(field, loaded);
-            }
-            else {
-                // Create new sorted index - NOT dirty since we maintain incrementally
-                this.sortedIndices.set(field, {
-                    values: [],
-                    isDirty: false, // Clean by default with incremental updates
-                    fieldType: 'mixed'
-                });
-            }
-        }
-    }
-    /**
-     * Build sorted index for a field from hash index
-     */
-    async buildSortedIndex(field) {
-        const sortedIndex = this.sortedIndices.get(field);
-        if (!sortedIndex || !sortedIndex.isDirty)
-            return;
-        // Collect all values for this field from hash index
-        const valueMap = new Map();
-        for (const [key, entry] of this.indexCache.entries()) {
-            if (entry.field === field) {
-                const existing = valueMap.get(entry.value);
-                if (existing) {
-                    // Merge ID sets
-                    entry.ids.forEach(id => existing.add(id));
-                }
-                else {
-                    valueMap.set(entry.value, new Set(entry.ids));
-                }
-            }
-        }
-        // Convert to sorted array
-        const sorted = Array.from(valueMap.entries());
-        // Detect field type and sort accordingly
-        if (sorted.length > 0) {
-            const sampleValue = sorted[0][0];
-            if (typeof sampleValue === 'number') {
-                sortedIndex.fieldType = 'number';
-                sorted.sort((a, b) => a[0] - b[0]);
-            }
-            else if (sampleValue instanceof Date) {
-                sortedIndex.fieldType = 'date';
-                sorted.sort((a, b) => a[0].getTime() - b[0].getTime());
-            }
-            else {
-                sortedIndex.fieldType = 'string';
-                sorted.sort((a, b) => {
-                    const aVal = String(a[0]);
-                    const bVal = String(b[0]);
-                    return aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
-                });
-            }
-        }
-        sortedIndex.values = sorted;
-        sortedIndex.isDirty = false;
-    }
-    /**
-     * Detect field type from value
-     */
-    detectFieldType(value) {
-        if (typeof value === 'number' && !isNaN(value))
-            return 'number';
-        if (value instanceof Date)
-            return 'date';
-        return 'string';
-    }
-    /**
-     * Compare two values based on field type for sorting
-     */
-    compareValues(a, b, fieldType) {
-        switch (fieldType) {
-            case 'number':
-                return a - b;
-            case 'date':
-                return a.getTime() - b.getTime();
-            case 'string':
-            default:
-                const aStr = String(a);
-                const bStr = String(b);
-                return aStr < bStr ? -1 : aStr > bStr ? 1 : 0;
-        }
-    }
-    /**
-     * Binary search to find insertion position for a value
-     * Returns the index where the value should be inserted to maintain sorted order
-     */
-    findInsertPosition(sortedArray, value, fieldType) {
-        if (sortedArray.length === 0)
-            return 0;
-        let left = 0;
-        let right = sortedArray.length - 1;
-        while (left <= right) {
-            const mid = Math.floor((left + right) / 2);
-            const midVal = sortedArray[mid][0];
-            const comparison = this.compareValues(midVal, value, fieldType);
-            if (comparison < 0) {
-                left = mid + 1;
-            }
-            else if (comparison > 0) {
-                right = mid - 1;
-            }
-            else {
-                return mid; // Value already exists at this position
-            }
-        }
-        return left; // Insert position
-    }
-    /**
-     * Incrementally update sorted index when adding an ID
-     */
-    updateSortedIndexAdd(field, value, id) {
-        // Ensure sorted index exists
-        if (!this.sortedIndices.has(field)) {
-            this.sortedIndices.set(field, {
-                values: [],
-                isDirty: false,
-                fieldType: this.detectFieldType(value)
-            });
-        }
-        const sortedIndex = this.sortedIndices.get(field);
-        const normalizedValue = this.normalizeValue(value, field); // Pass field for bucketing!
-        // Find where this value should be in the sorted array
-        const insertPos = this.findInsertPosition(sortedIndex.values, normalizedValue, sortedIndex.fieldType);
-        if (insertPos < sortedIndex.values.length &&
-            sortedIndex.values[insertPos][0] === normalizedValue) {
-            // Value already exists, just add the ID to the existing Set
-            sortedIndex.values[insertPos][1].add(id);
-        }
-        else {
-            // New value, insert at the correct position
-            sortedIndex.values.splice(insertPos, 0, [normalizedValue, new Set([id])]);
-        }
-        // Mark as clean since we're maintaining it incrementally
-        sortedIndex.isDirty = false;
-    }
-    /**
-     * Incrementally update sorted index when removing an ID
-     */
-    updateSortedIndexRemove(field, value, id) {
-        const sortedIndex = this.sortedIndices.get(field);
-        if (!sortedIndex || sortedIndex.values.length === 0)
-            return;
-        const normalizedValue = this.normalizeValue(value, field); // Pass field for bucketing!
-        // Binary search to find the value
-        const pos = this.findInsertPosition(sortedIndex.values, normalizedValue, sortedIndex.fieldType);
-        if (pos < sortedIndex.values.length &&
-            sortedIndex.values[pos][0] === normalizedValue) {
-            // Remove the ID from the Set
-            sortedIndex.values[pos][1].delete(id);
-            // If no IDs left for this value, remove the entire entry
-            if (sortedIndex.values[pos][1].size === 0) {
-                sortedIndex.values.splice(pos, 1);
-            }
-        }
-        // Keep it clean
-        sortedIndex.isDirty = false;
-    }
-    /**
-     * Binary search for range start (inclusive or exclusive)
-     */
-    binarySearchStart(sorted, target, inclusive) {
-        let left = 0;
-        let right = sorted.length - 1;
-        let result = sorted.length;
-        while (left <= right) {
-            const mid = Math.floor((left + right) / 2);
-            const midVal = sorted[mid][0];
-            if (inclusive ? midVal >= target : midVal > target) {
-                result = mid;
-                right = mid - 1;
-            }
-            else {
-                left = mid + 1;
-            }
-        }
-        return result;
-    }
-    /**
-     * Binary search for range end (inclusive or exclusive)
-     */
-    binarySearchEnd(sorted, target, inclusive) {
-        let left = 0;
-        let right = sorted.length - 1;
-        let result = -1;
-        while (left <= right) {
-            const mid = Math.floor((left + right) / 2);
-            const midVal = sorted[mid][0];
-            if (inclusive ? midVal <= target : midVal < target) {
-                result = mid;
-                left = mid + 1;
-            }
-            else {
-                right = mid - 1;
-            }
-        }
-        return result;
-    }
     /**
      * Update cardinality statistics for a field
      */
@@ -385,18 +174,21 @@ export class MetadataIndexManager {
         }
         const stats = this.fieldStats.get(field);
         const cardinality = stats.cardinality;
-        // Track unique values
-        const fieldIndexKey = `${field}:${String(value)}`;
-        const entry = this.indexCache.get(fieldIndexKey);
+        // Track unique values by checking fieldIndex counts (v3.42.0 - removed indexCache)
+        const fieldIndex = this.fieldIndexes.get(field);
+        const normalizedValue = this.normalizeValue(value, field);
+        const currentCount = fieldIndex?.values[normalizedValue] || 0;
         if (operation === 'add') {
-            if (!entry || entry.ids.size === 1) {
+            // If this is a new value (count is 0), increment unique values
+            if (currentCount === 0) {
                 cardinality.uniqueValues++;
             }
             cardinality.totalValues++;
         }
         else if (operation === 'remove') {
-            if (entry && entry.ids.size === 0) {
-                cardinality.uniqueValues--;
+            // If count will become 0, decrement unique values
+            if (currentCount === 1) {
+                cardinality.uniqueValues = Math.max(0, cardinality.uniqueValues - 1);
             }
             cardinality.totalValues = Math.max(0, cardinality.totalValues - 1);
         }
@@ -434,23 +226,17 @@ export class MetadataIndexManager {
      * Update index strategy based on field statistics
      */
     updateIndexStrategy(field, stats) {
-        const isNumeric = this.numericFields.has(field);
         const hasHighCardinality = stats.cardinality.uniqueValues > this.HIGH_CARDINALITY_THRESHOLD;
-        const hasRangeQueries = stats.rangeQueryCount > stats.exactQueryCount * 0.3;
-        // Determine optimal index type
-        if (isNumeric && hasRangeQueries) {
-            stats.indexType = 'both'; // Need both hash and sorted
-        }
-        else if (hasRangeQueries) {
-            stats.indexType = 'sorted';
-        }
-        else {
-            stats.indexType = 'hash';
-        }
+        // All fields use chunked sparse indexing with zone maps (v3.42.0)
+        stats.indexType = 'hash';
         // Determine normalization strategy for high cardinality NON-temporal fields
         // (Temporal fields are already bucketed in normalizeValue from the start!)
         if (hasHighCardinality) {
-            if (isNumeric) {
+            // Check if field looks numeric (for float precision reduction)
+            const fieldLower = field.toLowerCase();
+            const looksNumeric = fieldLower.includes('count') || fieldLower.includes('score') ||
+                fieldLower.includes('value') || fieldLower.includes('amount');
+            if (looksNumeric) {
                 stats.normalizationStrategy = 'precision'; // Reduce float precision
             }
             else {
@@ -461,8 +247,237 @@ export class MetadataIndexManager {
             stats.normalizationStrategy = 'none';
         }
     }
+    // ============================================================================
+    // Adaptive Chunked Sparse Indexing (v3.42.0)
+    // All fields use chunking - simplified implementation
+    // ============================================================================
+    /**
+     * Load sparse index from storage
+     */
+    async loadSparseIndex(field) {
+        const indexPath = `__sparse_index__${field}`;
+        const unifiedKey = `metadata:sparse:${field}`;
+        return await this.unifiedCache.get(unifiedKey, async () => {
+            try {
+                const data = await this.storage.getMetadata(indexPath);
+                if (data) {
+                    const sparseIndex = SparseIndex.fromJSON(data);
+                    // Add to unified cache (sparse indices are expensive to rebuild)
+                    const size = JSON.stringify(data).length;
+                    this.unifiedCache.set(unifiedKey, sparseIndex, 'metadata', size, 200);
+                    return sparseIndex;
+                }
+            }
+            catch (error) {
+                prodLog.debug(`Failed to load sparse index for field '${field}':`, error);
+            }
+            return undefined;
+        });
+    }
+    /**
+     * Save sparse index to storage
+     */
+    async saveSparseIndex(field, sparseIndex) {
+        const indexPath = `__sparse_index__${field}`;
+        const unifiedKey = `metadata:sparse:${field}`;
+        const data = sparseIndex.toJSON();
+        await this.storage.saveMetadata(indexPath, data);
+        // Update unified cache
+        const size = JSON.stringify(data).length;
+        this.unifiedCache.set(unifiedKey, sparseIndex, 'metadata', size, 200);
+    }
+    /**
+     * Get IDs for a value using chunked sparse index
+     */
+    async getIdsFromChunks(field, value) {
+        // Load sparse index
+        let sparseIndex = this.sparseIndices.get(field);
+        if (!sparseIndex) {
+            sparseIndex = await this.loadSparseIndex(field);
+            if (!sparseIndex) {
+                return []; // No chunked index exists yet
+            }
+            this.sparseIndices.set(field, sparseIndex);
+        }
+        // Find candidate chunks using zone maps and bloom filters
+        const normalizedValue = this.normalizeValue(value, field);
+        const candidateChunkIds = sparseIndex.findChunksForValue(normalizedValue);
+        if (candidateChunkIds.length === 0) {
+            return []; // No chunks contain this value
+        }
+        // Load chunks and collect IDs
+        const allIds = new Set();
+        for (const chunkId of candidateChunkIds) {
+            const chunk = await this.chunkManager.loadChunk(field, chunkId);
+            if (chunk) {
+                const ids = chunk.entries.get(normalizedValue);
+                if (ids) {
+                    ids.forEach(id => allIds.add(id));
+                }
+            }
+        }
+        return Array.from(allIds);
+    }
+    /**
+     * Get IDs for a range using chunked sparse index with zone maps
+     */
+    async getIdsFromChunksForRange(field, min, max, includeMin = true, includeMax = true) {
+        // Load sparse index
+        let sparseIndex = this.sparseIndices.get(field);
+        if (!sparseIndex) {
+            sparseIndex = await this.loadSparseIndex(field);
+            if (!sparseIndex) {
+                return []; // No chunked index exists yet
+            }
+            this.sparseIndices.set(field, sparseIndex);
+        }
+        // Find candidate chunks using zone maps
+        const candidateChunkIds = sparseIndex.findChunksForRange(min, max);
+        if (candidateChunkIds.length === 0) {
+            return [];
+        }
+        // Load chunks and filter by range
+        const allIds = new Set();
+        for (const chunkId of candidateChunkIds) {
+            const chunk = await this.chunkManager.loadChunk(field, chunkId);
+            if (chunk) {
+                for (const [value, ids] of chunk.entries) {
+                    // Check if value is in range
+                    let inRange = true;
+                    if (min !== undefined) {
+                        inRange = inRange && (includeMin ? value >= min : value > min);
+                    }
+                    if (max !== undefined) {
+                        inRange = inRange && (includeMax ? value <= max : value < max);
+                    }
+                    if (inRange) {
+                        ids.forEach(id => allIds.add(id));
+                    }
+                }
+            }
+        }
+        return Array.from(allIds);
+    }
+    /**
+     * Add value-ID mapping to chunked index
+     */
+    async addToChunkedIndex(field, value, id) {
+        // Load or create sparse index
+        let sparseIndex = this.sparseIndices.get(field);
+        if (!sparseIndex) {
+            sparseIndex = await this.loadSparseIndex(field);
+            if (!sparseIndex) {
+                // Create new sparse index
+                const stats = this.fieldStats.get(field);
+                const chunkSize = stats
+                    ? this.chunkingStrategy.getOptimalChunkSize({
+                        uniqueValues: stats.cardinality.uniqueValues,
+                        distribution: stats.cardinality.distribution,
+                        avgIdsPerValue: stats.cardinality.totalValues / Math.max(1, stats.cardinality.uniqueValues)
+                    })
+                    : 50;
+                sparseIndex = new SparseIndex(field, chunkSize);
+            }
+            this.sparseIndices.set(field, sparseIndex);
+        }
+        const normalizedValue = this.normalizeValue(value, field);
+        // Find existing chunk for this value (check zone maps)
+        const candidateChunkIds = sparseIndex.findChunksForValue(normalizedValue);
+        let targetChunk = null;
+        let targetChunkId = null;
+        // Try to find an existing chunk with this value
+        for (const chunkId of candidateChunkIds) {
+            const chunk = await this.chunkManager.loadChunk(field, chunkId);
+            if (chunk && chunk.entries.has(normalizedValue)) {
+                targetChunk = chunk;
+                targetChunkId = chunkId;
+                break;
+            }
+        }
+        // If no chunk has this value, find chunk with space or create new one
+        if (!targetChunk) {
+            // Find a chunk with available space
+            for (const chunkId of sparseIndex.getAllChunkIds()) {
+                const chunk = await this.chunkManager.loadChunk(field, chunkId);
+                const descriptor = sparseIndex.getChunk(chunkId);
+                if (chunk && descriptor && chunk.entries.size < descriptor.splitThreshold) {
+                    targetChunk = chunk;
+                    targetChunkId = chunkId;
+                    break;
+                }
+            }
+        }
+        // Create new chunk if needed
+        if (!targetChunk) {
+            targetChunk = await this.chunkManager.createChunk(field);
+            targetChunkId = targetChunk.chunkId;
+            // Register in sparse index
+            const descriptor = {
+                chunkId: targetChunk.chunkId,
+                field,
+                valueCount: 0,
+                idCount: 0,
+                zoneMap: { min: null, max: null, count: 0, hasNulls: false },
+                lastUpdated: Date.now(),
+                splitThreshold: 80,
+                mergeThreshold: 20
+            };
+            sparseIndex.registerChunk(descriptor);
+        }
+        // Add to chunk
+        await this.chunkManager.addToChunk(targetChunk, normalizedValue, id);
+        await this.chunkManager.saveChunk(targetChunk);
+        // Update chunk descriptor in sparse index
+        const updatedZoneMap = this.chunkManager.calculateZoneMap(targetChunk);
+        const updatedBloomFilter = this.chunkManager.createBloomFilter(targetChunk);
+        sparseIndex.updateChunk(targetChunkId, {
+            valueCount: targetChunk.entries.size,
+            idCount: Array.from(targetChunk.entries.values()).reduce((sum, ids) => sum + ids.size, 0),
+            zoneMap: updatedZoneMap,
+            lastUpdated: Date.now()
+        });
+        // Update bloom filter
+        const descriptor = sparseIndex.getChunk(targetChunkId);
+        if (descriptor) {
+            sparseIndex.registerChunk(descriptor, updatedBloomFilter);
+        }
+        // Check if chunk needs splitting
+        if (targetChunk.entries.size > 80) {
+            await this.chunkManager.splitChunk(targetChunk, sparseIndex);
+        }
+        // Save sparse index
+        await this.saveSparseIndex(field, sparseIndex);
+    }
     /**
-     * Get IDs matching a range query
+     * Remove ID from chunked index
+     */
+    async removeFromChunkedIndex(field, value, id) {
+        const sparseIndex = this.sparseIndices.get(field) || await this.loadSparseIndex(field);
+        if (!sparseIndex) {
+            return; // No chunked index exists
+        }
+        const normalizedValue = this.normalizeValue(value, field);
+        const candidateChunkIds = sparseIndex.findChunksForValue(normalizedValue);
+        for (const chunkId of candidateChunkIds) {
+            const chunk = await this.chunkManager.loadChunk(field, chunkId);
+            if (chunk && chunk.entries.has(normalizedValue)) {
+                await this.chunkManager.removeFromChunk(chunk, normalizedValue, id);
+                await this.chunkManager.saveChunk(chunk);
+                // Update sparse index
+                const updatedZoneMap = this.chunkManager.calculateZoneMap(chunk);
+                sparseIndex.updateChunk(chunkId, {
+                    valueCount: chunk.entries.size,
+                    idCount: Array.from(chunk.entries.values()).reduce((sum, ids) => sum + ids.size, 0),
+                    zoneMap: updatedZoneMap,
+                    lastUpdated: Date.now()
+                });
+                await this.saveSparseIndex(field, sparseIndex);
+                break;
+            }
+        }
+    }
+    /**
+     * Get IDs matching a range query using zone maps
      */
     async getIdsForRange(field, min, max, includeMin = true, includeMax = true) {
         // Track range query for field statistics
@@ -470,35 +485,8 @@ export class MetadataIndexManager {
             const stats = this.fieldStats.get(field);
             stats.rangeQueryCount++;
         }
-        // Ensure sorted index exists
-        await this.ensureSortedIndex(field);
-        // With incremental updates, we should rarely need to rebuild
-        // Only rebuild if it's marked dirty (e.g., after a bulk load or migration)
-        let sortedIndex = this.sortedIndices.get(field);
-        if (sortedIndex?.isDirty) {
-            prodLog.warn(`MetadataIndex: Sorted index for field '${field}' was dirty, rebuilding...`);
-            await this.buildSortedIndex(field);
-            sortedIndex = this.sortedIndices.get(field);
-        }
-        if (!sortedIndex || sortedIndex.values.length === 0)
-            return [];
-        const sorted = sortedIndex.values;
-        const resultSet = new Set();
-        // Find range boundaries
-        let start = 0;
-        let end = sorted.length - 1;
-        if (min !== undefined) {
-            start = this.binarySearchStart(sorted, min, includeMin);
-        }
-        if (max !== undefined) {
-            end = this.binarySearchEnd(sorted, max, includeMax);
-        }
-        // Collect all IDs in range
-        for (let i = start; i <= end && i < sorted.length; i++) {
-            const [, ids] = sorted[i];
-            ids.forEach(id => resultSet.add(id));
-        }
-        return Array.from(resultSet);
+        // All fields use chunked sparse index with zone map optimization (v3.42.0)
+        return await this.getIdsFromChunksForRange(field, min, max, includeMin, includeMax);
     }
     /**
      * Generate field index filename for filter discovery
@@ -646,51 +634,22 @@ export class MetadataIndexManager {
         const updatedFields = new Set();
         for (let i = 0; i < fields.length; i++) {
             const { field, value } = fields[i];
-            const key = this.getIndexKey(field, value);
-            // Get or create index entry
-            let entry = this.indexCache.get(key);
-            if (!entry) {
-                const loadedEntry = await this.loadIndexEntry(key);
-                entry = loadedEntry ?? {
-                    field,
-                    value: this.normalizeValue(value, field), // Pass field for bucketing!
-                    ids: new Set(),
-                    lastUpdated: Date.now()
-                };
-                this.indexCache.set(key, entry);
-            }
-            // Add ID to entry
-            const wasNew = entry.ids.size === 0;
-            entry.ids.add(id);
-            entry.lastUpdated = Date.now();
-            this.dirtyEntries.add(key);
-            // Update cardinality statistics
-            if (wasNew) {
-                this.updateCardinalityStats(field, value, 'add');
-            }
-            // Track type-field affinity for intelligent NLP
-            this.updateTypeFieldAffinity(id, field, value, 'add');
-            // Incrementally update sorted index for this field
-            if (!updatedFields.has(field)) {
-                this.updateSortedIndexAdd(field, value, id);
-                updatedFields.add(field);
-            }
-            else {
-                // Multiple values for same field - still update
-                this.updateSortedIndexAdd(field, value, id);
-            }
-            // Update field index
+            // All fields use chunked sparse indexing (v3.42.0)
+            await this.addToChunkedIndex(field, value, id);
+            // Update statistics and tracking
+            this.updateCardinalityStats(field, value, 'add');
+            this.updateTypeFieldAffinity(id, field, value, 'add', metadata);
             await this.updateFieldIndex(field, value, 1);
             // Yield to event loop every 5 fields to prevent blocking
             if (i % 5 === 4) {
                 await this.yieldToEventLoop();
             }
         }
-        // Adaptive auto-flush based on usage patterns
+        // Adaptive auto-flush based on usage patterns (v3.42.0 - flush field indexes only)
         if (!skipFlush) {
             const timeSinceLastFlush = Date.now() - this.lastFlushTime;
-            const shouldAutoFlush = this.dirtyEntries.size >= this.autoFlushThreshold || // Size threshold
-                (this.dirtyEntries.size > 10 && timeSinceLastFlush > 5000); // Time threshold (5 seconds)
+            const shouldAutoFlush = this.dirtyFields.size >= this.autoFlushThreshold || // Size threshold
+                (this.dirtyFields.size > 10 && timeSinceLastFlush > 5000); // Time threshold (5 seconds)
             if (shouldAutoFlush) {
                 const startTime = Date.now();
                 await this.flush();
@@ -743,51 +702,31 @@ export class MetadataIndexManager {
             // Remove from specific field indexes
             const fields = this.extractIndexableFields(metadata);
             for (const { field, value } of fields) {
-                const key = this.getIndexKey(field, value);
-                let entry = this.indexCache.get(key);
-                if (!entry) {
-                    const loadedEntry = await this.loadIndexEntry(key);
-                    entry = loadedEntry ?? undefined;
-                }
-                if (entry) {
-                    const hadId = entry.ids.has(id);
-                    entry.ids.delete(id);
-                    entry.lastUpdated = Date.now();
-                    this.dirtyEntries.add(key);
-                    // Update cardinality statistics
-                    if (hadId && entry.ids.size === 0) {
-                        this.updateCardinalityStats(field, value, 'remove');
-                    }
-                    // Track type-field affinity for intelligent NLP
-                    if (hadId) {
-                        this.updateTypeFieldAffinity(id, field, value, 'remove');
-                    }
-                    // Incrementally update sorted index when removing
-                    this.updateSortedIndexRemove(field, value, id);
-                    // Update field index
-                    await this.updateFieldIndex(field, value, -1);
-                    // If no IDs left, mark for cleanup
-                    if (entry.ids.size === 0) {
-                        this.indexCache.delete(key);
-                        await this.deleteIndexEntry(key);
-                    }
-                }
+                // All fields use chunked sparse indexing (v3.42.0)
+                await this.removeFromChunkedIndex(field, value, id);
+                // Update statistics and tracking
+                this.updateCardinalityStats(field, value, 'remove');
+                this.updateTypeFieldAffinity(id, field, value, 'remove', metadata);
+                await this.updateFieldIndex(field, value, -1);
                 // Invalidate cache
                 this.metadataCache.invalidatePattern(`field_values_${field}`);
             }
         }
         else {
-            // Remove from all indexes (slower, requires scanning)
-            for (const [key, entry] of this.indexCache.entries()) {
-                if (entry.ids.has(id)) {
-                    entry.ids.delete(id);
-                    entry.lastUpdated = Date.now();
-                    this.dirtyEntries.add(key);
-                    // Incrementally update sorted index
-                    this.updateSortedIndexRemove(entry.field, entry.value, id);
-                    if (entry.ids.size === 0) {
-                        this.indexCache.delete(key);
-                        await this.deleteIndexEntry(key);
+            // Remove from all indexes (slower, requires scanning all chunks)
+            // This should be rare - prefer providing metadata when removing
+            prodLog.warn(`Removing ID ${id} without metadata requires scanning all sparse indices (slow)`);
+            // Scan all sparse indices
+            for (const [field, sparseIndex] of this.sparseIndices.entries()) {
+                for (const chunkId of sparseIndex.getAllChunkIds()) {
+                    const chunk = await this.chunkManager.loadChunk(field, chunkId);
+                    if (chunk) {
+                        // Check all values in this chunk
+                        for (const [value, ids] of chunk.entries) {
+                            if (ids.has(id)) {
+                                await this.removeFromChunkedIndex(field, value, id);
+                            }
+                        }
                     }
                 }
             }
@@ -797,14 +736,9 @@ export class MetadataIndexManager {
      * Get all IDs in the index
      */
     async getAllIds() {
-        // Collect all unique IDs from all index entries
+        // Use storage as the source of truth (v3.42.0 - removed redundant indexCache scan)
         const allIds = new Set();
-        // First, add all IDs from the in-memory cache
-        for (const entry of this.indexCache.values()) {
-            entry.ids.forEach(id => allIds.add(id));
-        }
-        // If storage has a method to get all nouns, use it as the source of truth
-        // This ensures we include items that might not be indexed yet
+        // Storage.getNouns() is the definitive source of all entity IDs
         if (this.storage && typeof this.storage.getNouns === 'function') {
             try {
                 const result = await this.storage.getNouns({
@@ -818,13 +752,15 @@ export class MetadataIndexManager {
                 }
             }
             catch (e) {
-                // Fall back to using only indexed IDs
+                // If storage method fails, return empty array
+                prodLog.warn('Failed to get all IDs from storage:', e);
+                return [];
             }
         }
         return Array.from(allIds);
     }
     /**
-     * Get IDs for a specific field-value combination with caching
+     * Get IDs for a specific field-value combination using chunked sparse index
      */
     async getIds(field, value) {
         // Track exact query for field statistics
@@ -832,27 +768,8 @@ export class MetadataIndexManager {
             const stats = this.fieldStats.get(field);
             stats.exactQueryCount++;
         }
-        const key = this.getIndexKey(field, value);
-        // Check metadata cache first
-        const cacheKey = `ids_${key}`;
-        const cachedIds = this.metadataCache.get(cacheKey);
-        if (cachedIds) {
-            return cachedIds;
-        }
-        // Try in-memory cache
-        let entry = this.indexCache.get(key);
-        // Load from storage if not cached
-        if (!entry) {
-            const loadedEntry = await this.loadIndexEntry(key);
-            if (loadedEntry) {
-                entry = loadedEntry;
-                this.indexCache.set(key, entry);
-            }
-        }
-        const ids = entry ? Array.from(entry.ids) : [];
-        // Cache the result
-        this.metadataCache.set(cacheKey, ids);
-        return ids;
+        // All fields use chunked sparse indexing (v3.42.0)
+        return await this.getIdsFromChunks(field, value);
     }
     /**
      * Get all available values for a field (for filter discovery)
@@ -1044,11 +961,20 @@ export class MetadataIndexManager {
                         // Existence operator
                         case 'exists':
                             if (operand) {
-                                // Get all IDs that have this field (any value)
+                                // Get all IDs that have this field (any value) from chunked sparse index (v3.42.0)
                                 const allIds = new Set();
-                                for (const [key, entry] of this.indexCache.entries()) {
-                                    if (entry.field === field) {
-                                        entry.ids.forEach(id => allIds.add(id));
+                                // Load sparse index for this field
+                                const sparseIndex = this.sparseIndices.get(field) || await this.loadSparseIndex(field);
+                                if (sparseIndex) {
+                                    // Iterate through all chunks for this field
+                                    for (const chunkId of sparseIndex.getAllChunkIds()) {
+                                        const chunk = await this.chunkManager.loadChunk(field, chunkId);
+                                        if (chunk) {
+                                            // Collect all IDs from all values in this chunk
+                                            for (const ids of chunk.entries.values()) {
+                                                ids.forEach(id => allIds.add(id));
+                                            }
+                                        }
                                     }
                                 }
                                 fieldResults = Array.from(allIds);
@@ -1149,31 +1075,17 @@ export class MetadataIndexManager {
     }
     /**
      * Flush dirty entries to storage (non-blocking version)
+     * NOTE (v3.42.0): Sparse indices are flushed immediately in add/remove operations
      */
     async flush() {
-        // Check if we have anything to flush (including sorted indices)
-        const hasDirtySortedIndices = Array.from(this.sortedIndices.values()).some(idx => idx.isDirty);
-        if (this.dirtyEntries.size === 0 && this.dirtyFields.size === 0 && !hasDirtySortedIndices) {
+        // Check if we have anything to flush
+        if (this.dirtyFields.size === 0) {
             return; // Nothing to flush
         }
         // Process in smaller batches to avoid blocking
         const BATCH_SIZE = 20;
         const allPromises = [];
-        // Flush value entries in batches
-        const dirtyEntriesArray = Array.from(this.dirtyEntries);
-        for (let i = 0; i < dirtyEntriesArray.length; i += BATCH_SIZE) {
-            const batch = dirtyEntriesArray.slice(i, i + BATCH_SIZE);
-            const batchPromises = batch.map(key => {
-                const entry = this.indexCache.get(key);
-                return entry ? this.saveIndexEntry(key, entry) : Promise.resolve();
-            });
-            allPromises.push(...batchPromises);
-            // Yield to event loop between batches
-            if (i + BATCH_SIZE < dirtyEntriesArray.length) {
-                await this.yieldToEventLoop();
-            }
-        }
-        // Flush field indexes in batches
+        // Flush field indexes in batches (v3.42.0 - removed flat file flushing)
         const dirtyFieldsArray = Array.from(this.dirtyFields);
         for (let i = 0; i < dirtyFieldsArray.length; i += BATCH_SIZE) {
             const batch = dirtyFieldsArray.slice(i, i + BATCH_SIZE);
@@ -1187,15 +1099,8 @@ export class MetadataIndexManager {
                 await this.yieldToEventLoop();
             }
         }
-        // Flush sorted indices (for range queries)
-        for (const [field, sortedIndex] of this.sortedIndices.entries()) {
-            if (sortedIndex.isDirty) {
-                allPromises.push(this.saveSortedIndex(field, sortedIndex));
-            }
-        }
         // Wait for all operations to complete
         await Promise.all(allPromises);
-        this.dirtyEntries.clear();
         this.dirtyFields.clear();
         this.lastFlushTime = Date.now();
     }
@@ -1275,69 +1180,6 @@ export class MetadataIndexManager {
             }
         }
     }
-    /**
-     * Save sorted index to storage for range queries with file locking
-     */
-    async saveSortedIndex(field, sortedIndex) {
-        const filename = `sorted_${field}`;
-        const lockKey = `sorted_index_${field}`;
-        const lockAcquired = await this.acquireLock(lockKey, 5000); // 5 second timeout
-        if (!lockAcquired) {
-            prodLog.warn(`Failed to acquire lock for sorted index '${field}', proceeding without lock`);
-        }
-        try {
-            const indexId = `__metadata_sorted_index__${filename}`;
-            const unifiedKey = `metadata:sorted:${field}`;
-            // Convert Set to Array for serialization
-            const serializable = {
-                values: sortedIndex.values.map(([value, ids]) => [value, Array.from(ids)]),
-                fieldType: sortedIndex.fieldType,
-                lastUpdated: Date.now()
-            };
-            await this.storage.saveMetadata(indexId, serializable);
-            // Mark as clean
-            sortedIndex.isDirty = false;
-            // Update unified cache (sorted indices are expensive to rebuild)
-            const size = JSON.stringify(serializable).length;
-            this.unifiedCache.set(unifiedKey, sortedIndex, 'metadata', size, 100); // Higher rebuild cost
-        }
-        finally {
-            if (lockAcquired) {
-                await this.releaseLock(lockKey);
-            }
-        }
-    }
-    /**
-     * Load sorted index from storage
-     */
-    async loadSortedIndex(field) {
-        const filename = `sorted_${field}`;
-        const indexId = `__metadata_sorted_index__${filename}`;
-        const unifiedKey = `metadata:sorted:${field}`;
-        // Check unified cache first
-        const cached = await this.unifiedCache.get(unifiedKey, async () => {
-            try {
-                const data = await this.storage.getMetadata(indexId);
-                if (data) {
-                    // Convert Arrays back to Sets
-                    const sortedIndex = {
-                        values: data.values.map(([value, ids]) => [value, new Set(ids)]),
-                        fieldType: data.fieldType || 'mixed',
-                        isDirty: false
-                    };
-                    // Add to unified cache
-                    const size = JSON.stringify(data).length;
-                    this.unifiedCache.set(unifiedKey, sortedIndex, 'metadata', size, 100);
-                    return sortedIndex;
-                }
-            }
-            catch (error) {
-                // Sorted index doesn't exist yet
-            }
-            return null;
-        });
-        return cached;
-    }
     /**
      * Get count of entities by type - O(1) operation using existing tracking
      * This exposes the production-ready counting that's already maintained
@@ -1362,19 +1204,12 @@ export class MetadataIndexManager {
         return new Map(this.totalEntitiesByType);
     }
     /**
-     * Get count of entities matching field-value criteria - O(1) lookup from existing indexes
+     * Get count of entities matching field-value criteria - queries chunked sparse index
      */
     async getCountForCriteria(field, value) {
-        const key = this.getIndexKey(field, value);
-        let entry = this.indexCache.get(key);
-        if (!entry) {
-            const loadedEntry = await this.loadIndexEntry(key);
-            if (loadedEntry) {
-                entry = loadedEntry;
-                this.indexCache.set(key, entry);
-            }
-        }
-        return entry ? entry.ids.size : 0;
+        // Use chunked sparse indexing (v3.42.0 - removed indexCache)
+        const ids = await this.getIds(field, value);
+        return ids.length;
     }
     /**
      * Get index statistics with enhanced counting information
@@ -1383,10 +1218,23 @@ export class MetadataIndexManager {
         const fields = new Set();
         let totalEntries = 0;
         let totalIds = 0;
-        for (const entry of this.indexCache.values()) {
-            fields.add(entry.field);
-            totalEntries++;
-            totalIds += entry.ids.size;
+        // Collect stats from sparse indices (v3.42.0 - removed indexCache)
+        for (const [field, sparseIndex] of this.sparseIndices.entries()) {
+            fields.add(field);
+            // Count entries and IDs from all chunks
+            for (const chunkId of sparseIndex.getAllChunkIds()) {
+                const chunk = await this.chunkManager.loadChunk(field, chunkId);
+                if (chunk) {
+                    totalEntries += chunk.entries.size;
+                    for (const ids of chunk.entries.values()) {
+                        totalIds += ids.size;
+                    }
+                }
+            }
+        }
+        // Also include fields from fieldIndexes that might not have sparse indices yet
+        for (const field of this.fieldIndexes.keys()) {
+            fields.add(field);
         }
         return {
             totalEntries,
@@ -1408,9 +1256,8 @@ export class MetadataIndexManager {
             prodLog.info('🔄 Starting non-blocking metadata index rebuild with batch processing to prevent socket exhaustion...');
             prodLog.info(`📊 Storage adapter: ${this.storage.constructor.name}`);
             prodLog.info(`🔧 Batch processing available: ${!!this.storage.getMetadataBatch}`);
-            // Clear existing indexes
-            this.indexCache.clear();
-            this.dirtyEntries.clear();
+            // Clear existing indexes (v3.42.0 - use sparse indices instead of flat files)
+            this.sparseIndices.clear();
             this.fieldIndexes.clear();
             this.dirtyFields.clear();
             // Rebuild noun metadata indexes using pagination
@@ -1599,89 +1446,6 @@ export class MetadataIndexManager {
             this.isRebuilding = false;
         }
     }
-    /**
-     * Load index entry from storage using safe filenames
-     */
-    async loadIndexEntry(key) {
-        const unifiedKey = `metadata:entry:${key}`;
-        // Use unified cache with loader function
-        return await this.unifiedCache.get(unifiedKey, async () => {
-            try {
-                // Extract field and value from key
-                const [field, value] = key.split(':', 2);
-                const filename = this.getValueChunkFilename(field, value);
-                // Load from metadata indexes directory with safe filename
-                const indexId = `__metadata_index__${filename}`;
-                const data = await this.storage.getMetadata(indexId);
-                if (data) {
-                    const entry = {
-                        field: data.field,
-                        value: data.value,
-                        ids: new Set(data.ids || []),
-                        lastUpdated: data.lastUpdated || Date.now()
-                    };
-                    // Add to unified cache (metadata entries are cheap to rebuild)
-                    const size = JSON.stringify(Array.from(entry.ids)).length + 100;
-                    this.unifiedCache.set(unifiedKey, entry, 'metadata', size, 1);
-                    return entry;
-                }
-            }
-            catch (error) {
-                // Index entry doesn't exist yet
-            }
-            return null;
-        });
-    }
-    /**
-     * Save index entry to storage using safe filenames with file locking
-     */
-    async saveIndexEntry(key, entry) {
-        const lockKey = `index_entry_${key}`;
-        const lockAcquired = await this.acquireLock(lockKey, 5000); // 5 second timeout
-        if (!lockAcquired) {
-            prodLog.warn(`Failed to acquire lock for index entry '${key}', proceeding without lock`);
-        }
-        try {
-            const unifiedKey = `metadata:entry:${key}`;
-            const data = {
-                field: entry.field,
-                value: entry.value,
-                ids: Array.from(entry.ids),
-                lastUpdated: entry.lastUpdated
-            };
-            // Extract field and value from key for safe filename generation
-            const [field, value] = key.split(':', 2);
-            const filename = this.getValueChunkFilename(field, value);
-            // Store metadata indexes with safe filename
-            const indexId = `__metadata_index__${filename}`;
-            await this.storage.saveMetadata(indexId, data);
-            // Update unified cache
-            const size = JSON.stringify(data.ids).length + 100;
-            this.unifiedCache.set(unifiedKey, entry, 'metadata', size, 1);
-        }
-        finally {
-            if (lockAcquired) {
-                await this.releaseLock(lockKey);
-            }
-        }
-    }
-    /**
-     * Delete index entry from storage using safe filenames
-     */
-    async deleteIndexEntry(key) {
-        const unifiedKey = `metadata:entry:${key}`;
-        try {
-            const [field, value] = key.split(':', 2);
-            const filename = this.getValueChunkFilename(field, value);
-            const indexId = `__metadata_index__${filename}`;
-            await this.storage.saveMetadata(indexId, null);
-            // Remove from unified cache
-            this.unifiedCache.delete(unifiedKey);
-        }
-        catch (error) {
-            // Entry might not exist
-        }
-    }
     /**
      * Get field statistics for optimization and discovery
      */
@@ -1814,7 +1578,7 @@ export class MetadataIndexManager {
      * Update type-field affinity tracking for intelligent NLP
      * Tracks which fields commonly appear with which entity types
      */
-    updateTypeFieldAffinity(entityId, field, value, operation) {
+    updateTypeFieldAffinity(entityId, field, value, operation, metadata) {
         // Only track affinity for non-system fields (but allow 'noun' for type detection)
         if (this.config.excludeFields.includes(field) && field !== 'noun')
             return;
@@ -1824,14 +1588,13 @@ export class MetadataIndexManager {
             // This is the type definition itself
             entityType = this.normalizeValue(value, field); // Pass field for bucketing!
         }
+        else if (metadata && metadata.noun) {
+            // Extract entity type from metadata (v3.42.0 - removed indexCache scan)
+            entityType = this.normalizeValue(metadata.noun, 'noun');
+        }
         else {
-            // Find the noun type for this entity by looking for entries with this entityId
-            for (const [key, entry] of this.indexCache.entries()) {
-                if (key.startsWith('noun:') && entry.ids.has(entityId)) {
-                    entityType = key.split(':', 2)[1];
-                    break;
-                }
-            }
+            // No type information available, skip affinity tracking
+            return;
         }
         if (!entityType)
             return; // No type found, skip affinity tracking