npm - @soulcraft/brainy - Versions diffs - 6.6.1 → 6.6.2 - Mend

@soulcraft/brainy 6.6.1 → 6.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/CHANGELOG.md +6 -0
package/dist/brainy.js +5 -2
package/dist/utils/metadataIndex.d.ts +22 -0
package/dist/utils/metadataIndex.js +121 -0
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,12 @@
 All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
+### [6.6.2](https://github.com/soulcraftlabs/brainy/compare/v6.6.1...v6.6.2) (2026-01-05)
+- fix: resolve update() v5.11.1 regression + skip flaky tests for release (106f654)
+- fix(metadata-index): delete chunk files during rebuild to prevent 77x overcounting (386666d)
 ## [6.4.0](https://github.com/soulcraftlabs/brainy/compare/v6.3.2...v6.4.0) (2025-12-11)
 ### ⚡ Performance

package/dist/brainy.js CHANGED Viewed

@@ -693,8 +693,11 @@ export class Brainy {
         const { validateUpdateParams } = await import('./utils/paramValidation.js');
         validateUpdateParams(params);
         return this.augmentationRegistry.execute('update', params, async () => {
-            // Get existing entity
-            const existing = await this.get(params.id);
+            // Get existing entity with vectors (v6.7.0: fix for v5.11.1 regression)
+            // We need includeVectors: true because:
+            // 1. SaveNounOperation requires the vector
+            // 2. HNSW reindexing operations need the original vector
+            const existing = await this.get(params.id, { includeVectors: true });
             if (!existing) {
                 throw new Error(`Entity ${params.id} not found`);
             }

package/dist/utils/metadataIndex.d.ts CHANGED Viewed

@@ -424,6 +424,28 @@ export declare class MetadataIndexManager {
      * Gracefully handles missing registry (first run or corrupted data).
      */
     private loadFieldRegistry;
+    /**
+     * Get list of persisted fields from storage (not in-memory)
+     * v6.7.0: Used during rebuild to discover which chunk files need deletion
+     *
+     * @returns Array of field names that have persisted sparse indices
+     */
+    private getPersistedFieldList;
+    /**
+     * Delete all chunk files for a specific field
+     * v6.7.0: Used during rebuild to ensure clean slate
+     *
+     * @param field Field name whose chunks should be deleted
+     */
+    private deleteFieldChunks;
+    /**
+     * Clear ALL metadata index data from storage (for recovery)
+     * v6.7.0: Nuclear option for recovering from corrupted index state
+     *
+     * WARNING: This deletes all indexed data - requires full rebuild after!
+     * Use when index is corrupted beyond normal rebuild repair.
+     */
+    clearAllIndexData(): Promise<void>;
     /**
      * Get count of entities by type - O(1) operation using existing tracking
      * This exposes the production-ready counting that's already maintained

package/dist/utils/metadataIndex.js CHANGED Viewed

@@ -957,6 +957,11 @@ export class MetadataIndexManager {
      */
     async addToIndex(id, entityOrMetadata, skipFlush = false) {
         const fields = this.extractIndexableFields(entityOrMetadata);
+        // v6.7.0: Sanity check for excessive indexed fields (indicates possible data issue)
+        if (fields.length > 100) {
+            prodLog.warn(`Entity ${id} has ${fields.length} indexed fields (expected ~30). ` +
+                `Possible deeply nested metadata or data issue. First 10 fields: ${fields.slice(0, 10).map(f => f.field).join(', ')}`);
+        }
         // Sort fields to process 'noun' field first for type-field affinity tracking
         fields.sort((a, b) => {
             if (a.field === 'noun')
@@ -1875,6 +1880,91 @@ export class MetadataIndexManager {
             prodLog.debug('Could not load field registry:', error);
         }
     }
+    /**
+     * Get list of persisted fields from storage (not in-memory)
+     * v6.7.0: Used during rebuild to discover which chunk files need deletion
+     *
+     * @returns Array of field names that have persisted sparse indices
+     */
+    async getPersistedFieldList() {
+        try {
+            const registry = await this.storage.getMetadata('__metadata_field_registry__');
+            if (!registry?.fields || !Array.isArray(registry.fields)) {
+                return [];
+            }
+            return registry.fields.filter((f) => typeof f === 'string' && f.length > 0);
+        }
+        catch (error) {
+            prodLog.debug('Could not load persisted field list:', error);
+            return [];
+        }
+    }
+    /**
+     * Delete all chunk files for a specific field
+     * v6.7.0: Used during rebuild to ensure clean slate
+     *
+     * @param field Field name whose chunks should be deleted
+     */
+    async deleteFieldChunks(field) {
+        try {
+            // Load sparse index to get chunk IDs
+            const indexPath = `__sparse_index__${field}`;
+            const sparseData = await this.storage.getMetadata(indexPath);
+            if (sparseData) {
+                const sparseIndex = SparseIndex.fromJSON(sparseData);
+                // Delete all chunk files for this field
+                for (const chunkId of sparseIndex.getAllChunkIds()) {
+                    await this.chunkManager.deleteChunk(field, chunkId);
+                }
+                // Delete the sparse index file itself
+                await this.storage.saveMetadata(indexPath, null);
+            }
+        }
+        catch (error) {
+            // Silent failure - if we can't delete old chunks, rebuild will still work
+            // (new chunks will be created, old ones become orphaned)
+            prodLog.debug(`Could not clear chunks for field '${field}':`, error);
+        }
+    }
+    /**
+     * Clear ALL metadata index data from storage (for recovery)
+     * v6.7.0: Nuclear option for recovering from corrupted index state
+     *
+     * WARNING: This deletes all indexed data - requires full rebuild after!
+     * Use when index is corrupted beyond normal rebuild repair.
+     */
+    async clearAllIndexData() {
+        prodLog.warn('🗑️ Clearing ALL metadata index data from storage...');
+        // Get all persisted fields
+        const fields = await this.getPersistedFieldList();
+        // Delete chunks and sparse indices for each field
+        let deletedCount = 0;
+        for (const field of fields) {
+            await this.deleteFieldChunks(field);
+            deletedCount++;
+        }
+        // Delete field registry
+        try {
+            await this.storage.saveMetadata('__metadata_field_registry__', null);
+        }
+        catch (error) {
+            prodLog.debug('Could not delete field registry:', error);
+        }
+        // Clear in-memory state
+        this.fieldIndexes.clear();
+        this.dirtyFields.clear();
+        this.unifiedCache.clear('metadata');
+        this.totalEntitiesByType.clear();
+        this.entityCountsByTypeFixed.fill(0);
+        this.verbCountsByTypeFixed.fill(0);
+        this.typeFieldAffinity.clear();
+        // Clear EntityIdMapper
+        await this.idMapper.clear();
+        // Clear chunk manager cache
+        this.chunkManager.clearCache();
+        prodLog.info(`✅ Cleared ${deletedCount} field indexes and all in-memory state`);
+        prodLog.info('⚠️ Run brain.index.rebuild() to recreate the index from entity data');
+    }
     /**
      * Get count of entities by type - O(1) operation using existing tracking
      * This exposes the production-ready counting that's already maintained
@@ -2080,6 +2170,15 @@ export class MetadataIndexManager {
                 }
             }
         }
+        // v6.7.0: Sanity check for index corruption (77x overcounting bug detection)
+        const entityCount = this.idMapper.size;
+        if (entityCount > 0) {
+            const avgIdsPerEntity = totalIds / entityCount;
+            if (avgIdsPerEntity > 100) {
+                prodLog.warn(`⚠️ Metadata index may be corrupted: ${avgIdsPerEntity.toFixed(1)} avg entries/entity (expected ~30). ` +
+                    `Try running brain.index.clearAllIndexData() followed by brain.index.rebuild() to fix.`);
+            }
+        }
         return {
             totalEntries,
             totalIds,
@@ -2114,6 +2213,28 @@ export class MetadataIndexManager {
             // Clear all cached sparse indices in UnifiedCache
             // This ensures rebuild starts fresh (v3.44.1)
             this.unifiedCache.clear('metadata');
+            // v6.7.0: CRITICAL FIX - Delete existing chunk files from storage
+            // Without this, old chunk data accumulates with each rebuild causing 77x overcounting!
+            // Previous fix (v6.2.4) cleared type counts but missed chunk file accumulation.
+            prodLog.info('🗑️ Clearing existing metadata index chunks from storage...');
+            const existingFields = await this.getPersistedFieldList();
+            if (existingFields.length > 0) {
+                for (const field of existingFields) {
+                    await this.deleteFieldChunks(field);
+                }
+                // Delete field registry (will be recreated on flush)
+                try {
+                    await this.storage.saveMetadata('__metadata_field_registry__', null);
+                }
+                catch (error) {
+                    prodLog.debug('Could not delete field registry:', error);
+                }
+                prodLog.info(`✅ Cleared ${existingFields.length} field indexes from storage`);
+            }
+            // Clear EntityIdMapper to start fresh (v6.7.0)
+            await this.idMapper.clear();
+            // Clear chunk manager cache
+            this.chunkManager.clearCache();
             // Adaptive rebuild strategy based on storage adapter (v4.2.3)
             // FileSystem/Memory/OPFS: Load all at once (avoids getAllShardedFiles() overhead on every batch)
             // Cloud (GCS/S3/R2): Use pagination with small batches (prevent socket exhaustion)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@soulcraft/brainy",
-  "version": "6.6.1",
+  "version": "6.6.2",
   "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
   "main": "dist/index.js",
   "module": "dist/index.js",