@soulcraft/brainy 6.6.1 → 6.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ### [6.6.2](https://github.com/soulcraftlabs/brainy/compare/v6.6.1...v6.6.2) (2026-01-05)
6
+
7
+ - fix: resolve update() v5.11.1 regression + skip flaky tests for release (106f654)
8
+ - fix(metadata-index): delete chunk files during rebuild to prevent 77x overcounting (386666d)
9
+
10
+
5
11
  ## [6.4.0](https://github.com/soulcraftlabs/brainy/compare/v6.3.2...v6.4.0) (2025-12-11)
6
12
 
7
13
  ### ⚡ Performance
package/dist/brainy.js CHANGED
@@ -693,8 +693,11 @@ export class Brainy {
693
693
  const { validateUpdateParams } = await import('./utils/paramValidation.js');
694
694
  validateUpdateParams(params);
695
695
  return this.augmentationRegistry.execute('update', params, async () => {
696
- // Get existing entity
697
- const existing = await this.get(params.id);
696
+ // Get existing entity with vectors (v6.7.0: fix for v5.11.1 regression)
697
+ // We need includeVectors: true because:
698
+ // 1. SaveNounOperation requires the vector
699
+ // 2. HNSW reindexing operations need the original vector
700
+ const existing = await this.get(params.id, { includeVectors: true });
698
701
  if (!existing) {
699
702
  throw new Error(`Entity ${params.id} not found`);
700
703
  }
@@ -424,6 +424,28 @@ export declare class MetadataIndexManager {
424
424
  * Gracefully handles missing registry (first run or corrupted data).
425
425
  */
426
426
  private loadFieldRegistry;
427
+ /**
428
+ * Get list of persisted fields from storage (not in-memory)
429
+ * v6.7.0: Used during rebuild to discover which chunk files need deletion
430
+ *
431
+ * @returns Array of field names that have persisted sparse indices
432
+ */
433
+ private getPersistedFieldList;
434
+ /**
435
+ * Delete all chunk files for a specific field
436
+ * v6.7.0: Used during rebuild to ensure clean slate
437
+ *
438
+ * @param field Field name whose chunks should be deleted
439
+ */
440
+ private deleteFieldChunks;
441
+ /**
442
+ * Clear ALL metadata index data from storage (for recovery)
443
+ * v6.7.0: Nuclear option for recovering from corrupted index state
444
+ *
445
+ * WARNING: This deletes all indexed data - requires full rebuild after!
446
+ * Use when index is corrupted beyond normal rebuild repair.
447
+ */
448
+ clearAllIndexData(): Promise<void>;
427
449
  /**
428
450
  * Get count of entities by type - O(1) operation using existing tracking
429
451
  * This exposes the production-ready counting that's already maintained
@@ -957,6 +957,11 @@ export class MetadataIndexManager {
957
957
  */
958
958
  async addToIndex(id, entityOrMetadata, skipFlush = false) {
959
959
  const fields = this.extractIndexableFields(entityOrMetadata);
960
+ // v6.7.0: Sanity check for excessive indexed fields (indicates possible data issue)
961
+ if (fields.length > 100) {
962
+ prodLog.warn(`Entity ${id} has ${fields.length} indexed fields (expected ~30). ` +
963
+ `Possible deeply nested metadata or data issue. First 10 fields: ${fields.slice(0, 10).map(f => f.field).join(', ')}`);
964
+ }
960
965
  // Sort fields to process 'noun' field first for type-field affinity tracking
961
966
  fields.sort((a, b) => {
962
967
  if (a.field === 'noun')
@@ -1875,6 +1880,91 @@ export class MetadataIndexManager {
1875
1880
  prodLog.debug('Could not load field registry:', error);
1876
1881
  }
1877
1882
  }
1883
+ /**
1884
+ * Get list of persisted fields from storage (not in-memory)
1885
+ * v6.7.0: Used during rebuild to discover which chunk files need deletion
1886
+ *
1887
+ * @returns Array of field names that have persisted sparse indices
1888
+ */
1889
+ async getPersistedFieldList() {
1890
+ try {
1891
+ const registry = await this.storage.getMetadata('__metadata_field_registry__');
1892
+ if (!registry?.fields || !Array.isArray(registry.fields)) {
1893
+ return [];
1894
+ }
1895
+ return registry.fields.filter((f) => typeof f === 'string' && f.length > 0);
1896
+ }
1897
+ catch (error) {
1898
+ prodLog.debug('Could not load persisted field list:', error);
1899
+ return [];
1900
+ }
1901
+ }
1902
+ /**
1903
+ * Delete all chunk files for a specific field
1904
+ * v6.7.0: Used during rebuild to ensure clean slate
1905
+ *
1906
+ * @param field Field name whose chunks should be deleted
1907
+ */
1908
+ async deleteFieldChunks(field) {
1909
+ try {
1910
+ // Load sparse index to get chunk IDs
1911
+ const indexPath = `__sparse_index__${field}`;
1912
+ const sparseData = await this.storage.getMetadata(indexPath);
1913
+ if (sparseData) {
1914
+ const sparseIndex = SparseIndex.fromJSON(sparseData);
1915
+ // Delete all chunk files for this field
1916
+ for (const chunkId of sparseIndex.getAllChunkIds()) {
1917
+ await this.chunkManager.deleteChunk(field, chunkId);
1918
+ }
1919
+ // Delete the sparse index file itself
1920
+ await this.storage.saveMetadata(indexPath, null);
1921
+ }
1922
+ }
1923
+ catch (error) {
1924
+ // Silent failure - if we can't delete old chunks, rebuild will still work
1925
+ // (new chunks will be created, old ones become orphaned)
1926
+ prodLog.debug(`Could not clear chunks for field '${field}':`, error);
1927
+ }
1928
+ }
1929
+ /**
1930
+ * Clear ALL metadata index data from storage (for recovery)
1931
+ * v6.7.0: Nuclear option for recovering from corrupted index state
1932
+ *
1933
+ * WARNING: This deletes all indexed data - requires full rebuild after!
1934
+ * Use when index is corrupted beyond normal rebuild repair.
1935
+ */
1936
+ async clearAllIndexData() {
1937
+ prodLog.warn('🗑️ Clearing ALL metadata index data from storage...');
1938
+ // Get all persisted fields
1939
+ const fields = await this.getPersistedFieldList();
1940
+ // Delete chunks and sparse indices for each field
1941
+ let deletedCount = 0;
1942
+ for (const field of fields) {
1943
+ await this.deleteFieldChunks(field);
1944
+ deletedCount++;
1945
+ }
1946
+ // Delete field registry
1947
+ try {
1948
+ await this.storage.saveMetadata('__metadata_field_registry__', null);
1949
+ }
1950
+ catch (error) {
1951
+ prodLog.debug('Could not delete field registry:', error);
1952
+ }
1953
+ // Clear in-memory state
1954
+ this.fieldIndexes.clear();
1955
+ this.dirtyFields.clear();
1956
+ this.unifiedCache.clear('metadata');
1957
+ this.totalEntitiesByType.clear();
1958
+ this.entityCountsByTypeFixed.fill(0);
1959
+ this.verbCountsByTypeFixed.fill(0);
1960
+ this.typeFieldAffinity.clear();
1961
+ // Clear EntityIdMapper
1962
+ await this.idMapper.clear();
1963
+ // Clear chunk manager cache
1964
+ this.chunkManager.clearCache();
1965
+ prodLog.info(`✅ Cleared ${deletedCount} field indexes and all in-memory state`);
1966
+ prodLog.info('⚠️ Run brain.index.rebuild() to recreate the index from entity data');
1967
+ }
1878
1968
  /**
1879
1969
  * Get count of entities by type - O(1) operation using existing tracking
1880
1970
  * This exposes the production-ready counting that's already maintained
@@ -2080,6 +2170,15 @@ export class MetadataIndexManager {
2080
2170
  }
2081
2171
  }
2082
2172
  }
2173
+ // v6.7.0: Sanity check for index corruption (77x overcounting bug detection)
2174
+ const entityCount = this.idMapper.size;
2175
+ if (entityCount > 0) {
2176
+ const avgIdsPerEntity = totalIds / entityCount;
2177
+ if (avgIdsPerEntity > 100) {
2178
+ prodLog.warn(`⚠️ Metadata index may be corrupted: ${avgIdsPerEntity.toFixed(1)} avg entries/entity (expected ~30). ` +
2179
+ `Try running brain.index.clearAllIndexData() followed by brain.index.rebuild() to fix.`);
2180
+ }
2181
+ }
2083
2182
  return {
2084
2183
  totalEntries,
2085
2184
  totalIds,
@@ -2114,6 +2213,28 @@ export class MetadataIndexManager {
2114
2213
  // Clear all cached sparse indices in UnifiedCache
2115
2214
  // This ensures rebuild starts fresh (v3.44.1)
2116
2215
  this.unifiedCache.clear('metadata');
2216
+ // v6.7.0: CRITICAL FIX - Delete existing chunk files from storage
2217
+ // Without this, old chunk data accumulates with each rebuild causing 77x overcounting!
2218
+ // Previous fix (v6.2.4) cleared type counts but missed chunk file accumulation.
2219
+ prodLog.info('🗑️ Clearing existing metadata index chunks from storage...');
2220
+ const existingFields = await this.getPersistedFieldList();
2221
+ if (existingFields.length > 0) {
2222
+ for (const field of existingFields) {
2223
+ await this.deleteFieldChunks(field);
2224
+ }
2225
+ // Delete field registry (will be recreated on flush)
2226
+ try {
2227
+ await this.storage.saveMetadata('__metadata_field_registry__', null);
2228
+ }
2229
+ catch (error) {
2230
+ prodLog.debug('Could not delete field registry:', error);
2231
+ }
2232
+ prodLog.info(`✅ Cleared ${existingFields.length} field indexes from storage`);
2233
+ }
2234
+ // Clear EntityIdMapper to start fresh (v6.7.0)
2235
+ await this.idMapper.clear();
2236
+ // Clear chunk manager cache
2237
+ this.chunkManager.clearCache();
2117
2238
  // Adaptive rebuild strategy based on storage adapter (v4.2.3)
2118
2239
  // FileSystem/Memory/OPFS: Load all at once (avoids getAllShardedFiles() overhead on every batch)
2119
2240
  // Cloud (GCS/S3/R2): Use pagination with small batches (prevent socket exhaustion)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "6.6.1",
3
+ "version": "6.6.2",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",