@soulcraft/brainy 6.6.1 → 6.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/brainy.js +5 -2
- package/dist/utils/metadataIndex.d.ts +22 -0
- package/dist/utils/metadataIndex.js +121 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
### [6.6.2](https://github.com/soulcraftlabs/brainy/compare/v6.6.1...v6.6.2) (2026-01-05)
|
|
6
|
+
|
|
7
|
+
- fix: resolve update() v5.11.1 regression + skip flaky tests for release (106f654)
|
|
8
|
+
- fix(metadata-index): delete chunk files during rebuild to prevent 77x overcounting (386666d)
|
|
9
|
+
|
|
10
|
+
|
|
5
11
|
## [6.4.0](https://github.com/soulcraftlabs/brainy/compare/v6.3.2...v6.4.0) (2025-12-11)
|
|
6
12
|
|
|
7
13
|
### ⚡ Performance
|
package/dist/brainy.js
CHANGED
|
@@ -693,8 +693,11 @@ export class Brainy {
|
|
|
693
693
|
const { validateUpdateParams } = await import('./utils/paramValidation.js');
|
|
694
694
|
validateUpdateParams(params);
|
|
695
695
|
return this.augmentationRegistry.execute('update', params, async () => {
|
|
696
|
-
// Get existing entity
|
|
697
|
-
|
|
696
|
+
// Get existing entity with vectors (v6.7.0: fix for v5.11.1 regression)
|
|
697
|
+
// We need includeVectors: true because:
|
|
698
|
+
// 1. SaveNounOperation requires the vector
|
|
699
|
+
// 2. HNSW reindexing operations need the original vector
|
|
700
|
+
const existing = await this.get(params.id, { includeVectors: true });
|
|
698
701
|
if (!existing) {
|
|
699
702
|
throw new Error(`Entity ${params.id} not found`);
|
|
700
703
|
}
|
|
package/dist/utils/metadataIndex.d.ts
CHANGED
|
@@ -424,6 +424,28 @@ export declare class MetadataIndexManager {
|
|
|
424
424
|
* Gracefully handles missing registry (first run or corrupted data).
|
|
425
425
|
*/
|
|
426
426
|
private loadFieldRegistry;
|
|
427
|
+
/**
|
|
428
|
+
* Get list of persisted fields from storage (not in-memory)
|
|
429
|
+
* v6.7.0: Used during rebuild to discover which chunk files need deletion
|
|
430
|
+
*
|
|
431
|
+
* @returns Array of field names that have persisted sparse indices
|
|
432
|
+
*/
|
|
433
|
+
private getPersistedFieldList;
|
|
434
|
+
/**
|
|
435
|
+
* Delete all chunk files for a specific field
|
|
436
|
+
* v6.7.0: Used during rebuild to ensure clean slate
|
|
437
|
+
*
|
|
438
|
+
* @param field Field name whose chunks should be deleted
|
|
439
|
+
*/
|
|
440
|
+
private deleteFieldChunks;
|
|
441
|
+
/**
|
|
442
|
+
* Clear ALL metadata index data from storage (for recovery)
|
|
443
|
+
* v6.7.0: Nuclear option for recovering from corrupted index state
|
|
444
|
+
*
|
|
445
|
+
* WARNING: This deletes all indexed data - requires full rebuild after!
|
|
446
|
+
* Use when index is corrupted beyond normal rebuild repair.
|
|
447
|
+
*/
|
|
448
|
+
clearAllIndexData(): Promise<void>;
|
|
427
449
|
/**
|
|
428
450
|
* Get count of entities by type - O(1) operation using existing tracking
|
|
429
451
|
* This exposes the production-ready counting that's already maintained
|
|
package/dist/utils/metadataIndex.js
CHANGED
|
@@ -957,6 +957,11 @@ export class MetadataIndexManager {
|
|
|
957
957
|
*/
|
|
958
958
|
async addToIndex(id, entityOrMetadata, skipFlush = false) {
|
|
959
959
|
const fields = this.extractIndexableFields(entityOrMetadata);
|
|
960
|
+
// v6.7.0: Sanity check for excessive indexed fields (indicates possible data issue)
|
|
961
|
+
if (fields.length > 100) {
|
|
962
|
+
prodLog.warn(`Entity ${id} has ${fields.length} indexed fields (expected ~30). ` +
|
|
963
|
+
`Possible deeply nested metadata or data issue. First 10 fields: ${fields.slice(0, 10).map(f => f.field).join(', ')}`);
|
|
964
|
+
}
|
|
960
965
|
// Sort fields to process 'noun' field first for type-field affinity tracking
|
|
961
966
|
fields.sort((a, b) => {
|
|
962
967
|
if (a.field === 'noun')
|
|
@@ -1875,6 +1880,91 @@ export class MetadataIndexManager {
|
|
|
1875
1880
|
prodLog.debug('Could not load field registry:', error);
|
|
1876
1881
|
}
|
|
1877
1882
|
}
|
|
1883
|
+
/**
|
|
1884
|
+
* Get list of persisted fields from storage (not in-memory)
|
|
1885
|
+
* v6.7.0: Used during rebuild to discover which chunk files need deletion
|
|
1886
|
+
*
|
|
1887
|
+
* @returns Array of field names that have persisted sparse indices
|
|
1888
|
+
*/
|
|
1889
|
+
async getPersistedFieldList() {
|
|
1890
|
+
try {
|
|
1891
|
+
const registry = await this.storage.getMetadata('__metadata_field_registry__');
|
|
1892
|
+
if (!registry?.fields || !Array.isArray(registry.fields)) {
|
|
1893
|
+
return [];
|
|
1894
|
+
}
|
|
1895
|
+
return registry.fields.filter((f) => typeof f === 'string' && f.length > 0);
|
|
1896
|
+
}
|
|
1897
|
+
catch (error) {
|
|
1898
|
+
prodLog.debug('Could not load persisted field list:', error);
|
|
1899
|
+
return [];
|
|
1900
|
+
}
|
|
1901
|
+
}
|
|
1902
|
+
/**
|
|
1903
|
+
* Delete all chunk files for a specific field
|
|
1904
|
+
* v6.7.0: Used during rebuild to ensure clean slate
|
|
1905
|
+
*
|
|
1906
|
+
* @param field Field name whose chunks should be deleted
|
|
1907
|
+
*/
|
|
1908
|
+
async deleteFieldChunks(field) {
|
|
1909
|
+
try {
|
|
1910
|
+
// Load sparse index to get chunk IDs
|
|
1911
|
+
const indexPath = `__sparse_index__${field}`;
|
|
1912
|
+
const sparseData = await this.storage.getMetadata(indexPath);
|
|
1913
|
+
if (sparseData) {
|
|
1914
|
+
const sparseIndex = SparseIndex.fromJSON(sparseData);
|
|
1915
|
+
// Delete all chunk files for this field
|
|
1916
|
+
for (const chunkId of sparseIndex.getAllChunkIds()) {
|
|
1917
|
+
await this.chunkManager.deleteChunk(field, chunkId);
|
|
1918
|
+
}
|
|
1919
|
+
// Delete the sparse index file itself
|
|
1920
|
+
await this.storage.saveMetadata(indexPath, null);
|
|
1921
|
+
}
|
|
1922
|
+
}
|
|
1923
|
+
catch (error) {
|
|
1924
|
+
// Silent failure - if we can't delete old chunks, rebuild will still work
|
|
1925
|
+
// (new chunks will be created, old ones become orphaned)
|
|
1926
|
+
prodLog.debug(`Could not clear chunks for field '${field}':`, error);
|
|
1927
|
+
}
|
|
1928
|
+
}
|
|
1929
|
+
/**
|
|
1930
|
+
* Clear ALL metadata index data from storage (for recovery)
|
|
1931
|
+
* v6.7.0: Nuclear option for recovering from corrupted index state
|
|
1932
|
+
*
|
|
1933
|
+
* WARNING: This deletes all indexed data - requires full rebuild after!
|
|
1934
|
+
* Use when index is corrupted beyond normal rebuild repair.
|
|
1935
|
+
*/
|
|
1936
|
+
async clearAllIndexData() {
|
|
1937
|
+
prodLog.warn('🗑️ Clearing ALL metadata index data from storage...');
|
|
1938
|
+
// Get all persisted fields
|
|
1939
|
+
const fields = await this.getPersistedFieldList();
|
|
1940
|
+
// Delete chunks and sparse indices for each field
|
|
1941
|
+
let deletedCount = 0;
|
|
1942
|
+
for (const field of fields) {
|
|
1943
|
+
await this.deleteFieldChunks(field);
|
|
1944
|
+
deletedCount++;
|
|
1945
|
+
}
|
|
1946
|
+
// Delete field registry
|
|
1947
|
+
try {
|
|
1948
|
+
await this.storage.saveMetadata('__metadata_field_registry__', null);
|
|
1949
|
+
}
|
|
1950
|
+
catch (error) {
|
|
1951
|
+
prodLog.debug('Could not delete field registry:', error);
|
|
1952
|
+
}
|
|
1953
|
+
// Clear in-memory state
|
|
1954
|
+
this.fieldIndexes.clear();
|
|
1955
|
+
this.dirtyFields.clear();
|
|
1956
|
+
this.unifiedCache.clear('metadata');
|
|
1957
|
+
this.totalEntitiesByType.clear();
|
|
1958
|
+
this.entityCountsByTypeFixed.fill(0);
|
|
1959
|
+
this.verbCountsByTypeFixed.fill(0);
|
|
1960
|
+
this.typeFieldAffinity.clear();
|
|
1961
|
+
// Clear EntityIdMapper
|
|
1962
|
+
await this.idMapper.clear();
|
|
1963
|
+
// Clear chunk manager cache
|
|
1964
|
+
this.chunkManager.clearCache();
|
|
1965
|
+
prodLog.info(`✅ Cleared ${deletedCount} field indexes and all in-memory state`);
|
|
1966
|
+
prodLog.info('⚠️ Run brain.index.rebuild() to recreate the index from entity data');
|
|
1967
|
+
}
|
|
1878
1968
|
/**
|
|
1879
1969
|
* Get count of entities by type - O(1) operation using existing tracking
|
|
1880
1970
|
* This exposes the production-ready counting that's already maintained
|
|
@@ -2080,6 +2170,15 @@ export class MetadataIndexManager {
|
|
|
2080
2170
|
}
|
|
2081
2171
|
}
|
|
2082
2172
|
}
|
|
2173
|
+
// v6.7.0: Sanity check for index corruption (77x overcounting bug detection)
|
|
2174
|
+
const entityCount = this.idMapper.size;
|
|
2175
|
+
if (entityCount > 0) {
|
|
2176
|
+
const avgIdsPerEntity = totalIds / entityCount;
|
|
2177
|
+
if (avgIdsPerEntity > 100) {
|
|
2178
|
+
prodLog.warn(`⚠️ Metadata index may be corrupted: ${avgIdsPerEntity.toFixed(1)} avg entries/entity (expected ~30). ` +
|
|
2179
|
+
`Try running brain.index.clearAllIndexData() followed by brain.index.rebuild() to fix.`);
|
|
2180
|
+
}
|
|
2181
|
+
}
|
|
2083
2182
|
return {
|
|
2084
2183
|
totalEntries,
|
|
2085
2184
|
totalIds,
|
|
@@ -2114,6 +2213,28 @@ export class MetadataIndexManager {
|
|
|
2114
2213
|
// Clear all cached sparse indices in UnifiedCache
|
|
2115
2214
|
// This ensures rebuild starts fresh (v3.44.1)
|
|
2116
2215
|
this.unifiedCache.clear('metadata');
|
|
2216
|
+
// v6.7.0: CRITICAL FIX - Delete existing chunk files from storage
|
|
2217
|
+
// Without this, old chunk data accumulates with each rebuild causing 77x overcounting!
|
|
2218
|
+
// Previous fix (v6.2.4) cleared type counts but missed chunk file accumulation.
|
|
2219
|
+
prodLog.info('🗑️ Clearing existing metadata index chunks from storage...');
|
|
2220
|
+
const existingFields = await this.getPersistedFieldList();
|
|
2221
|
+
if (existingFields.length > 0) {
|
|
2222
|
+
for (const field of existingFields) {
|
|
2223
|
+
await this.deleteFieldChunks(field);
|
|
2224
|
+
}
|
|
2225
|
+
// Delete field registry (will be recreated on flush)
|
|
2226
|
+
try {
|
|
2227
|
+
await this.storage.saveMetadata('__metadata_field_registry__', null);
|
|
2228
|
+
}
|
|
2229
|
+
catch (error) {
|
|
2230
|
+
prodLog.debug('Could not delete field registry:', error);
|
|
2231
|
+
}
|
|
2232
|
+
prodLog.info(`✅ Cleared ${existingFields.length} field indexes from storage`);
|
|
2233
|
+
}
|
|
2234
|
+
// Clear EntityIdMapper to start fresh (v6.7.0)
|
|
2235
|
+
await this.idMapper.clear();
|
|
2236
|
+
// Clear chunk manager cache
|
|
2237
|
+
this.chunkManager.clearCache();
|
|
2117
2238
|
// Adaptive rebuild strategy based on storage adapter (v4.2.3)
|
|
2118
2239
|
// FileSystem/Memory/OPFS: Load all at once (avoids getAllShardedFiles() overhead on every batch)
|
|
2119
2240
|
// Cloud (GCS/S3/R2): Use pagination with small batches (prevent socket exhaustion)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "6.6.1",
|
|
3
|
+
"version": "6.6.2",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|