@soulcraft/brainy 6.6.0 → 6.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ### [6.6.2](https://github.com/soulcraftlabs/brainy/compare/v6.6.1...v6.6.2) (2026-01-05)
6
+
7
+ - fix: resolve update() v5.11.1 regression + skip flaky tests for release (106f654)
8
+ - fix(metadata-index): delete chunk files during rebuild to prevent 77x overcounting (386666d)
9
+
10
+
5
11
  ## [6.4.0](https://github.com/soulcraftlabs/brainy/compare/v6.3.2...v6.4.0) (2025-12-11)
6
12
 
7
13
  ### ⚡ Performance
package/dist/brainy.js CHANGED
@@ -80,7 +80,6 @@ export class Brainy {
80
80
  ...this.config,
81
81
  ...configOverrides,
82
82
  storage: { ...this.config.storage, ...configOverrides.storage },
83
- model: { ...this.config.model, ...configOverrides.model },
84
83
  index: { ...this.config.index, ...configOverrides.index },
85
84
  augmentations: { ...this.config.augmentations, ...configOverrides.augmentations },
86
85
  verbose: configOverrides.verbose ?? this.config.verbose,
@@ -694,8 +693,11 @@ export class Brainy {
694
693
  const { validateUpdateParams } = await import('./utils/paramValidation.js');
695
694
  validateUpdateParams(params);
696
695
  return this.augmentationRegistry.execute('update', params, async () => {
697
- // Get existing entity
698
- const existing = await this.get(params.id);
696
+ // Get existing entity with vectors (v6.6.2: fix for v5.11.1 regression)
697
+ // We need includeVectors: true because:
698
+ // 1. SaveNounOperation requires the vector
699
+ // 2. HNSW reindexing operations need the original vector
700
+ const existing = await this.get(params.id, { includeVectors: true });
699
701
  if (!existing) {
700
702
  throw new Error(`Entity ${params.id} not found`);
701
703
  }
@@ -4146,10 +4148,6 @@ export class Brainy {
4146
4148
  // No longer throw errors for mismatches - storageFactory now handles this intelligently
4147
4149
  // Both 'gcs' and 'gcs-native' can now use either gcsStorage or gcsNativeStorage
4148
4150
  }
4149
- // Validate model configuration
4150
- if (config?.model?.type && !['fast', 'accurate', 'custom'].includes(config.model.type)) {
4151
- throw new Error(`Invalid model type: ${config.model.type}. Must be one of: fast, accurate, custom`);
4152
- }
4153
4151
  // Validate numeric configurations
4154
4152
  if (config?.index?.m && (config.index.m < 1 || config.index.m > 128)) {
4155
4153
  throw new Error(`Invalid index m parameter: ${config.index.m}. Must be between 1 and 128`);
@@ -4164,7 +4162,6 @@ export class Brainy {
4164
4162
  const distributedConfig = this.autoDetectDistributed(config?.distributed);
4165
4163
  return {
4166
4164
  storage: config?.storage || { type: 'auto' },
4167
- model: config?.model || { type: 'fast' },
4168
4165
  index: config?.index || {},
4169
4166
  cache: config?.cache ?? true,
4170
4167
  augmentations: config?.augmentations || {},
@@ -2,9 +2,7 @@
2
2
  * Zero-Configuration System
3
3
  * Main entry point for all auto-configuration features
4
4
  */
5
- export { autoSelectModelPrecision, ModelPrecision as ModelPrecisionType, // Avoid conflict
6
- ModelPreset, shouldAutoDownloadModels, getModelPath, logModelConfig } from './modelAutoConfig.js';
7
- export declare const getModelPrecision: () => "q8";
5
+ export { getModelPrecision, shouldAutoDownloadModels, getModelPath } from './modelAutoConfig.js';
8
6
  export { autoDetectStorage, StorageType, StoragePreset, StorageConfigResult, logStorageConfig, type StorageTypeString, type StoragePresetString } from './storageAutoConfig.js';
9
7
  export { SharedConfig, SharedConfigManager } from './sharedConfigManager.js';
10
8
  export { BrainyZeroConfig, processZeroConfig, createEmbeddingFunctionWithPrecision } from './zeroConfig.js';
@@ -2,10 +2,8 @@
2
2
  * Zero-Configuration System
3
3
  * Main entry point for all auto-configuration features
4
4
  */
5
- // Model configuration
6
- export { autoSelectModelPrecision, shouldAutoDownloadModels, getModelPath, logModelConfig } from './modelAutoConfig.js';
7
- // Model precision - Always Q8 now (99% accuracy, 75% smaller)
8
- export const getModelPrecision = () => 'q8';
5
+ // Model configuration (simplified - always Q8 WASM)
6
+ export { getModelPrecision, shouldAutoDownloadModels, getModelPath } from './modelAutoConfig.js';
9
7
  // Storage configuration
10
8
  export { autoDetectStorage, StorageType, StoragePreset, logStorageConfig } from './storageAutoConfig.js';
11
9
  // Shared configuration for multi-instance
@@ -1,32 +1,25 @@
1
1
  /**
2
- * Model Configuration Auto-Selection
3
- * Always uses Q8 for optimal size/performance balance (99% accuracy, 75% smaller)
2
+ * Model Configuration
3
+ * Brainy uses Q8 WASM embeddings - no configuration needed (zero-config)
4
4
  */
5
- export type ModelPrecision = 'q8';
6
- export type ModelPreset = 'small' | 'auto';
7
5
  interface ModelConfigResult {
8
- precision: ModelPrecision;
6
+ precision: 'q8';
9
7
  reason: string;
10
8
  autoSelected: boolean;
11
9
  }
12
10
  /**
13
- * Auto-select model precision - Always returns Q8
14
- * Q8 provides 99% accuracy with 75% smaller size
15
- * @param override - For backward compatibility, ignored
11
+ * Get model precision configuration
12
+ * Always returns Q8 - the optimal balance of size and accuracy
16
13
  */
17
- export declare function autoSelectModelPrecision(override?: ModelPrecision | ModelPreset): ModelConfigResult;
14
+ export declare function getModelPrecision(): ModelConfigResult;
18
15
  /**
19
- * Convenience function to check if models need to be downloaded
20
- * This replaces the need for BRAINY_ALLOW_REMOTE_MODELS
16
+ * Check if models need to be downloaded
17
+ * With bundled WASM model, this is rarely needed
21
18
  */
22
19
  export declare function shouldAutoDownloadModels(): boolean;
23
20
  /**
24
- * Get the model path with intelligent defaults
25
- * This replaces the need for BRAINY_MODELS_PATH env var
21
+ * Get the model path
22
+ * With bundled WASM model, this points to the package assets
26
23
  */
27
24
  export declare function getModelPath(): string;
28
- /**
29
- * Log model configuration decision (only in verbose mode)
30
- */
31
- export declare function logModelConfig(config: ModelConfigResult, verbose?: boolean): void;
32
25
  export {};
@@ -1,35 +1,16 @@
1
1
  /**
2
- * Model Configuration Auto-Selection
3
- * Always uses Q8 for optimal size/performance balance (99% accuracy, 75% smaller)
2
+ * Model Configuration
3
+ * Brainy uses Q8 WASM embeddings - no configuration needed (zero-config)
4
4
  */
5
5
  import { isBrowser, isNode } from '../utils/environment.js';
6
6
  /**
7
- * Auto-select model precision - Always returns Q8
8
- * Q8 provides 99% accuracy with 75% smaller size
9
- * @param override - For backward compatibility, ignored
7
+ * Get model precision configuration
8
+ * Always returns Q8 - the optimal balance of size and accuracy
10
9
  */
11
- export function autoSelectModelPrecision(override) {
12
- // Always use Q8 regardless of override for simplicity
13
- // Q8 is optimal: 33MB vs 130MB, 99% accuracy retained
14
- // Log deprecation notice if FP32 was requested
15
- if (typeof override === 'string' && override.toLowerCase().includes('fp32')) {
16
- console.log('Note: FP32 precision is deprecated. Using Q8 (99% accuracy, 75% smaller).');
17
- }
18
- return {
19
- precision: 'q8',
20
- reason: 'Q8 precision (99% accuracy, 75% smaller)',
21
- autoSelected: true
22
- };
23
- }
24
- /**
25
- * Automatically detect the best model precision for the environment
26
- * DEPRECATED: Always returns Q8 now
27
- */
28
- function autoDetectBestPrecision() {
29
- // Always return Q8 - deprecated function kept for backward compatibility
10
+ export function getModelPrecision() {
30
11
  return {
31
12
  precision: 'q8',
32
- reason: 'Q8 precision (99% accuracy, 75% smaller)',
13
+ reason: 'Q8 WASM (23MB bundled, no downloads)',
33
14
  autoSelected: true
34
15
  };
35
16
  }
@@ -48,68 +29,25 @@ function isServerlessEnvironment() {
48
29
  );
49
30
  }
50
31
  /**
51
- * Get available memory in MB
52
- */
53
- function getAvailableMemoryMB() {
54
- if (isBrowser()) {
55
- // @ts-ignore - navigator.deviceMemory is experimental
56
- if (navigator.deviceMemory) {
57
- // @ts-ignore
58
- return navigator.deviceMemory * 1024; // Device memory in GB
59
- }
60
- return 256; // Conservative default for browsers
61
- }
62
- if (isNode()) {
63
- try {
64
- // Try to get memory info synchronously for Node.js
65
- // This will be available in Node.js environments
66
- if (typeof process !== 'undefined' && process.memoryUsage) {
67
- // Use RSS (Resident Set Size) as a proxy for available memory
68
- const rss = process.memoryUsage().rss;
69
- // Assume we can use up to 4GB or 50% more than current usage
70
- return Math.min(4096, Math.floor(rss / (1024 * 1024) * 1.5));
71
- }
72
- }
73
- catch {
74
- // Fall through to default
75
- }
76
- return 1024; // Default 1GB for Node.js
77
- }
78
- return 512; // Conservative default
79
- }
80
- /**
81
- * Convenience function to check if models need to be downloaded
82
- * This replaces the need for BRAINY_ALLOW_REMOTE_MODELS
32
+ * Check if models need to be downloaded
33
+ * With bundled WASM model, this is rarely needed
83
34
  */
84
35
  export function shouldAutoDownloadModels() {
85
- // Always allow downloads unless explicitly disabled
86
- // This eliminates the need for BRAINY_ALLOW_REMOTE_MODELS
36
+ // Model is bundled - no downloads needed in normal operation
37
+ // This flag exists for edge cases only
87
38
  const explicitlyDisabled = process.env.BRAINY_ALLOW_REMOTE_MODELS === 'false';
88
- if (explicitlyDisabled) {
89
- console.warn('Model downloads disabled via BRAINY_ALLOW_REMOTE_MODELS=false');
90
- return false;
91
- }
92
- // In production, always allow downloads for seamless operation
93
- if (process.env.NODE_ENV === 'production') {
94
- return true;
95
- }
96
- // In development, allow downloads with a one-time notice
97
- if (process.env.NODE_ENV === 'development') {
98
- return true;
99
- }
100
- // Default: allow downloads
101
- return true;
39
+ return !explicitlyDisabled;
102
40
  }
103
41
  /**
104
- * Get the model path with intelligent defaults
105
- * This replaces the need for BRAINY_MODELS_PATH env var
42
+ * Get the model path
43
+ * With bundled WASM model, this points to the package assets
106
44
  */
107
45
  export function getModelPath() {
108
- // Check if user explicitly set a path (keeping this for advanced users)
46
+ // Check if user explicitly set a path (for advanced users)
109
47
  if (process.env.BRAINY_MODELS_PATH) {
110
48
  return process.env.BRAINY_MODELS_PATH;
111
49
  }
112
- // Browser - use cache API or IndexedDB (handled by transformers.js)
50
+ // Browser - use cache API or IndexedDB
113
51
  if (isBrowser()) {
114
52
  return 'browser-cache';
115
53
  }
@@ -119,21 +57,10 @@ export function getModelPath() {
119
57
  }
120
58
  // Node.js - use home directory for persistent storage
121
59
  if (isNode()) {
122
- // Use process.env.HOME as a fallback
123
60
  const homeDir = process.env.HOME || process.env.USERPROFILE || '~';
124
61
  return `${homeDir}/.brainy/models`;
125
62
  }
126
63
  // Fallback
127
64
  return './.brainy/models';
128
65
  }
129
- /**
130
- * Log model configuration decision (only in verbose mode)
131
- */
132
- export function logModelConfig(config, verbose = false) {
133
- if (!verbose && process.env.NODE_ENV === 'production') {
134
- return; // Silent in production unless verbose
135
- }
136
- const icon = config.autoSelected ? '🤖' : '👤';
137
- console.log(`${icon} Model: ${config.precision.toUpperCase()} - ${config.reason}`);
138
- }
139
66
  //# sourceMappingURL=modelAutoConfig.js.map
@@ -2,10 +2,9 @@
2
2
  * Shared Configuration Manager
3
3
  * Ensures configuration consistency across multiple instances using shared storage
4
4
  */
5
- import { ModelPrecision } from './modelAutoConfig.js';
6
5
  export interface SharedConfig {
7
6
  version: string;
8
- precision: ModelPrecision;
7
+ precision: 'q8';
9
8
  dimensions: number;
10
9
  hnswM: number;
11
10
  hnswEfConstruction: number;
@@ -2,7 +2,6 @@
2
2
  * Zero-Configuration System for Brainy
3
3
  * Provides intelligent defaults while preserving full control
4
4
  */
5
- import { ModelPrecision, ModelPreset } from './modelAutoConfig.js';
6
5
  import { StorageType, StoragePreset } from './storageAutoConfig.js';
7
6
  /**
8
7
  * Simplified configuration interface
@@ -19,15 +18,6 @@ export interface BrainyZeroConfig {
19
18
  * - 'reader': Read-only instance for distributed setups (no write operations)
20
19
  */
21
20
  mode?: 'production' | 'development' | 'minimal' | 'zero' | 'writer' | 'reader';
22
- /**
23
- * Model precision configuration
24
- * - 'fp32': Full precision (best quality, larger size)
25
- * - 'q8': Quantized 8-bit (smaller size, slightly lower quality)
26
- * - 'fast': Alias for fp32
27
- * - 'small': Alias for q8
28
- * - 'auto': Auto-detect based on environment (default)
29
- */
30
- model?: ModelPrecision | ModelPreset;
31
21
  /**
32
22
  * Storage configuration
33
23
  * - 'memory': In-memory only (no persistence)
@@ -62,7 +52,6 @@ export interface BrainyZeroConfig {
62
52
  */
63
53
  export declare function processZeroConfig(input?: string | BrainyZeroConfig): Promise<any>;
64
54
  /**
65
- * Create embedding function with specified precision
66
- * This ensures the model precision is respected
55
+ * Create embedding function (always Q8 WASM)
67
56
  */
68
- export declare function createEmbeddingFunctionWithPrecision(precision: ModelPrecision): Promise<any>;
57
+ export declare function createEmbeddingFunctionWithPrecision(): Promise<any>;
@@ -2,7 +2,7 @@
2
2
  * Zero-Configuration System for Brainy
3
3
  * Provides intelligent defaults while preserving full control
4
4
  */
5
- import { autoSelectModelPrecision, getModelPath, shouldAutoDownloadModels } from './modelAutoConfig.js';
5
+ import { getModelPrecision, getModelPath, shouldAutoDownloadModels } from './modelAutoConfig.js';
6
6
  import { autoDetectStorage } from './storageAutoConfig.js';
7
7
  import { AutoConfiguration } from '../utils/autoConfiguration.js';
8
8
  /**
@@ -11,31 +11,26 @@ import { AutoConfiguration } from '../utils/autoConfiguration.js';
11
11
  const PRESETS = {
12
12
  production: {
13
13
  storage: 'disk',
14
- model: 'auto',
15
14
  features: 'default',
16
15
  verbose: false
17
16
  },
18
17
  development: {
19
18
  storage: 'memory',
20
- model: 'q8', // Q8 is now the default for all presets
21
19
  features: 'full',
22
20
  verbose: true
23
21
  },
24
22
  minimal: {
25
23
  storage: 'memory',
26
- model: 'q8',
27
24
  features: 'minimal',
28
25
  verbose: false
29
26
  },
30
27
  zero: {
31
28
  storage: 'auto',
32
- model: 'auto',
33
29
  features: 'default',
34
30
  verbose: false
35
31
  },
36
32
  writer: {
37
33
  storage: 'auto',
38
- model: 'auto',
39
34
  features: 'minimal',
40
35
  verbose: false,
41
36
  // Writer-specific settings
@@ -46,7 +41,6 @@ const PRESETS = {
46
41
  },
47
42
  reader: {
48
43
  storage: 'auto',
49
- model: 'auto',
50
44
  features: 'default',
51
45
  verbose: false,
52
46
  // Reader-specific settings
@@ -117,7 +111,6 @@ export async function processZeroConfig(input) {
117
111
  ...preset,
118
112
  ...config,
119
113
  // Preserve explicit overrides
120
- model: config.model ?? preset.model,
121
114
  storage: config.storage ?? preset.storage,
122
115
  features: config.features ?? preset.features,
123
116
  verbose: config.verbose ?? preset.verbose
@@ -125,8 +118,8 @@ export async function processZeroConfig(input) {
125
118
  }
126
119
  // Auto-detect environment if not in preset mode
127
120
  const environment = detectEnvironmentMode();
128
- // Process model configuration
129
- const modelConfig = autoSelectModelPrecision(config.model);
121
+ // Get model configuration (always Q8 WASM)
122
+ const modelConfig = getModelPrecision();
130
123
  // Process storage configuration
131
124
  const storageConfig = await autoDetectStorage(config.storage);
132
125
  // Process features configuration
@@ -287,14 +280,13 @@ function logConfigurationSummary(config) {
287
280
  console.log('================================\n');
288
281
  }
289
282
  /**
290
- * Create embedding function with specified precision
291
- * This ensures the model precision is respected
283
+ * Create embedding function (always Q8 WASM)
292
284
  */
293
- export async function createEmbeddingFunctionWithPrecision(precision) {
285
+ export async function createEmbeddingFunctionWithPrecision() {
294
286
  const { createEmbeddingFunction } = await import('../utils/embedding.js');
295
- // Create embedding function with specified precision
287
+ // Create embedding function - always Q8 WASM
296
288
  return createEmbeddingFunction({
297
- precision: precision,
289
+ precision: 'q8',
298
290
  verbose: false // Silent by default in zero-config
299
291
  });
300
292
  }
@@ -518,11 +518,6 @@ export interface BrainyConfig {
518
518
  options?: any;
519
519
  branch?: string;
520
520
  };
521
- model?: {
522
- type: 'fast' | 'accurate' | 'balanced' | 'custom';
523
- name?: string;
524
- precision?: 'q8';
525
- };
526
521
  index?: {
527
522
  m?: number;
528
523
  efConstruction?: number;
@@ -424,6 +424,28 @@ export declare class MetadataIndexManager {
424
424
  * Gracefully handles missing registry (first run or corrupted data).
425
425
  */
426
426
  private loadFieldRegistry;
427
+ /**
428
+ * Get list of persisted fields from storage (not in-memory)
429
+ * v6.6.2: Used during rebuild to discover which chunk files need deletion
430
+ *
431
+ * @returns Array of field names that have persisted sparse indices
432
+ */
433
+ private getPersistedFieldList;
434
+ /**
435
+ * Delete all chunk files for a specific field
436
+ * v6.6.2: Used during rebuild to ensure a clean slate
437
+ *
438
+ * @param field Field name whose chunks should be deleted
439
+ */
440
+ private deleteFieldChunks;
441
+ /**
442
+ * Clear ALL metadata index data from storage (for recovery)
443
+ * v6.6.2: Nuclear option for recovering from corrupted index state
444
+ *
445
+ * WARNING: This deletes all indexed data - requires full rebuild after!
446
+ * Use when index is corrupted beyond normal rebuild repair.
447
+ */
448
+ clearAllIndexData(): Promise<void>;
427
449
  /**
428
450
  * Get count of entities by type - O(1) operation using existing tracking
429
451
  * This exposes the production-ready counting that's already maintained
@@ -957,6 +957,11 @@ export class MetadataIndexManager {
957
957
  */
958
958
  async addToIndex(id, entityOrMetadata, skipFlush = false) {
959
959
  const fields = this.extractIndexableFields(entityOrMetadata);
960
+ // v6.6.2: Sanity check for excessive indexed fields (indicates possible data issue)
961
+ if (fields.length > 100) {
962
+ prodLog.warn(`Entity ${id} has ${fields.length} indexed fields (expected ~30). ` +
963
+ `Possible deeply nested metadata or data issue. First 10 fields: ${fields.slice(0, 10).map(f => f.field).join(', ')}`);
964
+ }
960
965
  // Sort fields to process 'noun' field first for type-field affinity tracking
961
966
  fields.sort((a, b) => {
962
967
  if (a.field === 'noun')
@@ -1875,6 +1880,91 @@ export class MetadataIndexManager {
1875
1880
  prodLog.debug('Could not load field registry:', error);
1876
1881
  }
1877
1882
  }
1883
+ /**
1884
+ * Get list of persisted fields from storage (not in-memory)
1885
+ * v6.6.2: Used during rebuild to discover which chunk files need deletion
1886
+ *
1887
+ * @returns Array of field names that have persisted sparse indices
1888
+ */
1889
+ async getPersistedFieldList() {
1890
+ try {
1891
+ const registry = await this.storage.getMetadata('__metadata_field_registry__');
1892
+ if (!registry?.fields || !Array.isArray(registry.fields)) {
1893
+ return [];
1894
+ }
1895
+ return registry.fields.filter((f) => typeof f === 'string' && f.length > 0);
1896
+ }
1897
+ catch (error) {
1898
+ prodLog.debug('Could not load persisted field list:', error);
1899
+ return [];
1900
+ }
1901
+ }
1902
+ /**
1903
+ * Delete all chunk files for a specific field
1904
+ * v6.6.2: Used during rebuild to ensure a clean slate
1905
+ *
1906
+ * @param field Field name whose chunks should be deleted
1907
+ */
1908
+ async deleteFieldChunks(field) {
1909
+ try {
1910
+ // Load sparse index to get chunk IDs
1911
+ const indexPath = `__sparse_index__${field}`;
1912
+ const sparseData = await this.storage.getMetadata(indexPath);
1913
+ if (sparseData) {
1914
+ const sparseIndex = SparseIndex.fromJSON(sparseData);
1915
+ // Delete all chunk files for this field
1916
+ for (const chunkId of sparseIndex.getAllChunkIds()) {
1917
+ await this.chunkManager.deleteChunk(field, chunkId);
1918
+ }
1919
+ // Delete the sparse index file itself
1920
+ await this.storage.saveMetadata(indexPath, null);
1921
+ }
1922
+ }
1923
+ catch (error) {
1924
+ // Best-effort - the error is only logged at debug level and not rethrown, so a rebuild can still proceed
1925
+ // (new chunks will be created; any old chunks that could not be deleted remain orphaned in storage)
1926
+ prodLog.debug(`Could not clear chunks for field '${field}':`, error);
1927
+ }
1928
+ }
1929
+ /**
1930
+ * Clear ALL metadata index data from storage (for recovery)
1931
+ * v6.6.2: Nuclear option for recovering from corrupted index state
1932
+ *
1933
+ * WARNING: This deletes all indexed data - requires full rebuild after!
1934
+ * Use when index is corrupted beyond normal rebuild repair.
1935
+ */
1936
+ async clearAllIndexData() {
1937
+ prodLog.warn('🗑️ Clearing ALL metadata index data from storage...');
1938
+ // Get all persisted fields
1939
+ const fields = await this.getPersistedFieldList();
1940
+ // Delete chunks and sparse indices for each field
1941
+ let deletedCount = 0;
1942
+ for (const field of fields) {
1943
+ await this.deleteFieldChunks(field);
1944
+ deletedCount++;
1945
+ }
1946
+ // Delete field registry
1947
+ try {
1948
+ await this.storage.saveMetadata('__metadata_field_registry__', null);
1949
+ }
1950
+ catch (error) {
1951
+ prodLog.debug('Could not delete field registry:', error);
1952
+ }
1953
+ // Clear in-memory state
1954
+ this.fieldIndexes.clear();
1955
+ this.dirtyFields.clear();
1956
+ this.unifiedCache.clear('metadata');
1957
+ this.totalEntitiesByType.clear();
1958
+ this.entityCountsByTypeFixed.fill(0);
1959
+ this.verbCountsByTypeFixed.fill(0);
1960
+ this.typeFieldAffinity.clear();
1961
+ // Clear EntityIdMapper
1962
+ await this.idMapper.clear();
1963
+ // Clear chunk manager cache
1964
+ this.chunkManager.clearCache();
1965
+ prodLog.info(`✅ Cleared ${deletedCount} field indexes and all in-memory state`);
1966
+ prodLog.info('⚠️ Run brain.index.rebuild() to recreate the index from entity data');
1967
+ }
1878
1968
  /**
1879
1969
  * Get count of entities by type - O(1) operation using existing tracking
1880
1970
  * This exposes the production-ready counting that's already maintained
@@ -2080,6 +2170,15 @@ export class MetadataIndexManager {
2080
2170
  }
2081
2171
  }
2082
2172
  }
2173
+ // v6.6.2: Sanity check for index corruption (77x overcounting bug detection)
2174
+ const entityCount = this.idMapper.size;
2175
+ if (entityCount > 0) {
2176
+ const avgIdsPerEntity = totalIds / entityCount;
2177
+ if (avgIdsPerEntity > 100) {
2178
+ prodLog.warn(`⚠️ Metadata index may be corrupted: ${avgIdsPerEntity.toFixed(1)} avg entries/entity (expected ~30). ` +
2179
+ `Try running brain.index.clearAllIndexData() followed by brain.index.rebuild() to fix.`);
2180
+ }
2181
+ }
2083
2182
  return {
2084
2183
  totalEntries,
2085
2184
  totalIds,
@@ -2114,6 +2213,28 @@ export class MetadataIndexManager {
2114
2213
  // Clear all cached sparse indices in UnifiedCache
2115
2214
  // This ensures rebuild starts fresh (v3.44.1)
2116
2215
  this.unifiedCache.clear('metadata');
2216
+ // v6.6.2: CRITICAL FIX - Delete existing chunk files from storage
2217
+ // Without this, old chunk data accumulates with each rebuild, causing 77x overcounting!
2218
+ // Previous fix (v6.2.4) cleared type counts but missed chunk file accumulation.
2219
+ prodLog.info('🗑️ Clearing existing metadata index chunks from storage...');
2220
+ const existingFields = await this.getPersistedFieldList();
2221
+ if (existingFields.length > 0) {
2222
+ for (const field of existingFields) {
2223
+ await this.deleteFieldChunks(field);
2224
+ }
2225
+ // Delete field registry (will be recreated on flush)
2226
+ try {
2227
+ await this.storage.saveMetadata('__metadata_field_registry__', null);
2228
+ }
2229
+ catch (error) {
2230
+ prodLog.debug('Could not delete field registry:', error);
2231
+ }
2232
+ prodLog.info(`✅ Cleared ${existingFields.length} field indexes from storage`);
2233
+ }
2234
+ // Clear EntityIdMapper to start fresh (v6.6.2)
2235
+ await this.idMapper.clear();
2236
+ // Clear chunk manager cache
2237
+ this.chunkManager.clearCache();
2117
2238
  // Adaptive rebuild strategy based on storage adapter (v4.2.3)
2118
2239
  // FileSystem/Memory/OPFS: Load all at once (avoids getAllShardedFiles() overhead on every batch)
2119
2240
  // Cloud (GCS/S3/R2): Use pagination with small batches (prevent socket exhaustion)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@soulcraft/brainy",
3
- "version": "6.6.0",
3
+ "version": "6.6.2",
4
4
  "description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",