@soulcraft/brainy 6.6.0 → 6.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/brainy.js +5 -8
- package/dist/config/index.d.ts +1 -3
- package/dist/config/index.js +2 -4
- package/dist/config/modelAutoConfig.d.ts +10 -17
- package/dist/config/modelAutoConfig.js +15 -88
- package/dist/config/sharedConfigManager.d.ts +1 -2
- package/dist/config/zeroConfig.d.ts +2 -13
- package/dist/config/zeroConfig.js +7 -15
- package/dist/types/brainy.types.d.ts +0 -5
- package/dist/utils/metadataIndex.d.ts +22 -0
- package/dist/utils/metadataIndex.js +121 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
### [6.6.2](https://github.com/soulcraftlabs/brainy/compare/v6.6.1...v6.6.2) (2026-01-05)
|
|
6
|
+
|
|
7
|
+
- fix: resolve update() v5.11.1 regression + skip flaky tests for release (106f654)
|
|
8
|
+
- fix(metadata-index): delete chunk files during rebuild to prevent 77x overcounting (386666d)
|
|
9
|
+
|
|
10
|
+
|
|
5
11
|
## [6.4.0](https://github.com/soulcraftlabs/brainy/compare/v6.3.2...v6.4.0) (2025-12-11)
|
|
6
12
|
|
|
7
13
|
### ⚡ Performance
|
package/dist/brainy.js
CHANGED
|
@@ -80,7 +80,6 @@ export class Brainy {
|
|
|
80
80
|
...this.config,
|
|
81
81
|
...configOverrides,
|
|
82
82
|
storage: { ...this.config.storage, ...configOverrides.storage },
|
|
83
|
-
model: { ...this.config.model, ...configOverrides.model },
|
|
84
83
|
index: { ...this.config.index, ...configOverrides.index },
|
|
85
84
|
augmentations: { ...this.config.augmentations, ...configOverrides.augmentations },
|
|
86
85
|
verbose: configOverrides.verbose ?? this.config.verbose,
|
|
@@ -694,8 +693,11 @@ export class Brainy {
|
|
|
694
693
|
const { validateUpdateParams } = await import('./utils/paramValidation.js');
|
|
695
694
|
validateUpdateParams(params);
|
|
696
695
|
return this.augmentationRegistry.execute('update', params, async () => {
|
|
697
|
-
// Get existing entity
|
|
698
|
-
|
|
696
|
+
// Get existing entity with vectors (v6.7.0: fix for v5.11.1 regression)
|
|
697
|
+
// We need includeVectors: true because:
|
|
698
|
+
// 1. SaveNounOperation requires the vector
|
|
699
|
+
// 2. HNSW reindexing operations need the original vector
|
|
700
|
+
const existing = await this.get(params.id, { includeVectors: true });
|
|
699
701
|
if (!existing) {
|
|
700
702
|
throw new Error(`Entity ${params.id} not found`);
|
|
701
703
|
}
|
|
@@ -4146,10 +4148,6 @@ export class Brainy {
|
|
|
4146
4148
|
// No longer throw errors for mismatches - storageFactory now handles this intelligently
|
|
4147
4149
|
// Both 'gcs' and 'gcs-native' can now use either gcsStorage or gcsNativeStorage
|
|
4148
4150
|
}
|
|
4149
|
-
// Validate model configuration
|
|
4150
|
-
if (config?.model?.type && !['fast', 'accurate', 'custom'].includes(config.model.type)) {
|
|
4151
|
-
throw new Error(`Invalid model type: ${config.model.type}. Must be one of: fast, accurate, custom`);
|
|
4152
|
-
}
|
|
4153
4151
|
// Validate numeric configurations
|
|
4154
4152
|
if (config?.index?.m && (config.index.m < 1 || config.index.m > 128)) {
|
|
4155
4153
|
throw new Error(`Invalid index m parameter: ${config.index.m}. Must be between 1 and 128`);
|
|
@@ -4164,7 +4162,6 @@ export class Brainy {
|
|
|
4164
4162
|
const distributedConfig = this.autoDetectDistributed(config?.distributed);
|
|
4165
4163
|
return {
|
|
4166
4164
|
storage: config?.storage || { type: 'auto' },
|
|
4167
|
-
model: config?.model || { type: 'fast' },
|
|
4168
4165
|
index: config?.index || {},
|
|
4169
4166
|
cache: config?.cache ?? true,
|
|
4170
4167
|
augmentations: config?.augmentations || {},
|
package/dist/config/index.d.ts
CHANGED
|
@@ -2,9 +2,7 @@
|
|
|
2
2
|
* Zero-Configuration System
|
|
3
3
|
* Main entry point for all auto-configuration features
|
|
4
4
|
*/
|
|
5
|
-
export {
|
|
6
|
-
ModelPreset, shouldAutoDownloadModels, getModelPath, logModelConfig } from './modelAutoConfig.js';
|
|
7
|
-
export declare const getModelPrecision: () => "q8";
|
|
5
|
+
export { getModelPrecision, shouldAutoDownloadModels, getModelPath } from './modelAutoConfig.js';
|
|
8
6
|
export { autoDetectStorage, StorageType, StoragePreset, StorageConfigResult, logStorageConfig, type StorageTypeString, type StoragePresetString } from './storageAutoConfig.js';
|
|
9
7
|
export { SharedConfig, SharedConfigManager } from './sharedConfigManager.js';
|
|
10
8
|
export { BrainyZeroConfig, processZeroConfig, createEmbeddingFunctionWithPrecision } from './zeroConfig.js';
|
package/dist/config/index.js
CHANGED
|
@@ -2,10 +2,8 @@
|
|
|
2
2
|
* Zero-Configuration System
|
|
3
3
|
* Main entry point for all auto-configuration features
|
|
4
4
|
*/
|
|
5
|
-
// Model configuration
|
|
6
|
-
export {
|
|
7
|
-
// Model precision - Always Q8 now (99% accuracy, 75% smaller)
|
|
8
|
-
export const getModelPrecision = () => 'q8';
|
|
5
|
+
// Model configuration (simplified - always Q8 WASM)
|
|
6
|
+
export { getModelPrecision, shouldAutoDownloadModels, getModelPath } from './modelAutoConfig.js';
|
|
9
7
|
// Storage configuration
|
|
10
8
|
export { autoDetectStorage, StorageType, StoragePreset, logStorageConfig } from './storageAutoConfig.js';
|
|
11
9
|
// Shared configuration for multi-instance
|
|
@@ -1,32 +1,25 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Model Configuration
|
|
3
|
-
*
|
|
2
|
+
* Model Configuration
|
|
3
|
+
* Brainy uses Q8 WASM embeddings - no configuration needed (zero-config)
|
|
4
4
|
*/
|
|
5
|
-
export type ModelPrecision = 'q8';
|
|
6
|
-
export type ModelPreset = 'small' | 'auto';
|
|
7
5
|
interface ModelConfigResult {
|
|
8
|
-
precision: ModelPrecision;
|
|
6
|
+
precision: 'q8';
|
|
9
7
|
reason: string;
|
|
10
8
|
autoSelected: boolean;
|
|
11
9
|
}
|
|
12
10
|
/**
|
|
13
|
-
*
|
|
14
|
-
* Q8
|
|
15
|
-
* @param override - For backward compatibility, ignored
|
|
11
|
+
* Get model precision configuration
|
|
12
|
+
* Always returns Q8 - the optimal balance of size and accuracy
|
|
16
13
|
*/
|
|
17
|
-
export declare function
|
|
14
|
+
export declare function getModelPrecision(): ModelConfigResult;
|
|
18
15
|
/**
|
|
19
|
-
*
|
|
20
|
-
*
|
|
16
|
+
* Check if models need to be downloaded
|
|
17
|
+
* With bundled WASM model, this is rarely needed
|
|
21
18
|
*/
|
|
22
19
|
export declare function shouldAutoDownloadModels(): boolean;
|
|
23
20
|
/**
|
|
24
|
-
* Get the model path
|
|
25
|
-
*
|
|
21
|
+
* Get the model path
|
|
22
|
+
* With bundled WASM model, this points to the package assets
|
|
26
23
|
*/
|
|
27
24
|
export declare function getModelPath(): string;
|
|
28
|
-
/**
|
|
29
|
-
* Log model configuration decision (only in verbose mode)
|
|
30
|
-
*/
|
|
31
|
-
export declare function logModelConfig(config: ModelConfigResult, verbose?: boolean): void;
|
|
32
25
|
export {};
|
|
@@ -1,35 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Model Configuration
|
|
3
|
-
*
|
|
2
|
+
* Model Configuration
|
|
3
|
+
* Brainy uses Q8 WASM embeddings - no configuration needed (zero-config)
|
|
4
4
|
*/
|
|
5
5
|
import { isBrowser, isNode } from '../utils/environment.js';
|
|
6
6
|
/**
|
|
7
|
-
*
|
|
8
|
-
* Q8
|
|
9
|
-
* @param override - For backward compatibility, ignored
|
|
7
|
+
* Get model precision configuration
|
|
8
|
+
* Always returns Q8 - the optimal balance of size and accuracy
|
|
10
9
|
*/
|
|
11
|
-
export function
|
|
12
|
-
// Always use Q8 regardless of override for simplicity
|
|
13
|
-
// Q8 is optimal: 33MB vs 130MB, 99% accuracy retained
|
|
14
|
-
// Log deprecation notice if FP32 was requested
|
|
15
|
-
if (typeof override === 'string' && override.toLowerCase().includes('fp32')) {
|
|
16
|
-
console.log('Note: FP32 precision is deprecated. Using Q8 (99% accuracy, 75% smaller).');
|
|
17
|
-
}
|
|
18
|
-
return {
|
|
19
|
-
precision: 'q8',
|
|
20
|
-
reason: 'Q8 precision (99% accuracy, 75% smaller)',
|
|
21
|
-
autoSelected: true
|
|
22
|
-
};
|
|
23
|
-
}
|
|
24
|
-
/**
|
|
25
|
-
* Automatically detect the best model precision for the environment
|
|
26
|
-
* DEPRECATED: Always returns Q8 now
|
|
27
|
-
*/
|
|
28
|
-
function autoDetectBestPrecision() {
|
|
29
|
-
// Always return Q8 - deprecated function kept for backward compatibility
|
|
10
|
+
export function getModelPrecision() {
|
|
30
11
|
return {
|
|
31
12
|
precision: 'q8',
|
|
32
|
-
reason: 'Q8
|
|
13
|
+
reason: 'Q8 WASM (23MB bundled, no downloads)',
|
|
33
14
|
autoSelected: true
|
|
34
15
|
};
|
|
35
16
|
}
|
|
@@ -48,68 +29,25 @@ function isServerlessEnvironment() {
|
|
|
48
29
|
);
|
|
49
30
|
}
|
|
50
31
|
/**
|
|
51
|
-
*
|
|
52
|
-
|
|
53
|
-
function getAvailableMemoryMB() {
|
|
54
|
-
if (isBrowser()) {
|
|
55
|
-
// @ts-ignore - navigator.deviceMemory is experimental
|
|
56
|
-
if (navigator.deviceMemory) {
|
|
57
|
-
// @ts-ignore
|
|
58
|
-
return navigator.deviceMemory * 1024; // Device memory in GB
|
|
59
|
-
}
|
|
60
|
-
return 256; // Conservative default for browsers
|
|
61
|
-
}
|
|
62
|
-
if (isNode()) {
|
|
63
|
-
try {
|
|
64
|
-
// Try to get memory info synchronously for Node.js
|
|
65
|
-
// This will be available in Node.js environments
|
|
66
|
-
if (typeof process !== 'undefined' && process.memoryUsage) {
|
|
67
|
-
// Use RSS (Resident Set Size) as a proxy for available memory
|
|
68
|
-
const rss = process.memoryUsage().rss;
|
|
69
|
-
// Assume we can use up to 4GB or 50% more than current usage
|
|
70
|
-
return Math.min(4096, Math.floor(rss / (1024 * 1024) * 1.5));
|
|
71
|
-
}
|
|
72
|
-
}
|
|
73
|
-
catch {
|
|
74
|
-
// Fall through to default
|
|
75
|
-
}
|
|
76
|
-
return 1024; // Default 1GB for Node.js
|
|
77
|
-
}
|
|
78
|
-
return 512; // Conservative default
|
|
79
|
-
}
|
|
80
|
-
/**
|
|
81
|
-
* Convenience function to check if models need to be downloaded
|
|
82
|
-
* This replaces the need for BRAINY_ALLOW_REMOTE_MODELS
|
|
32
|
+
* Check if models need to be downloaded
|
|
33
|
+
* With bundled WASM model, this is rarely needed
|
|
83
34
|
*/
|
|
84
35
|
export function shouldAutoDownloadModels() {
|
|
85
|
-
//
|
|
86
|
-
// This
|
|
36
|
+
// Model is bundled - no downloads needed in normal operation
|
|
37
|
+
// This flag exists for edge cases only
|
|
87
38
|
const explicitlyDisabled = process.env.BRAINY_ALLOW_REMOTE_MODELS === 'false';
|
|
88
|
-
|
|
89
|
-
console.warn('Model downloads disabled via BRAINY_ALLOW_REMOTE_MODELS=false');
|
|
90
|
-
return false;
|
|
91
|
-
}
|
|
92
|
-
// In production, always allow downloads for seamless operation
|
|
93
|
-
if (process.env.NODE_ENV === 'production') {
|
|
94
|
-
return true;
|
|
95
|
-
}
|
|
96
|
-
// In development, allow downloads with a one-time notice
|
|
97
|
-
if (process.env.NODE_ENV === 'development') {
|
|
98
|
-
return true;
|
|
99
|
-
}
|
|
100
|
-
// Default: allow downloads
|
|
101
|
-
return true;
|
|
39
|
+
return !explicitlyDisabled;
|
|
102
40
|
}
|
|
103
41
|
/**
|
|
104
|
-
* Get the model path
|
|
105
|
-
*
|
|
42
|
+
* Get the model path
|
|
43
|
+
* With bundled WASM model, this points to the package assets
|
|
106
44
|
*/
|
|
107
45
|
export function getModelPath() {
|
|
108
|
-
// Check if user explicitly set a path (
|
|
46
|
+
// Check if user explicitly set a path (for advanced users)
|
|
109
47
|
if (process.env.BRAINY_MODELS_PATH) {
|
|
110
48
|
return process.env.BRAINY_MODELS_PATH;
|
|
111
49
|
}
|
|
112
|
-
// Browser - use cache API or IndexedDB
|
|
50
|
+
// Browser - use cache API or IndexedDB
|
|
113
51
|
if (isBrowser()) {
|
|
114
52
|
return 'browser-cache';
|
|
115
53
|
}
|
|
@@ -119,21 +57,10 @@ export function getModelPath() {
|
|
|
119
57
|
}
|
|
120
58
|
// Node.js - use home directory for persistent storage
|
|
121
59
|
if (isNode()) {
|
|
122
|
-
// Use process.env.HOME as a fallback
|
|
123
60
|
const homeDir = process.env.HOME || process.env.USERPROFILE || '~';
|
|
124
61
|
return `${homeDir}/.brainy/models`;
|
|
125
62
|
}
|
|
126
63
|
// Fallback
|
|
127
64
|
return './.brainy/models';
|
|
128
65
|
}
|
|
129
|
-
/**
|
|
130
|
-
* Log model configuration decision (only in verbose mode)
|
|
131
|
-
*/
|
|
132
|
-
export function logModelConfig(config, verbose = false) {
|
|
133
|
-
if (!verbose && process.env.NODE_ENV === 'production') {
|
|
134
|
-
return; // Silent in production unless verbose
|
|
135
|
-
}
|
|
136
|
-
const icon = config.autoSelected ? '🤖' : '👤';
|
|
137
|
-
console.log(`${icon} Model: ${config.precision.toUpperCase()} - ${config.reason}`);
|
|
138
|
-
}
|
|
139
66
|
//# sourceMappingURL=modelAutoConfig.js.map
|
|
@@ -2,10 +2,9 @@
|
|
|
2
2
|
* Shared Configuration Manager
|
|
3
3
|
* Ensures configuration consistency across multiple instances using shared storage
|
|
4
4
|
*/
|
|
5
|
-
import { ModelPrecision } from './modelAutoConfig.js';
|
|
6
5
|
export interface SharedConfig {
|
|
7
6
|
version: string;
|
|
8
|
-
precision: ModelPrecision;
|
|
7
|
+
precision: 'q8';
|
|
9
8
|
dimensions: number;
|
|
10
9
|
hnswM: number;
|
|
11
10
|
hnswEfConstruction: number;
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
* Zero-Configuration System for Brainy
|
|
3
3
|
* Provides intelligent defaults while preserving full control
|
|
4
4
|
*/
|
|
5
|
-
import { ModelPrecision, ModelPreset } from './modelAutoConfig.js';
|
|
6
5
|
import { StorageType, StoragePreset } from './storageAutoConfig.js';
|
|
7
6
|
/**
|
|
8
7
|
* Simplified configuration interface
|
|
@@ -19,15 +18,6 @@ export interface BrainyZeroConfig {
|
|
|
19
18
|
* - 'reader': Read-only instance for distributed setups (no write operations)
|
|
20
19
|
*/
|
|
21
20
|
mode?: 'production' | 'development' | 'minimal' | 'zero' | 'writer' | 'reader';
|
|
22
|
-
/**
|
|
23
|
-
* Model precision configuration
|
|
24
|
-
* - 'fp32': Full precision (best quality, larger size)
|
|
25
|
-
* - 'q8': Quantized 8-bit (smaller size, slightly lower quality)
|
|
26
|
-
* - 'fast': Alias for fp32
|
|
27
|
-
* - 'small': Alias for q8
|
|
28
|
-
* - 'auto': Auto-detect based on environment (default)
|
|
29
|
-
*/
|
|
30
|
-
model?: ModelPrecision | ModelPreset;
|
|
31
21
|
/**
|
|
32
22
|
* Storage configuration
|
|
33
23
|
* - 'memory': In-memory only (no persistence)
|
|
@@ -62,7 +52,6 @@ export interface BrainyZeroConfig {
|
|
|
62
52
|
*/
|
|
63
53
|
export declare function processZeroConfig(input?: string | BrainyZeroConfig): Promise<any>;
|
|
64
54
|
/**
|
|
65
|
-
* Create embedding function
|
|
66
|
-
* This ensures the model precision is respected
|
|
55
|
+
* Create embedding function (always Q8 WASM)
|
|
67
56
|
*/
|
|
68
|
-
export declare function createEmbeddingFunctionWithPrecision(
|
|
57
|
+
export declare function createEmbeddingFunctionWithPrecision(): Promise<any>;
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Zero-Configuration System for Brainy
|
|
3
3
|
* Provides intelligent defaults while preserving full control
|
|
4
4
|
*/
|
|
5
|
-
import {
|
|
5
|
+
import { getModelPrecision, getModelPath, shouldAutoDownloadModels } from './modelAutoConfig.js';
|
|
6
6
|
import { autoDetectStorage } from './storageAutoConfig.js';
|
|
7
7
|
import { AutoConfiguration } from '../utils/autoConfiguration.js';
|
|
8
8
|
/**
|
|
@@ -11,31 +11,26 @@ import { AutoConfiguration } from '../utils/autoConfiguration.js';
|
|
|
11
11
|
const PRESETS = {
|
|
12
12
|
production: {
|
|
13
13
|
storage: 'disk',
|
|
14
|
-
model: 'auto',
|
|
15
14
|
features: 'default',
|
|
16
15
|
verbose: false
|
|
17
16
|
},
|
|
18
17
|
development: {
|
|
19
18
|
storage: 'memory',
|
|
20
|
-
model: 'q8', // Q8 is now the default for all presets
|
|
21
19
|
features: 'full',
|
|
22
20
|
verbose: true
|
|
23
21
|
},
|
|
24
22
|
minimal: {
|
|
25
23
|
storage: 'memory',
|
|
26
|
-
model: 'q8',
|
|
27
24
|
features: 'minimal',
|
|
28
25
|
verbose: false
|
|
29
26
|
},
|
|
30
27
|
zero: {
|
|
31
28
|
storage: 'auto',
|
|
32
|
-
model: 'auto',
|
|
33
29
|
features: 'default',
|
|
34
30
|
verbose: false
|
|
35
31
|
},
|
|
36
32
|
writer: {
|
|
37
33
|
storage: 'auto',
|
|
38
|
-
model: 'auto',
|
|
39
34
|
features: 'minimal',
|
|
40
35
|
verbose: false,
|
|
41
36
|
// Writer-specific settings
|
|
@@ -46,7 +41,6 @@ const PRESETS = {
|
|
|
46
41
|
},
|
|
47
42
|
reader: {
|
|
48
43
|
storage: 'auto',
|
|
49
|
-
model: 'auto',
|
|
50
44
|
features: 'default',
|
|
51
45
|
verbose: false,
|
|
52
46
|
// Reader-specific settings
|
|
@@ -117,7 +111,6 @@ export async function processZeroConfig(input) {
|
|
|
117
111
|
...preset,
|
|
118
112
|
...config,
|
|
119
113
|
// Preserve explicit overrides
|
|
120
|
-
model: config.model ?? preset.model,
|
|
121
114
|
storage: config.storage ?? preset.storage,
|
|
122
115
|
features: config.features ?? preset.features,
|
|
123
116
|
verbose: config.verbose ?? preset.verbose
|
|
@@ -125,8 +118,8 @@ export async function processZeroConfig(input) {
|
|
|
125
118
|
}
|
|
126
119
|
// Auto-detect environment if not in preset mode
|
|
127
120
|
const environment = detectEnvironmentMode();
|
|
128
|
-
//
|
|
129
|
-
const modelConfig =
|
|
121
|
+
// Get model configuration (always Q8 WASM)
|
|
122
|
+
const modelConfig = getModelPrecision();
|
|
130
123
|
// Process storage configuration
|
|
131
124
|
const storageConfig = await autoDetectStorage(config.storage);
|
|
132
125
|
// Process features configuration
|
|
@@ -287,14 +280,13 @@ function logConfigurationSummary(config) {
|
|
|
287
280
|
console.log('================================\n');
|
|
288
281
|
}
|
|
289
282
|
/**
|
|
290
|
-
* Create embedding function
|
|
291
|
-
* This ensures the model precision is respected
|
|
283
|
+
* Create embedding function (always Q8 WASM)
|
|
292
284
|
*/
|
|
293
|
-
export async function createEmbeddingFunctionWithPrecision(
|
|
285
|
+
export async function createEmbeddingFunctionWithPrecision() {
|
|
294
286
|
const { createEmbeddingFunction } = await import('../utils/embedding.js');
|
|
295
|
-
// Create embedding function
|
|
287
|
+
// Create embedding function - always Q8 WASM
|
|
296
288
|
return createEmbeddingFunction({
|
|
297
|
-
precision:
|
|
289
|
+
precision: 'q8',
|
|
298
290
|
verbose: false // Silent by default in zero-config
|
|
299
291
|
});
|
|
300
292
|
}
|
|
@@ -518,11 +518,6 @@ export interface BrainyConfig {
|
|
|
518
518
|
options?: any;
|
|
519
519
|
branch?: string;
|
|
520
520
|
};
|
|
521
|
-
model?: {
|
|
522
|
-
type: 'fast' | 'accurate' | 'balanced' | 'custom';
|
|
523
|
-
name?: string;
|
|
524
|
-
precision?: 'q8';
|
|
525
|
-
};
|
|
526
521
|
index?: {
|
|
527
522
|
m?: number;
|
|
528
523
|
efConstruction?: number;
|
|
@@ -424,6 +424,28 @@ export declare class MetadataIndexManager {
|
|
|
424
424
|
* Gracefully handles missing registry (first run or corrupted data).
|
|
425
425
|
*/
|
|
426
426
|
private loadFieldRegistry;
|
|
427
|
+
/**
|
|
428
|
+
* Get list of persisted fields from storage (not in-memory)
|
|
429
|
+
* v6.7.0: Used during rebuild to discover which chunk files need deletion
|
|
430
|
+
*
|
|
431
|
+
* @returns Array of field names that have persisted sparse indices
|
|
432
|
+
*/
|
|
433
|
+
private getPersistedFieldList;
|
|
434
|
+
/**
|
|
435
|
+
* Delete all chunk files for a specific field
|
|
436
|
+
* v6.7.0: Used during rebuild to ensure clean slate
|
|
437
|
+
*
|
|
438
|
+
* @param field Field name whose chunks should be deleted
|
|
439
|
+
*/
|
|
440
|
+
private deleteFieldChunks;
|
|
441
|
+
/**
|
|
442
|
+
* Clear ALL metadata index data from storage (for recovery)
|
|
443
|
+
* v6.7.0: Nuclear option for recovering from corrupted index state
|
|
444
|
+
*
|
|
445
|
+
* WARNING: This deletes all indexed data - requires full rebuild after!
|
|
446
|
+
* Use when index is corrupted beyond normal rebuild repair.
|
|
447
|
+
*/
|
|
448
|
+
clearAllIndexData(): Promise<void>;
|
|
427
449
|
/**
|
|
428
450
|
* Get count of entities by type - O(1) operation using existing tracking
|
|
429
451
|
* This exposes the production-ready counting that's already maintained
|
|
@@ -957,6 +957,11 @@ export class MetadataIndexManager {
|
|
|
957
957
|
*/
|
|
958
958
|
async addToIndex(id, entityOrMetadata, skipFlush = false) {
|
|
959
959
|
const fields = this.extractIndexableFields(entityOrMetadata);
|
|
960
|
+
// v6.7.0: Sanity check for excessive indexed fields (indicates possible data issue)
|
|
961
|
+
if (fields.length > 100) {
|
|
962
|
+
prodLog.warn(`Entity ${id} has ${fields.length} indexed fields (expected ~30). ` +
|
|
963
|
+
`Possible deeply nested metadata or data issue. First 10 fields: ${fields.slice(0, 10).map(f => f.field).join(', ')}`);
|
|
964
|
+
}
|
|
960
965
|
// Sort fields to process 'noun' field first for type-field affinity tracking
|
|
961
966
|
fields.sort((a, b) => {
|
|
962
967
|
if (a.field === 'noun')
|
|
@@ -1875,6 +1880,91 @@ export class MetadataIndexManager {
|
|
|
1875
1880
|
prodLog.debug('Could not load field registry:', error);
|
|
1876
1881
|
}
|
|
1877
1882
|
}
|
|
1883
|
+
/**
|
|
1884
|
+
* Get list of persisted fields from storage (not in-memory)
|
|
1885
|
+
* v6.7.0: Used during rebuild to discover which chunk files need deletion
|
|
1886
|
+
*
|
|
1887
|
+
* @returns Array of field names that have persisted sparse indices
|
|
1888
|
+
*/
|
|
1889
|
+
async getPersistedFieldList() {
|
|
1890
|
+
try {
|
|
1891
|
+
const registry = await this.storage.getMetadata('__metadata_field_registry__');
|
|
1892
|
+
if (!registry?.fields || !Array.isArray(registry.fields)) {
|
|
1893
|
+
return [];
|
|
1894
|
+
}
|
|
1895
|
+
return registry.fields.filter((f) => typeof f === 'string' && f.length > 0);
|
|
1896
|
+
}
|
|
1897
|
+
catch (error) {
|
|
1898
|
+
prodLog.debug('Could not load persisted field list:', error);
|
|
1899
|
+
return [];
|
|
1900
|
+
}
|
|
1901
|
+
}
|
|
1902
|
+
/**
|
|
1903
|
+
* Delete all chunk files for a specific field
|
|
1904
|
+
* v6.7.0: Used during rebuild to ensure clean slate
|
|
1905
|
+
*
|
|
1906
|
+
* @param field Field name whose chunks should be deleted
|
|
1907
|
+
*/
|
|
1908
|
+
async deleteFieldChunks(field) {
|
|
1909
|
+
try {
|
|
1910
|
+
// Load sparse index to get chunk IDs
|
|
1911
|
+
const indexPath = `__sparse_index__${field}`;
|
|
1912
|
+
const sparseData = await this.storage.getMetadata(indexPath);
|
|
1913
|
+
if (sparseData) {
|
|
1914
|
+
const sparseIndex = SparseIndex.fromJSON(sparseData);
|
|
1915
|
+
// Delete all chunk files for this field
|
|
1916
|
+
for (const chunkId of sparseIndex.getAllChunkIds()) {
|
|
1917
|
+
await this.chunkManager.deleteChunk(field, chunkId);
|
|
1918
|
+
}
|
|
1919
|
+
// Delete the sparse index file itself
|
|
1920
|
+
await this.storage.saveMetadata(indexPath, null);
|
|
1921
|
+
}
|
|
1922
|
+
}
|
|
1923
|
+
catch (error) {
|
|
1924
|
+
// Silent failure - if we can't delete old chunks, rebuild will still work
|
|
1925
|
+
// (new chunks will be created, old ones become orphaned)
|
|
1926
|
+
prodLog.debug(`Could not clear chunks for field '${field}':`, error);
|
|
1927
|
+
}
|
|
1928
|
+
}
|
|
1929
|
+
/**
|
|
1930
|
+
* Clear ALL metadata index data from storage (for recovery)
|
|
1931
|
+
* v6.7.0: Nuclear option for recovering from corrupted index state
|
|
1932
|
+
*
|
|
1933
|
+
* WARNING: This deletes all indexed data - requires full rebuild after!
|
|
1934
|
+
* Use when index is corrupted beyond normal rebuild repair.
|
|
1935
|
+
*/
|
|
1936
|
+
async clearAllIndexData() {
|
|
1937
|
+
prodLog.warn('🗑️ Clearing ALL metadata index data from storage...');
|
|
1938
|
+
// Get all persisted fields
|
|
1939
|
+
const fields = await this.getPersistedFieldList();
|
|
1940
|
+
// Delete chunks and sparse indices for each field
|
|
1941
|
+
let deletedCount = 0;
|
|
1942
|
+
for (const field of fields) {
|
|
1943
|
+
await this.deleteFieldChunks(field);
|
|
1944
|
+
deletedCount++;
|
|
1945
|
+
}
|
|
1946
|
+
// Delete field registry
|
|
1947
|
+
try {
|
|
1948
|
+
await this.storage.saveMetadata('__metadata_field_registry__', null);
|
|
1949
|
+
}
|
|
1950
|
+
catch (error) {
|
|
1951
|
+
prodLog.debug('Could not delete field registry:', error);
|
|
1952
|
+
}
|
|
1953
|
+
// Clear in-memory state
|
|
1954
|
+
this.fieldIndexes.clear();
|
|
1955
|
+
this.dirtyFields.clear();
|
|
1956
|
+
this.unifiedCache.clear('metadata');
|
|
1957
|
+
this.totalEntitiesByType.clear();
|
|
1958
|
+
this.entityCountsByTypeFixed.fill(0);
|
|
1959
|
+
this.verbCountsByTypeFixed.fill(0);
|
|
1960
|
+
this.typeFieldAffinity.clear();
|
|
1961
|
+
// Clear EntityIdMapper
|
|
1962
|
+
await this.idMapper.clear();
|
|
1963
|
+
// Clear chunk manager cache
|
|
1964
|
+
this.chunkManager.clearCache();
|
|
1965
|
+
prodLog.info(`✅ Cleared ${deletedCount} field indexes and all in-memory state`);
|
|
1966
|
+
prodLog.info('⚠️ Run brain.index.rebuild() to recreate the index from entity data');
|
|
1967
|
+
}
|
|
1878
1968
|
/**
|
|
1879
1969
|
* Get count of entities by type - O(1) operation using existing tracking
|
|
1880
1970
|
* This exposes the production-ready counting that's already maintained
|
|
@@ -2080,6 +2170,15 @@ export class MetadataIndexManager {
|
|
|
2080
2170
|
}
|
|
2081
2171
|
}
|
|
2082
2172
|
}
|
|
2173
|
+
// v6.7.0: Sanity check for index corruption (77x overcounting bug detection)
|
|
2174
|
+
const entityCount = this.idMapper.size;
|
|
2175
|
+
if (entityCount > 0) {
|
|
2176
|
+
const avgIdsPerEntity = totalIds / entityCount;
|
|
2177
|
+
if (avgIdsPerEntity > 100) {
|
|
2178
|
+
prodLog.warn(`⚠️ Metadata index may be corrupted: ${avgIdsPerEntity.toFixed(1)} avg entries/entity (expected ~30). ` +
|
|
2179
|
+
`Try running brain.index.clearAllIndexData() followed by brain.index.rebuild() to fix.`);
|
|
2180
|
+
}
|
|
2181
|
+
}
|
|
2083
2182
|
return {
|
|
2084
2183
|
totalEntries,
|
|
2085
2184
|
totalIds,
|
|
@@ -2114,6 +2213,28 @@ export class MetadataIndexManager {
|
|
|
2114
2213
|
// Clear all cached sparse indices in UnifiedCache
|
|
2115
2214
|
// This ensures rebuild starts fresh (v3.44.1)
|
|
2116
2215
|
this.unifiedCache.clear('metadata');
|
|
2216
|
+
// v6.7.0: CRITICAL FIX - Delete existing chunk files from storage
|
|
2217
|
+
// Without this, old chunk data accumulates with each rebuild causing 77x overcounting!
|
|
2218
|
+
// Previous fix (v6.2.4) cleared type counts but missed chunk file accumulation.
|
|
2219
|
+
prodLog.info('🗑️ Clearing existing metadata index chunks from storage...');
|
|
2220
|
+
const existingFields = await this.getPersistedFieldList();
|
|
2221
|
+
if (existingFields.length > 0) {
|
|
2222
|
+
for (const field of existingFields) {
|
|
2223
|
+
await this.deleteFieldChunks(field);
|
|
2224
|
+
}
|
|
2225
|
+
// Delete field registry (will be recreated on flush)
|
|
2226
|
+
try {
|
|
2227
|
+
await this.storage.saveMetadata('__metadata_field_registry__', null);
|
|
2228
|
+
}
|
|
2229
|
+
catch (error) {
|
|
2230
|
+
prodLog.debug('Could not delete field registry:', error);
|
|
2231
|
+
}
|
|
2232
|
+
prodLog.info(`✅ Cleared ${existingFields.length} field indexes from storage`);
|
|
2233
|
+
}
|
|
2234
|
+
// Clear EntityIdMapper to start fresh (v6.7.0)
|
|
2235
|
+
await this.idMapper.clear();
|
|
2236
|
+
// Clear chunk manager cache
|
|
2237
|
+
this.chunkManager.clearCache();
|
|
2117
2238
|
// Adaptive rebuild strategy based on storage adapter (v4.2.3)
|
|
2118
2239
|
// FileSystem/Memory/OPFS: Load all at once (avoids getAllShardedFiles() overhead on every batch)
|
|
2119
2240
|
// Cloud (GCS/S3/R2): Use pagination with small batches (prevent socket exhaustion)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@soulcraft/brainy",
|
|
3
|
-
"version": "6.6.0",
|
|
3
|
+
"version": "6.6.2",
|
|
4
4
|
"description": "Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. Stage 3 CANONICAL: 42 nouns × 127 verbs covering 96-97% of all human knowledge.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|