@soulcraft/cortex 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +16 -0
- package/README.md +125 -0
- package/dist/graph/NativeGraphAdjacencyIndex.d.ts +92 -0
- package/dist/graph/NativeGraphAdjacencyIndex.js +671 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.js +23 -0
- package/dist/license.d.ts +18 -0
- package/dist/license.js +172 -0
- package/dist/native/NativeEmbeddingEngine.d.ts +79 -0
- package/dist/native/NativeEmbeddingEngine.js +302 -0
- package/dist/native/NativeRoaringBitmap32.d.ts +114 -0
- package/dist/native/NativeRoaringBitmap32.js +221 -0
- package/dist/native/ffi.d.ts +20 -0
- package/dist/native/ffi.js +48 -0
- package/dist/native/index.d.ts +30 -0
- package/dist/native/index.js +58 -0
- package/dist/native/napi.d.ts +21 -0
- package/dist/native/napi.js +88 -0
- package/dist/native/types.d.ts +710 -0
- package/dist/native/types.js +16 -0
- package/dist/plugin.d.ts +22 -0
- package/dist/plugin.js +115 -0
- package/dist/storage/mmapFileSystemStorage.d.ts +24 -0
- package/dist/storage/mmapFileSystemStorage.js +73 -0
- package/dist/utils/NativeMetadataIndex.d.ts +185 -0
- package/dist/utils/NativeMetadataIndex.js +1274 -0
- package/dist/utils/nativeEntityIdMapper.d.ts +84 -0
- package/dist/utils/nativeEntityIdMapper.js +134 -0
- package/native/brainy-native.darwin-arm64.node +0 -0
- package/native/brainy-native.darwin-x64.node +0 -0
- package/native/brainy-native.linux-arm64-gnu.node +0 -0
- package/native/brainy-native.linux-x64-gnu.node +0 -0
- package/native/brainy-native.win32-x64-msvc.node +0 -0
- package/native/index.d.ts +1068 -0
- package/package.json +66 -0
|
@@ -0,0 +1,1274 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NativeMetadataIndex — TypeScript wrapper around the Rust NativeMetadataIndex.
|
|
3
|
+
*
|
|
4
|
+
* Implements the same public API as the old MetadataIndexManager (3,721 lines),
|
|
5
|
+
* delegating core operations (query, mutation, normalization) to Rust.
|
|
6
|
+
*
|
|
7
|
+
* Architecture:
|
|
8
|
+
* - Rust owns: bitmap operations, filter evaluation, value normalization,
|
|
9
|
+
* field extraction, chunk management, text search, entity ID mapping
|
|
10
|
+
* - TS owns: async storage I/O, lazy field loading, rebuild orchestration,
|
|
11
|
+
* flush, getSortedIdsForFilter (loads sort values from storage)
|
|
12
|
+
*
|
|
13
|
+
* Buffer exchange pattern: TS loads data from storage, passes JSON to Rust.
|
|
14
|
+
* Rust operates in-memory, returns serialized state for TS to persist.
|
|
15
|
+
*/
|
|
16
|
+
import { prodLog, getGlobalCache, FieldTypeInference } from '@soulcraft/brainy/internals';
|
|
17
|
+
import { TypeUtils, NOUN_TYPE_COUNT, VERB_TYPE_COUNT } from '@soulcraft/brainy/types/graphTypes';
|
|
18
|
+
import { loadNativeModule } from '../native/index.js';
|
|
19
|
+
/**
|
|
20
|
+
* MetadataIndexManager — native Rust implementation with TS storage bridge.
|
|
21
|
+
*
|
|
22
|
+
* Drop-in replacement for the old pure-TS MetadataIndexManager.
|
|
23
|
+
* All bitmap AND/OR/NOT operations execute in Rust without crossing FFI.
|
|
24
|
+
*/
|
|
25
|
+
export class MetadataIndexManager {
|
|
26
|
+
storage;
|
|
27
|
+
config;
|
|
28
|
+
native;
|
|
29
|
+
isRebuilding = false;
|
|
30
|
+
lastFlushTime = Date.now();
|
|
31
|
+
autoFlushThreshold = 10;
|
|
32
|
+
dirtyFields = new Set();
|
|
33
|
+
// Lazy field loading: tracks which fields have been loaded into Rust
|
|
34
|
+
loadedFields = new Set();
|
|
35
|
+
// Tracks which field indexes are known (from field registry)
|
|
36
|
+
knownFields = new Set();
|
|
37
|
+
// TS-only analytics (not in Rust)
|
|
38
|
+
fieldStats = new Map();
|
|
39
|
+
typeFieldAffinity = new Map();
|
|
40
|
+
totalEntitiesByType = new Map();
|
|
41
|
+
entityCountsByTypeFixed = new Uint32Array(NOUN_TYPE_COUNT);
|
|
42
|
+
verbCountsByTypeFixed = new Uint32Array(VERB_TYPE_COUNT);
|
|
43
|
+
// Unified cache for coordinated memory management
|
|
44
|
+
unifiedCache;
|
|
45
|
+
// Field Type Inference
|
|
46
|
+
fieldTypeInference;
|
|
47
|
+
constructor(storage, config = {}) {
|
|
48
|
+
this.storage = storage;
|
|
49
|
+
this.config = {
|
|
50
|
+
maxIndexSize: config.maxIndexSize ?? 10000,
|
|
51
|
+
rebuildThreshold: config.rebuildThreshold ?? 0.1,
|
|
52
|
+
autoOptimize: config.autoOptimize ?? true,
|
|
53
|
+
indexedFields: config.indexedFields ?? [],
|
|
54
|
+
excludeFields: config.excludeFields ?? [
|
|
55
|
+
'embedding', 'vector', 'embeddings', 'vectors',
|
|
56
|
+
'content', 'data', 'originalData', '_data',
|
|
57
|
+
'id'
|
|
58
|
+
]
|
|
59
|
+
};
|
|
60
|
+
// Build Rust config JSON
|
|
61
|
+
const nativeConfig = {};
|
|
62
|
+
if (this.config.excludeFields.length > 0) {
|
|
63
|
+
nativeConfig.excludeFields = this.config.excludeFields;
|
|
64
|
+
}
|
|
65
|
+
if (this.config.indexedFields.length > 0) {
|
|
66
|
+
nativeConfig.indexedFields = this.config.indexedFields;
|
|
67
|
+
}
|
|
68
|
+
const bindings = loadNativeModule();
|
|
69
|
+
this.native = new bindings.NativeMetadataIndex(Object.keys(nativeConfig).length > 0 ? JSON.stringify(nativeConfig) : null);
|
|
70
|
+
this.unifiedCache = getGlobalCache();
|
|
71
|
+
this.fieldTypeInference = new FieldTypeInference(storage);
|
|
72
|
+
}
|
|
73
|
+
// ==========================================================================
|
|
74
|
+
// Initialization
|
|
75
|
+
// ==========================================================================
|
|
76
|
+
async init() {
|
|
77
|
+
// Load field registry to discover persisted indices
|
|
78
|
+
await this.loadFieldRegistry();
|
|
79
|
+
// Load entity ID mapper
|
|
80
|
+
await this.loadEntityIdMapper();
|
|
81
|
+
const hasFields = this.knownFields.size > 0;
|
|
82
|
+
if (hasFields) {
|
|
83
|
+
await this.warmCache();
|
|
84
|
+
await this.lazyLoadCounts();
|
|
85
|
+
this.syncTypeCountsToFixed();
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
// ==========================================================================
|
|
89
|
+
// Storage I/O helpers
|
|
90
|
+
// ==========================================================================
|
|
91
|
+
async loadEntityIdMapper() {
|
|
92
|
+
try {
|
|
93
|
+
const data = await this.storage.getMetadata('brainy:entityIdMapper');
|
|
94
|
+
if (data && data.nextId !== undefined) {
|
|
95
|
+
this.native.loadEntityIdMapper(JSON.stringify(data));
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
catch {
|
|
99
|
+
// First time — mapper starts empty
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
async saveEntityIdMapper() {
|
|
103
|
+
if (!this.native.isEntityIdMapperDirty())
|
|
104
|
+
return;
|
|
105
|
+
const json = this.native.saveEntityIdMapper();
|
|
106
|
+
const data = JSON.parse(json);
|
|
107
|
+
await this.storage.saveMetadata('brainy:entityIdMapper', data);
|
|
108
|
+
// Reload to clear dirty flag
|
|
109
|
+
this.native.loadEntityIdMapper(json);
|
|
110
|
+
}
|
|
111
|
+
async loadFieldRegistry() {
|
|
112
|
+
try {
|
|
113
|
+
const registry = await this.storage.getMetadata('__metadata_field_registry__');
|
|
114
|
+
if (!registry?.fields || !Array.isArray(registry.fields)) {
|
|
115
|
+
prodLog.debug('No field registry found - will build on first flush');
|
|
116
|
+
return;
|
|
117
|
+
}
|
|
118
|
+
// Load into Rust
|
|
119
|
+
this.native.loadFieldRegistry(JSON.stringify(registry));
|
|
120
|
+
// Track known fields
|
|
121
|
+
for (const field of registry.fields) {
|
|
122
|
+
if (typeof field === 'string' && field.length > 0) {
|
|
123
|
+
this.knownFields.add(field);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
prodLog.info(`Loaded field registry: ${registry.fields.length} persisted fields discovered\n` +
|
|
127
|
+
` Fields: ${registry.fields.slice(0, 5).join(', ')}${registry.fields.length > 5 ? '...' : ''}`);
|
|
128
|
+
}
|
|
129
|
+
catch (error) {
|
|
130
|
+
prodLog.debug('Could not load field registry:', error);
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
async saveFieldRegistry() {
|
|
134
|
+
if (this.knownFields.size === 0)
|
|
135
|
+
return;
|
|
136
|
+
try {
|
|
137
|
+
const json = this.native.saveFieldRegistry();
|
|
138
|
+
const data = JSON.parse(json);
|
|
139
|
+
await this.storage.saveMetadata('__metadata_field_registry__', data);
|
|
140
|
+
}
|
|
141
|
+
catch (error) {
|
|
142
|
+
prodLog.warn('Failed to save field registry:', error);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Ensure a field's sparse index + all chunks are loaded into Rust.
|
|
147
|
+
* Matches the lazy-loading pattern of the old UnifiedCache approach.
|
|
148
|
+
*/
|
|
149
|
+
async ensureFieldLoaded(field) {
|
|
150
|
+
if (this.loadedFields.has(field))
|
|
151
|
+
return;
|
|
152
|
+
const indexPath = `__sparse_index__${field}`;
|
|
153
|
+
try {
|
|
154
|
+
const sparseData = await this.storage.getMetadata(indexPath);
|
|
155
|
+
if (sparseData) {
|
|
156
|
+
this.native.loadSparseIndex(field, JSON.stringify(sparseData));
|
|
157
|
+
const chunkIds = this.native.getSparseIndexChunkIds(field);
|
|
158
|
+
// Load all chunks in parallel
|
|
159
|
+
await Promise.all(chunkIds.map(async (chunkId) => {
|
|
160
|
+
if (!this.native.isChunkLoaded(field, chunkId)) {
|
|
161
|
+
const chunkPath = `__chunk__${field}_${chunkId}`;
|
|
162
|
+
const chunkData = await this.storage.getMetadata(chunkPath);
|
|
163
|
+
if (chunkData) {
|
|
164
|
+
this.native.loadChunk(field, chunkId, JSON.stringify(chunkData));
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}));
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
catch (error) {
|
|
171
|
+
prodLog.debug(`Failed to load field '${field}':`, error);
|
|
172
|
+
}
|
|
173
|
+
this.loadedFields.add(field);
|
|
174
|
+
}
|
|
175
|
+
/**
|
|
176
|
+
* Ensure multiple fields are loaded (parallel).
|
|
177
|
+
*/
|
|
178
|
+
async ensureFieldsLoaded(fields) {
|
|
179
|
+
const unloaded = fields.filter(f => !this.loadedFields.has(f));
|
|
180
|
+
if (unloaded.length === 0)
|
|
181
|
+
return;
|
|
182
|
+
await Promise.all(unloaded.map(f => this.ensureFieldLoaded(f)));
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Extract filter field names for lazy loading.
|
|
186
|
+
*/
|
|
187
|
+
extractFilterFields(filter) {
|
|
188
|
+
if (!filter || typeof filter !== 'object')
|
|
189
|
+
return [];
|
|
190
|
+
const fields = [];
|
|
191
|
+
if (filter.allOf && Array.isArray(filter.allOf)) {
|
|
192
|
+
for (const sub of filter.allOf) {
|
|
193
|
+
fields.push(...this.extractFilterFields(sub));
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
if (filter.anyOf && Array.isArray(filter.anyOf)) {
|
|
197
|
+
for (const sub of filter.anyOf) {
|
|
198
|
+
fields.push(...this.extractFilterFields(sub));
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
for (const key of Object.keys(filter)) {
|
|
202
|
+
if (key !== 'allOf' && key !== 'anyOf' && key !== 'not') {
|
|
203
|
+
fields.push(key);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
return [...new Set(fields)];
|
|
207
|
+
}
|
|
208
|
+
// ==========================================================================
|
|
209
|
+
// Dirty data persistence
|
|
210
|
+
// ==========================================================================
|
|
211
|
+
/**
|
|
212
|
+
* Persist dirty chunks, sparse indices, field indexes from a MutationResult.
|
|
213
|
+
*/
|
|
214
|
+
async persistMutationResult(result) {
|
|
215
|
+
const promises = [];
|
|
216
|
+
// Save dirty chunks
|
|
217
|
+
for (const { field, chunkId } of result.dirtyChunks) {
|
|
218
|
+
const json = this.native.saveChunk(field, chunkId);
|
|
219
|
+
if (json) {
|
|
220
|
+
const chunkPath = `__chunk__${field}_${chunkId}`;
|
|
221
|
+
promises.push(this.storage.saveMetadata(chunkPath, JSON.parse(json)));
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
// Save dirty sparse indices
|
|
225
|
+
for (const field of result.dirtySparseIndices) {
|
|
226
|
+
const json = this.native.saveSparseIndex(field);
|
|
227
|
+
if (json) {
|
|
228
|
+
const indexPath = `__sparse_index__${field}`;
|
|
229
|
+
promises.push(this.storage.saveMetadata(indexPath, JSON.parse(json)));
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
// Save dirty field indexes
|
|
233
|
+
for (const field of result.dirtyFieldIndexes) {
|
|
234
|
+
const json = this.native.saveFieldIndex(field);
|
|
235
|
+
if (json) {
|
|
236
|
+
const indexId = `__metadata_field_index__field_${field}`;
|
|
237
|
+
promises.push(this.storage.saveMetadata(indexId, JSON.parse(json)));
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
// Track new fields
|
|
241
|
+
for (const field of result.newFields) {
|
|
242
|
+
this.knownFields.add(field);
|
|
243
|
+
this.dirtyFields.add(field);
|
|
244
|
+
}
|
|
245
|
+
if (result.dirtyFieldRegistry) {
|
|
246
|
+
promises.push(this.saveFieldRegistry());
|
|
247
|
+
}
|
|
248
|
+
if (result.dirtyEntityIdMapper) {
|
|
249
|
+
promises.push(this.saveEntityIdMapper());
|
|
250
|
+
}
|
|
251
|
+
await Promise.all(promises);
|
|
252
|
+
}
|
|
253
|
+
// ==========================================================================
|
|
254
|
+
// Cache warming
|
|
255
|
+
// ==========================================================================
|
|
256
|
+
async warmCache() {
|
|
257
|
+
const commonFields = ['noun', 'type', 'service', 'createdAt'];
|
|
258
|
+
await Promise.all(commonFields.map(async (field) => {
|
|
259
|
+
try {
|
|
260
|
+
await this.ensureFieldLoaded(field);
|
|
261
|
+
}
|
|
262
|
+
catch {
|
|
263
|
+
prodLog.debug(`Cache warming: field '${field}' not yet indexed`);
|
|
264
|
+
}
|
|
265
|
+
}));
|
|
266
|
+
await this.warmCacheForTopTypes(3);
|
|
267
|
+
}
|
|
268
|
+
async warmCacheForTopTypes(topN = 3) {
|
|
269
|
+
const topTypes = this.getTopNounTypes(topN);
|
|
270
|
+
if (topTypes.length === 0)
|
|
271
|
+
return;
|
|
272
|
+
for (const type of topTypes) {
|
|
273
|
+
const typeFields = this.typeFieldAffinity.get(type);
|
|
274
|
+
if (!typeFields)
|
|
275
|
+
continue;
|
|
276
|
+
const topFields = Array.from(typeFields.entries())
|
|
277
|
+
.sort((a, b) => b[1] - a[1])
|
|
278
|
+
.slice(0, 5)
|
|
279
|
+
.map(([field]) => field);
|
|
280
|
+
if (topFields.length === 0)
|
|
281
|
+
continue;
|
|
282
|
+
await Promise.all(topFields.map(async (field) => {
|
|
283
|
+
try {
|
|
284
|
+
await this.ensureFieldLoaded(field);
|
|
285
|
+
}
|
|
286
|
+
catch {
|
|
287
|
+
// Silently ignore
|
|
288
|
+
}
|
|
289
|
+
}));
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
// ==========================================================================
|
|
293
|
+
// Lazy load counts
|
|
294
|
+
// ==========================================================================
|
|
295
|
+
async lazyLoadCounts() {
|
|
296
|
+
try {
|
|
297
|
+
this.totalEntitiesByType.clear();
|
|
298
|
+
this.entityCountsByTypeFixed.fill(0);
|
|
299
|
+
this.verbCountsByTypeFixed.fill(0);
|
|
300
|
+
// Ensure 'noun' field is loaded into Rust
|
|
301
|
+
await this.ensureFieldLoaded('noun');
|
|
302
|
+
// Use Rust to get type counts via the noun field
|
|
303
|
+
const nounValues = this.native.getFilterValues('noun');
|
|
304
|
+
for (const typeName of nounValues) {
|
|
305
|
+
const ids = this.native.getIds('noun', JSON.stringify(typeName));
|
|
306
|
+
if (ids.length > 0) {
|
|
307
|
+
this.totalEntitiesByType.set(typeName, ids.length);
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
prodLog.debug(`Loaded type counts: ${this.totalEntitiesByType.size} types`);
|
|
311
|
+
}
|
|
312
|
+
catch (error) {
|
|
313
|
+
prodLog.debug('Could not load type counts:', error);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
syncTypeCountsToFixed() {
|
|
317
|
+
for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
|
|
318
|
+
const type = TypeUtils.getNounFromIndex(i);
|
|
319
|
+
const count = this.totalEntitiesByType.get(type) || 0;
|
|
320
|
+
this.entityCountsByTypeFixed[i] = count;
|
|
321
|
+
}
|
|
322
|
+
for (let i = 0; i < VERB_TYPE_COUNT; i++) {
|
|
323
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
324
|
+
const count = this.totalEntitiesByType.get(type) || 0;
|
|
325
|
+
this.verbCountsByTypeFixed[i] = count;
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
syncTypeCountsFromFixed() {
|
|
329
|
+
for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
|
|
330
|
+
const count = this.entityCountsByTypeFixed[i];
|
|
331
|
+
if (count > 0) {
|
|
332
|
+
const type = TypeUtils.getNounFromIndex(i);
|
|
333
|
+
this.totalEntitiesByType.set(type, count);
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
for (let i = 0; i < VERB_TYPE_COUNT; i++) {
|
|
337
|
+
const count = this.verbCountsByTypeFixed[i];
|
|
338
|
+
if (count > 0) {
|
|
339
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
340
|
+
this.totalEntitiesByType.set(type, count);
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
// ==========================================================================
|
|
345
|
+
// Core query methods
|
|
346
|
+
// ==========================================================================
|
|
347
|
+
async getIds(field, value) {
|
|
348
|
+
await this.ensureFieldLoaded(field);
|
|
349
|
+
return this.native.getIds(field, JSON.stringify(value));
|
|
350
|
+
}
|
|
351
|
+
async getIdsForFilter(filter) {
|
|
352
|
+
if (!filter || Object.keys(filter).length === 0)
|
|
353
|
+
return [];
|
|
354
|
+
// Extract fields and ensure loaded
|
|
355
|
+
const fields = this.extractFilterFields(filter);
|
|
356
|
+
await this.ensureFieldsLoaded(fields);
|
|
357
|
+
// For ne/exists:false/missing operators, we need allIds
|
|
358
|
+
const needsAllIds = this.filterNeedsAllIds(filter);
|
|
359
|
+
let allIdsJson = null;
|
|
360
|
+
if (needsAllIds) {
|
|
361
|
+
const allIds = await this.getAllIds();
|
|
362
|
+
allIdsJson = JSON.stringify(allIds);
|
|
363
|
+
}
|
|
364
|
+
return this.native.getIdsForFilter(JSON.stringify(filter), allIdsJson);
|
|
365
|
+
}
|
|
366
|
+
filterNeedsAllIds(filter) {
|
|
367
|
+
if (!filter || typeof filter !== 'object')
|
|
368
|
+
return false;
|
|
369
|
+
if (filter.allOf && Array.isArray(filter.allOf)) {
|
|
370
|
+
return filter.allOf.some((sub) => this.filterNeedsAllIds(sub));
|
|
371
|
+
}
|
|
372
|
+
if (filter.anyOf && Array.isArray(filter.anyOf)) {
|
|
373
|
+
return filter.anyOf.some((sub) => this.filterNeedsAllIds(sub));
|
|
374
|
+
}
|
|
375
|
+
for (const [_field, condition] of Object.entries(filter)) {
|
|
376
|
+
if (_field === 'allOf' || _field === 'anyOf' || _field === 'not')
|
|
377
|
+
continue;
|
|
378
|
+
if (condition && typeof condition === 'object' && !Array.isArray(condition)) {
|
|
379
|
+
for (const op of Object.keys(condition)) {
|
|
380
|
+
if (op === 'ne' || op === 'isNot' || op === 'notEquals')
|
|
381
|
+
return true;
|
|
382
|
+
if (op === 'exists' && !condition[op])
|
|
383
|
+
return true;
|
|
384
|
+
if (op === 'missing' && condition[op])
|
|
385
|
+
return true;
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
return false;
|
|
390
|
+
}
|
|
391
|
+
async getIdsForMultipleFields(fieldValuePairs) {
|
|
392
|
+
if (fieldValuePairs.length === 0)
|
|
393
|
+
return [];
|
|
394
|
+
const fields = fieldValuePairs.map(p => p.field);
|
|
395
|
+
await this.ensureFieldsLoaded(fields);
|
|
396
|
+
const pairsJson = JSON.stringify(fieldValuePairs.map(p => ({
|
|
397
|
+
field: p.field,
|
|
398
|
+
value: p.value
|
|
399
|
+
})));
|
|
400
|
+
return this.native.getIdsForMultipleFields(pairsJson);
|
|
401
|
+
}
|
|
402
|
+
async getIdsForTextQuery(query) {
|
|
403
|
+
await this.ensureFieldLoaded('__words__');
|
|
404
|
+
const resultJson = this.native.getIdsForTextQuery(query);
|
|
405
|
+
return JSON.parse(resultJson);
|
|
406
|
+
}
|
|
407
|
+
async getSortedIdsForFilter(filter, orderBy, order = 'asc') {
|
|
408
|
+
const filteredIds = await this.getIdsForFilter(filter);
|
|
409
|
+
if (filteredIds.length === 0)
|
|
410
|
+
return [];
|
|
411
|
+
const idValuePairs = [];
|
|
412
|
+
for (const id of filteredIds) {
|
|
413
|
+
const value = await this.getFieldValueForEntity(id, orderBy);
|
|
414
|
+
idValuePairs.push({ id, value });
|
|
415
|
+
}
|
|
416
|
+
idValuePairs.sort((a, b) => {
|
|
417
|
+
if (a.value == null && b.value == null)
|
|
418
|
+
return 0;
|
|
419
|
+
if (a.value == null)
|
|
420
|
+
return order === 'asc' ? 1 : -1;
|
|
421
|
+
if (b.value == null)
|
|
422
|
+
return order === 'asc' ? -1 : 1;
|
|
423
|
+
if (a.value === b.value)
|
|
424
|
+
return 0;
|
|
425
|
+
const comparison = a.value < b.value ? -1 : 1;
|
|
426
|
+
return order === 'asc' ? comparison : -comparison;
|
|
427
|
+
});
|
|
428
|
+
return idValuePairs.map(p => p.id);
|
|
429
|
+
}
|
|
430
|
+
async getFieldValueForEntity(entityId, field) {
|
|
431
|
+
// For timestamp fields, load actual value from entity metadata
|
|
432
|
+
if (field === 'createdAt' || field === 'updatedAt' || field === 'accessed' || field === 'modified') {
|
|
433
|
+
try {
|
|
434
|
+
const noun = await this.storage.getNoun(entityId);
|
|
435
|
+
if (noun && noun.metadata) {
|
|
436
|
+
return noun.metadata[field];
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
catch {
|
|
440
|
+
// Fall back to index value
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
// For non-timestamp fields, scan chunks for this entity's value
|
|
444
|
+
await this.ensureFieldLoaded(field);
|
|
445
|
+
const ids = this.native.getIds(field, JSON.stringify(entityId));
|
|
446
|
+
// getIds returns IDs matching field=value, but we need value for field+entityId
|
|
447
|
+
// We need to scan through values — use getFilterValues and check membership
|
|
448
|
+
const values = this.native.getFilterValues(field);
|
|
449
|
+
for (const val of values) {
|
|
450
|
+
const matchIds = this.native.getIds(field, JSON.stringify(val));
|
|
451
|
+
if (matchIds.includes(entityId)) {
|
|
452
|
+
return this.denormalizeValue(val, field);
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
return undefined;
|
|
456
|
+
}
|
|
457
|
+
denormalizeValue(normalized, _field) {
|
|
458
|
+
const asNumber = Number(normalized);
|
|
459
|
+
if (!isNaN(asNumber))
|
|
460
|
+
return asNumber;
|
|
461
|
+
return normalized;
|
|
462
|
+
}
|
|
463
|
+
// ==========================================================================
|
|
464
|
+
// Filter discovery
|
|
465
|
+
// ==========================================================================
|
|
466
|
+
async getFilterValues(field) {
|
|
467
|
+
await this.ensureFieldLoaded(field);
|
|
468
|
+
return this.native.getFilterValues(field);
|
|
469
|
+
}
|
|
470
|
+
async getFilterFields() {
|
|
471
|
+
return this.native.getFilterFields();
|
|
472
|
+
}
|
|
473
|
+
// ==========================================================================
|
|
474
|
+
// Mutation methods
|
|
475
|
+
// ==========================================================================
|
|
476
|
+
async addToIndex(id, entityOrMetadata, skipFlush = false, deferWrites = false) {
|
|
477
|
+
// Extract field names from entity to know which fields to load
|
|
478
|
+
const fieldNames = this.native.extractFieldNames(JSON.stringify(entityOrMetadata));
|
|
479
|
+
await this.ensureFieldsLoaded(fieldNames);
|
|
480
|
+
// Execute mutation in Rust (single FFI call)
|
|
481
|
+
const resultJson = this.native.addToIndex(id, JSON.stringify(entityOrMetadata));
|
|
482
|
+
const result = JSON.parse(resultJson);
|
|
483
|
+
// Track new fields in loadedFields
|
|
484
|
+
for (const field of result.newFields) {
|
|
485
|
+
this.loadedFields.add(field);
|
|
486
|
+
this.knownFields.add(field);
|
|
487
|
+
}
|
|
488
|
+
// Update TS-side type tracking
|
|
489
|
+
this.updateTypeTracking(id, entityOrMetadata, 'add');
|
|
490
|
+
// Persist dirty data (unless deferred for batch operations)
|
|
491
|
+
if (!deferWrites) {
|
|
492
|
+
await this.persistMutationResult(result);
|
|
493
|
+
}
|
|
494
|
+
// Adaptive auto-flush
|
|
495
|
+
if (!skipFlush) {
|
|
496
|
+
for (const field of fieldNames) {
|
|
497
|
+
this.dirtyFields.add(field);
|
|
498
|
+
}
|
|
499
|
+
const timeSinceLastFlush = Date.now() - this.lastFlushTime;
|
|
500
|
+
const shouldAutoFlush = this.dirtyFields.size >= this.autoFlushThreshold ||
|
|
501
|
+
(this.dirtyFields.size > 10 && timeSinceLastFlush > 5000);
|
|
502
|
+
if (shouldAutoFlush) {
|
|
503
|
+
const startTime = Date.now();
|
|
504
|
+
await this.flush();
|
|
505
|
+
const flushTime = Date.now() - startTime;
|
|
506
|
+
if (flushTime < 50) {
|
|
507
|
+
this.autoFlushThreshold = Math.min(200, this.autoFlushThreshold * 1.2);
|
|
508
|
+
}
|
|
509
|
+
else if (flushTime > 200) {
|
|
510
|
+
this.autoFlushThreshold = Math.max(20, this.autoFlushThreshold * 0.8);
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
async removeFromIndex(id, metadata) {
|
|
516
|
+
if (metadata) {
|
|
517
|
+
const fieldNames = this.native.extractFieldNames(JSON.stringify(metadata));
|
|
518
|
+
await this.ensureFieldsLoaded(fieldNames);
|
|
519
|
+
const resultJson = this.native.removeFromIndex(id, JSON.stringify(metadata));
|
|
520
|
+
const result = JSON.parse(resultJson);
|
|
521
|
+
this.updateTypeTracking(id, metadata, 'remove');
|
|
522
|
+
await this.persistMutationResult(result);
|
|
523
|
+
}
|
|
524
|
+
else {
|
|
525
|
+
// Remove from all loaded fields (slower path)
|
|
526
|
+
prodLog.warn(`Removing ID ${id} without metadata requires scanning all fields (slow)`);
|
|
527
|
+
const loadedFields = this.native.getLoadedFields();
|
|
528
|
+
for (const field of loadedFields) {
|
|
529
|
+
await this.ensureFieldLoaded(field);
|
|
530
|
+
}
|
|
531
|
+
const resultJson = this.native.removeFromIndex(id);
|
|
532
|
+
const result = JSON.parse(resultJson);
|
|
533
|
+
await this.persistMutationResult(result);
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
updateTypeTracking(id, entityOrMetadata, operation) {
|
|
537
|
+
const nounValue = entityOrMetadata?.type || entityOrMetadata?.noun;
|
|
538
|
+
if (!nounValue)
|
|
539
|
+
return;
|
|
540
|
+
const entityType = String(nounValue).toLowerCase().trim();
|
|
541
|
+
if (operation === 'add') {
|
|
542
|
+
const current = this.totalEntitiesByType.get(entityType) || 0;
|
|
543
|
+
const newCount = current + 1;
|
|
544
|
+
this.totalEntitiesByType.set(entityType, newCount);
|
|
545
|
+
try {
|
|
546
|
+
const nounTypeIndex = TypeUtils.getNounIndex(entityType);
|
|
547
|
+
this.entityCountsByTypeFixed[nounTypeIndex] = newCount;
|
|
548
|
+
}
|
|
549
|
+
catch {
|
|
550
|
+
// Not a recognized noun type
|
|
551
|
+
}
|
|
552
|
+
// Update type-field affinity
|
|
553
|
+
if (!this.typeFieldAffinity.has(entityType)) {
|
|
554
|
+
this.typeFieldAffinity.set(entityType, new Map());
|
|
555
|
+
}
|
|
556
|
+
const typeFields = this.typeFieldAffinity.get(entityType);
|
|
557
|
+
const fieldNames = this.native.extractFieldNames(JSON.stringify(entityOrMetadata));
|
|
558
|
+
for (const field of fieldNames) {
|
|
559
|
+
const count = typeFields.get(field) || 0;
|
|
560
|
+
typeFields.set(field, count + 1);
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
else if (operation === 'remove') {
|
|
564
|
+
const current = this.totalEntitiesByType.get(entityType) || 0;
|
|
565
|
+
if (current > 1) {
|
|
566
|
+
const newCount = current - 1;
|
|
567
|
+
this.totalEntitiesByType.set(entityType, newCount);
|
|
568
|
+
try {
|
|
569
|
+
const nounTypeIndex = TypeUtils.getNounIndex(entityType);
|
|
570
|
+
this.entityCountsByTypeFixed[nounTypeIndex] = newCount;
|
|
571
|
+
}
|
|
572
|
+
catch { }
|
|
573
|
+
}
|
|
574
|
+
else {
|
|
575
|
+
this.totalEntitiesByType.delete(entityType);
|
|
576
|
+
this.typeFieldAffinity.delete(entityType);
|
|
577
|
+
try {
|
|
578
|
+
const nounTypeIndex = TypeUtils.getNounIndex(entityType);
|
|
579
|
+
this.entityCountsByTypeFixed[nounTypeIndex] = 0;
|
|
580
|
+
}
|
|
581
|
+
catch { }
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
// ==========================================================================
|
|
586
|
+
// Tokenization (delegates to Rust via extractFieldNames, but also exposed)
|
|
587
|
+
// ==========================================================================
|
|
588
|
+
tokenize(text) {
|
|
589
|
+
if (!text)
|
|
590
|
+
return [];
|
|
591
|
+
return text
|
|
592
|
+
.toLowerCase()
|
|
593
|
+
.replace(/[^\w\s]/g, ' ')
|
|
594
|
+
.split(/\s+/)
|
|
595
|
+
.filter(w => w.length >= 2 && w.length <= 50)
|
|
596
|
+
.filter((w, i, arr) => arr.indexOf(w) === i);
|
|
597
|
+
}
|
|
598
|
+
hashWord(word) {
|
|
599
|
+
let hash = 2166136261;
|
|
600
|
+
for (let i = 0; i < word.length; i++) {
|
|
601
|
+
hash ^= word.charCodeAt(i);
|
|
602
|
+
hash = Math.imul(hash, 16777619);
|
|
603
|
+
}
|
|
604
|
+
return hash | 0;
|
|
605
|
+
}
|
|
606
|
+
extractTextContent(data) {
|
|
607
|
+
if (data === null || data === undefined)
|
|
608
|
+
return '';
|
|
609
|
+
if (typeof data === 'string')
|
|
610
|
+
return data;
|
|
611
|
+
if (typeof data === 'number' || typeof data === 'boolean')
|
|
612
|
+
return String(data);
|
|
613
|
+
if (Array.isArray(data)) {
|
|
614
|
+
if (data.length > 0 && typeof data[0] === 'number')
|
|
615
|
+
return '';
|
|
616
|
+
return data.map(d => this.extractTextContent(d)).filter(Boolean).join(' ');
|
|
617
|
+
}
|
|
618
|
+
if (typeof data === 'object') {
|
|
619
|
+
const skipKeys = new Set(['vector', 'embedding', 'embeddings', 'connections', 'level', 'id']);
|
|
620
|
+
const texts = [];
|
|
621
|
+
for (const [key, value] of Object.entries(data)) {
|
|
622
|
+
if (skipKeys.has(key) || /^\d+$/.test(key))
|
|
623
|
+
continue;
|
|
624
|
+
const text = this.extractTextContent(value);
|
|
625
|
+
if (text)
|
|
626
|
+
texts.push(text);
|
|
627
|
+
}
|
|
628
|
+
return texts.join(' ');
|
|
629
|
+
}
|
|
630
|
+
return '';
|
|
631
|
+
}
|
|
632
|
+
// ==========================================================================
|
|
633
|
+
// Entity counting
|
|
634
|
+
// ==========================================================================
|
|
635
|
+
getEntityCountByType(type) {
|
|
636
|
+
return this.totalEntitiesByType.get(type) || 0;
|
|
637
|
+
}
|
|
638
|
+
getTotalEntityCount() {
|
|
639
|
+
let total = 0;
|
|
640
|
+
for (const count of this.totalEntitiesByType.values()) {
|
|
641
|
+
total += count;
|
|
642
|
+
}
|
|
643
|
+
return total;
|
|
644
|
+
}
|
|
645
|
+
getAllEntityCounts() {
|
|
646
|
+
return new Map(this.totalEntitiesByType);
|
|
647
|
+
}
|
|
648
|
+
getEntityCountByTypeEnum(type) {
|
|
649
|
+
const index = TypeUtils.getNounIndex(type);
|
|
650
|
+
return this.entityCountsByTypeFixed[index];
|
|
651
|
+
}
|
|
652
|
+
getVerbCountByTypeEnum(type) {
|
|
653
|
+
const index = TypeUtils.getVerbIndex(type);
|
|
654
|
+
return this.verbCountsByTypeFixed[index];
|
|
655
|
+
}
|
|
656
|
+
getTopNounTypes(n) {
|
|
657
|
+
const types = [];
|
|
658
|
+
for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
|
|
659
|
+
const count = this.entityCountsByTypeFixed[i];
|
|
660
|
+
if (count > 0) {
|
|
661
|
+
const type = TypeUtils.getNounFromIndex(i);
|
|
662
|
+
types.push({ type, count });
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
return types.sort((a, b) => b.count - a.count).slice(0, n).map(t => t.type);
|
|
666
|
+
}
|
|
667
|
+
getTopVerbTypes(n) {
|
|
668
|
+
const types = [];
|
|
669
|
+
for (let i = 0; i < VERB_TYPE_COUNT; i++) {
|
|
670
|
+
const count = this.verbCountsByTypeFixed[i];
|
|
671
|
+
if (count > 0) {
|
|
672
|
+
const type = TypeUtils.getVerbFromIndex(i);
|
|
673
|
+
types.push({ type, count });
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
return types.sort((a, b) => b.count - a.count).slice(0, n).map(t => t.type);
|
|
677
|
+
}
|
|
678
|
+
getAllNounTypeCounts() {
|
|
679
|
+
const counts = new Map();
|
|
680
|
+
for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
|
|
681
|
+
const count = this.entityCountsByTypeFixed[i];
|
|
682
|
+
if (count > 0) {
|
|
683
|
+
counts.set(TypeUtils.getNounFromIndex(i), count);
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
return counts;
|
|
687
|
+
}
|
|
688
|
+
getAllVerbTypeCounts() {
|
|
689
|
+
const counts = new Map();
|
|
690
|
+
for (let i = 0; i < VERB_TYPE_COUNT; i++) {
|
|
691
|
+
const count = this.verbCountsByTypeFixed[i];
|
|
692
|
+
if (count > 0) {
|
|
693
|
+
counts.set(TypeUtils.getVerbFromIndex(i), count);
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
return counts;
|
|
697
|
+
}
|
|
698
|
+
// ==========================================================================
|
|
699
|
+
// VFS Statistics (ID lists come from the native index; the intersection is done here with a JS Set)
|
|
700
|
+
// ==========================================================================
|
|
701
|
+
async getVFSEntityCountByType(type) {
|
|
702
|
+
await this.ensureFieldsLoaded(['isVFSEntity', 'noun']);
|
|
703
|
+
const vfsIds = this.native.getIds('isVFSEntity', JSON.stringify(true));
|
|
704
|
+
const typeIds = this.native.getIds('noun', JSON.stringify(type));
|
|
705
|
+
const vfsSet = new Set(vfsIds);
|
|
706
|
+
return typeIds.filter(id => vfsSet.has(id)).length;
|
|
707
|
+
}
|
|
708
|
+
async getAllVFSEntityCounts() {
|
|
709
|
+
await this.ensureFieldsLoaded(['isVFSEntity', 'noun']);
|
|
710
|
+
const vfsIds = this.native.getIds('isVFSEntity', JSON.stringify(true));
|
|
711
|
+
if (vfsIds.length === 0)
|
|
712
|
+
return new Map();
|
|
713
|
+
const vfsSet = new Set(vfsIds);
|
|
714
|
+
const result = new Map();
|
|
715
|
+
for (const type of this.totalEntitiesByType.keys()) {
|
|
716
|
+
const typeIds = this.native.getIds('noun', JSON.stringify(type));
|
|
717
|
+
const count = typeIds.filter(id => vfsSet.has(id)).length;
|
|
718
|
+
if (count > 0)
|
|
719
|
+
result.set(type, count);
|
|
720
|
+
}
|
|
721
|
+
return result;
|
|
722
|
+
}
|
|
723
|
+
async getTotalVFSEntityCount() {
|
|
724
|
+
await this.ensureFieldLoaded('isVFSEntity');
|
|
725
|
+
const vfsIds = this.native.getIds('isVFSEntity', JSON.stringify(true));
|
|
726
|
+
return vfsIds.length;
|
|
727
|
+
}
|
|
728
|
+
// ==========================================================================
|
|
729
|
+
// Criteria-based counting
|
|
730
|
+
// ==========================================================================
|
|
731
|
+
async getCountForCriteria(field, value) {
|
|
732
|
+
const ids = await this.getIds(field, value);
|
|
733
|
+
return ids.length;
|
|
734
|
+
}
|
|
735
|
+
// ==========================================================================
|
|
736
|
+
// getAllIds
|
|
737
|
+
// ==========================================================================
|
|
738
|
+
async getAllIds() {
|
|
739
|
+
const allIds = new Set();
|
|
740
|
+
if (this.storage && typeof this.storage.getNouns === 'function') {
|
|
741
|
+
try {
|
|
742
|
+
const result = await this.storage.getNouns({
|
|
743
|
+
pagination: { limit: 100000 }
|
|
744
|
+
});
|
|
745
|
+
if (result && result.items) {
|
|
746
|
+
result.items.forEach((item) => {
|
|
747
|
+
if (item.id)
|
|
748
|
+
allIds.add(item.id);
|
|
749
|
+
});
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
catch (e) {
|
|
753
|
+
prodLog.warn('Failed to get all IDs from storage:', e);
|
|
754
|
+
return [];
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
return Array.from(allIds);
|
|
758
|
+
}
|
|
759
|
+
// ==========================================================================
|
|
760
|
+
// Flush
|
|
761
|
+
// ==========================================================================
|
|
762
|
+
async flush() {
|
|
763
|
+
if (this.dirtyFields.size === 0)
|
|
764
|
+
return;
|
|
765
|
+
// Save dirty field indexes
|
|
766
|
+
const promises = [];
|
|
767
|
+
for (const field of this.dirtyFields) {
|
|
768
|
+
const json = this.native.saveFieldIndex(field);
|
|
769
|
+
if (json) {
|
|
770
|
+
const indexId = `__metadata_field_index__field_${field}`;
|
|
771
|
+
promises.push(this.storage.saveMetadata(indexId, JSON.parse(json)));
|
|
772
|
+
}
|
|
773
|
+
}
|
|
774
|
+
await Promise.all(promises);
|
|
775
|
+
// Save entity ID mapper
|
|
776
|
+
await this.saveEntityIdMapper();
|
|
777
|
+
// Save field registry
|
|
778
|
+
await this.saveFieldRegistry();
|
|
779
|
+
this.dirtyFields.clear();
|
|
780
|
+
this.lastFlushTime = Date.now();
|
|
781
|
+
}
|
|
782
|
+
// ==========================================================================
|
|
783
|
+
// Stats
|
|
784
|
+
// ==========================================================================
|
|
785
|
+
async getStats() {
|
|
786
|
+
const statsJson = this.native.getStats();
|
|
787
|
+
const stats = JSON.parse(statsJson);
|
|
788
|
+
return {
|
|
789
|
+
totalEntries: stats.totalEntries || 0,
|
|
790
|
+
totalIds: 0,
|
|
791
|
+
fieldsIndexed: stats.fieldsIndexed || [],
|
|
792
|
+
lastRebuild: Date.now(),
|
|
793
|
+
indexSize: (stats.totalEntries || 0) * 100
|
|
794
|
+
};
|
|
795
|
+
}
|
|
796
|
+
// ==========================================================================
|
|
797
|
+
// Validation
|
|
798
|
+
// ==========================================================================
|
|
799
|
+
async validateConsistency() {
|
|
800
|
+
const entityCount = this.native.entityIdMapperSize();
|
|
801
|
+
if (entityCount === 0) {
|
|
802
|
+
return {
|
|
803
|
+
healthy: true,
|
|
804
|
+
avgEntriesPerEntity: 0,
|
|
805
|
+
entityCount: 0,
|
|
806
|
+
indexEntryCount: 0,
|
|
807
|
+
recommendation: null
|
|
808
|
+
};
|
|
809
|
+
}
|
|
810
|
+
// Load all fields to count entries
|
|
811
|
+
for (const field of this.knownFields) {
|
|
812
|
+
await this.ensureFieldLoaded(field);
|
|
813
|
+
}
|
|
814
|
+
const statsJson = this.native.getStats();
|
|
815
|
+
const stats = JSON.parse(statsJson);
|
|
816
|
+
const indexEntryCount = stats.totalEntries || 0;
|
|
817
|
+
const avgEntriesPerEntity = indexEntryCount / entityCount;
|
|
818
|
+
const CORRUPTION_THRESHOLD = 100;
|
|
819
|
+
const healthy = avgEntriesPerEntity <= CORRUPTION_THRESHOLD;
|
|
820
|
+
let recommendation = null;
|
|
821
|
+
if (!healthy) {
|
|
822
|
+
recommendation = `Index corruption detected (${avgEntriesPerEntity.toFixed(1)} avg entries/entity, expected ~30). ` +
|
|
823
|
+
`Run brain.index.clearAllIndexData() followed by brain.index.rebuild() to repair.`;
|
|
824
|
+
}
|
|
825
|
+
return { healthy, avgEntriesPerEntity, entityCount, indexEntryCount, recommendation };
|
|
826
|
+
}
|
|
827
|
+
async detectAndRepairCorruption() {
|
|
828
|
+
const validation = await this.validateConsistency();
|
|
829
|
+
if (!validation.healthy) {
|
|
830
|
+
prodLog.warn(`Index corruption detected (${validation.avgEntriesPerEntity.toFixed(1)} avg entries/entity)`);
|
|
831
|
+
prodLog.warn('Auto-rebuilding index to repair...');
|
|
832
|
+
await this.clearAllIndexData();
|
|
833
|
+
await this.rebuild();
|
|
834
|
+
const postRebuild = await this.validateConsistency();
|
|
835
|
+
if (postRebuild.healthy) {
|
|
836
|
+
prodLog.info(`Index rebuilt successfully (${postRebuild.avgEntriesPerEntity.toFixed(1)} avg entries/entity)`);
|
|
837
|
+
}
|
|
838
|
+
else {
|
|
839
|
+
prodLog.error(`Index still appears corrupted after rebuild (${postRebuild.avgEntriesPerEntity.toFixed(1)} avg entries/entity).`);
|
|
840
|
+
}
|
|
841
|
+
}
|
|
842
|
+
}
|
|
843
|
+
// ==========================================================================
|
|
844
|
+
// Rebuild
|
|
845
|
+
// ==========================================================================
|
|
846
|
+
async rebuild() {
|
|
847
|
+
if (this.isRebuilding)
|
|
848
|
+
return;
|
|
849
|
+
this.isRebuilding = true;
|
|
850
|
+
try {
|
|
851
|
+
prodLog.info('Starting metadata index rebuild with batch processing...');
|
|
852
|
+
// Clear in-memory state
|
|
853
|
+
this.native.clear();
|
|
854
|
+
this.loadedFields.clear();
|
|
855
|
+
this.knownFields.clear();
|
|
856
|
+
this.dirtyFields.clear();
|
|
857
|
+
this.totalEntitiesByType.clear();
|
|
858
|
+
this.entityCountsByTypeFixed.fill(0);
|
|
859
|
+
this.verbCountsByTypeFixed.fill(0);
|
|
860
|
+
this.typeFieldAffinity.clear();
|
|
861
|
+
this.unifiedCache.clear('metadata');
|
|
862
|
+
// Delete existing chunk files from storage
|
|
863
|
+
prodLog.info('Clearing existing metadata index chunks from storage...');
|
|
864
|
+
const existingFields = await this.getPersistedFieldList();
|
|
865
|
+
if (existingFields.length > 0) {
|
|
866
|
+
for (const field of existingFields) {
|
|
867
|
+
await this.deleteFieldChunks(field);
|
|
868
|
+
}
|
|
869
|
+
try {
|
|
870
|
+
await this.storage.saveMetadata('__metadata_field_registry__', null);
|
|
871
|
+
}
|
|
872
|
+
catch { }
|
|
873
|
+
prodLog.info(`Cleared ${existingFields.length} field indexes from storage`);
|
|
874
|
+
}
|
|
875
|
+
// Clear entity ID mapper in storage
|
|
876
|
+
try {
|
|
877
|
+
await this.storage.saveMetadata('brainy:entityIdMapper', null);
|
|
878
|
+
}
|
|
879
|
+
catch { }
|
|
880
|
+
// Adaptive rebuild strategy
|
|
881
|
+
const storageType = this.storage.constructor.name;
|
|
882
|
+
const isLocalStorage = storageType === 'FileSystemStorage' || storageType === 'MemoryStorage';
|
|
883
|
+
let totalNounsProcessed = 0;
|
|
884
|
+
if (isLocalStorage) {
|
|
885
|
+
const result = await this.storage.getNouns({
|
|
886
|
+
pagination: { offset: 0, limit: 1000000 }
|
|
887
|
+
});
|
|
888
|
+
prodLog.info(`Loading ${result.items.length} nouns with metadata...`);
|
|
889
|
+
const nounIds = result.items.map(noun => noun.id);
|
|
890
|
+
let metadataBatch;
|
|
891
|
+
if (this.storage.getMetadataBatch) {
|
|
892
|
+
metadataBatch = await this.storage.getMetadataBatch(nounIds);
|
|
893
|
+
}
|
|
894
|
+
else {
|
|
895
|
+
metadataBatch = new Map();
|
|
896
|
+
for (const id of nounIds) {
|
|
897
|
+
try {
|
|
898
|
+
const metadata = await this.storage.getNounMetadata(id);
|
|
899
|
+
if (metadata)
|
|
900
|
+
metadataBatch.set(id, metadata);
|
|
901
|
+
}
|
|
902
|
+
catch { }
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
let localCount = 0;
|
|
906
|
+
for (const noun of result.items) {
|
|
907
|
+
const metadata = metadataBatch.get(noun.id);
|
|
908
|
+
if (metadata) {
|
|
909
|
+
await this.addToIndex(noun.id, metadata, true, true);
|
|
910
|
+
localCount++;
|
|
911
|
+
if (localCount % 5000 === 0) {
|
|
912
|
+
await this.flushRebuildDirty();
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
totalNounsProcessed = result.items.length;
|
|
917
|
+
prodLog.info(`Indexed ${totalNounsProcessed} nouns`);
|
|
918
|
+
}
|
|
919
|
+
else {
|
|
920
|
+
// Cloud storage: conservative batching
|
|
921
|
+
const nounLimit = 25;
|
|
922
|
+
let nounOffset = 0;
|
|
923
|
+
let hasMoreNouns = true;
|
|
924
|
+
let consecutiveEmptyBatches = 0;
|
|
925
|
+
const MAX_ITERATIONS = 10000;
|
|
926
|
+
let iterations = 0;
|
|
927
|
+
while (hasMoreNouns && iterations < MAX_ITERATIONS) {
|
|
928
|
+
iterations++;
|
|
929
|
+
const result = await this.storage.getNouns({
|
|
930
|
+
pagination: { offset: nounOffset, limit: nounLimit }
|
|
931
|
+
});
|
|
932
|
+
if (result.items.length === 0) {
|
|
933
|
+
consecutiveEmptyBatches++;
|
|
934
|
+
if (consecutiveEmptyBatches >= 3)
|
|
935
|
+
break;
|
|
936
|
+
if (result.hasMore) {
|
|
937
|
+
hasMoreNouns = false;
|
|
938
|
+
break;
|
|
939
|
+
}
|
|
940
|
+
}
|
|
941
|
+
else {
|
|
942
|
+
consecutiveEmptyBatches = 0;
|
|
943
|
+
}
|
|
944
|
+
const nounIds = result.items.map(noun => noun.id);
|
|
945
|
+
let metadataBatch;
|
|
946
|
+
if (this.storage.getMetadataBatch) {
|
|
947
|
+
metadataBatch = await this.storage.getMetadataBatch(nounIds);
|
|
948
|
+
}
|
|
949
|
+
else {
|
|
950
|
+
metadataBatch = new Map();
|
|
951
|
+
for (const id of nounIds) {
|
|
952
|
+
try {
|
|
953
|
+
const metadata = await this.storage.getNounMetadata(id);
|
|
954
|
+
if (metadata)
|
|
955
|
+
metadataBatch.set(id, metadata);
|
|
956
|
+
}
|
|
957
|
+
catch { }
|
|
958
|
+
}
|
|
959
|
+
}
|
|
960
|
+
for (const noun of result.items) {
|
|
961
|
+
const metadata = metadataBatch.get(noun.id);
|
|
962
|
+
if (metadata) {
|
|
963
|
+
await this.addToIndex(noun.id, metadata, true, true);
|
|
964
|
+
}
|
|
965
|
+
}
|
|
966
|
+
totalNounsProcessed += result.items.length;
|
|
967
|
+
if (totalNounsProcessed % 5000 === 0) {
|
|
968
|
+
await this.flushRebuildDirty();
|
|
969
|
+
}
|
|
970
|
+
hasMoreNouns = result.hasMore;
|
|
971
|
+
nounOffset += nounLimit;
|
|
972
|
+
await this.yieldToEventLoop();
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
// Rebuild verb metadata indexes
|
|
976
|
+
let totalVerbsProcessed = 0;
|
|
977
|
+
if (isLocalStorage) {
|
|
978
|
+
const result = await this.storage.getVerbs({
|
|
979
|
+
pagination: { offset: 0, limit: 1000000 }
|
|
980
|
+
});
|
|
981
|
+
const verbIds = result.items.map(verb => verb.id);
|
|
982
|
+
let verbMetadataBatch;
|
|
983
|
+
if (this.storage.getVerbMetadataBatch) {
|
|
984
|
+
verbMetadataBatch = await this.storage.getVerbMetadataBatch(verbIds);
|
|
985
|
+
}
|
|
986
|
+
else {
|
|
987
|
+
verbMetadataBatch = new Map();
|
|
988
|
+
for (const id of verbIds) {
|
|
989
|
+
try {
|
|
990
|
+
const metadata = await this.storage.getVerbMetadata(id);
|
|
991
|
+
if (metadata)
|
|
992
|
+
verbMetadataBatch.set(id, metadata);
|
|
993
|
+
}
|
|
994
|
+
catch { }
|
|
995
|
+
}
|
|
996
|
+
}
|
|
997
|
+
let verbLocalCount = 0;
|
|
998
|
+
for (const verb of result.items) {
|
|
999
|
+
const metadata = verbMetadataBatch.get(verb.id);
|
|
1000
|
+
if (metadata) {
|
|
1001
|
+
await this.addToIndex(verb.id, metadata, true, true);
|
|
1002
|
+
verbLocalCount++;
|
|
1003
|
+
if (verbLocalCount % 5000 === 0) {
|
|
1004
|
+
await this.flushRebuildDirty();
|
|
1005
|
+
}
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
1008
|
+
totalVerbsProcessed = result.items.length;
|
|
1009
|
+
}
|
|
1010
|
+
else {
|
|
1011
|
+
let verbOffset = 0;
|
|
1012
|
+
const verbLimit = 25;
|
|
1013
|
+
let hasMoreVerbs = true;
|
|
1014
|
+
let consecutiveEmptyVerbBatches = 0;
|
|
1015
|
+
let verbIterations = 0;
|
|
1016
|
+
const MAX_ITERATIONS = 10000;
|
|
1017
|
+
while (hasMoreVerbs && verbIterations < MAX_ITERATIONS) {
|
|
1018
|
+
verbIterations++;
|
|
1019
|
+
const result = await this.storage.getVerbs({
|
|
1020
|
+
pagination: { offset: verbOffset, limit: verbLimit }
|
|
1021
|
+
});
|
|
1022
|
+
if (result.items.length === 0) {
|
|
1023
|
+
consecutiveEmptyVerbBatches++;
|
|
1024
|
+
if (consecutiveEmptyVerbBatches >= 3)
|
|
1025
|
+
break;
|
|
1026
|
+
if (result.hasMore) {
|
|
1027
|
+
hasMoreVerbs = false;
|
|
1028
|
+
break;
|
|
1029
|
+
}
|
|
1030
|
+
}
|
|
1031
|
+
else {
|
|
1032
|
+
consecutiveEmptyVerbBatches = 0;
|
|
1033
|
+
}
|
|
1034
|
+
const verbIds = result.items.map(verb => verb.id);
|
|
1035
|
+
let verbMetadataBatch;
|
|
1036
|
+
if (this.storage.getVerbMetadataBatch) {
|
|
1037
|
+
verbMetadataBatch = await this.storage.getVerbMetadataBatch(verbIds);
|
|
1038
|
+
}
|
|
1039
|
+
else {
|
|
1040
|
+
verbMetadataBatch = new Map();
|
|
1041
|
+
for (const id of verbIds) {
|
|
1042
|
+
try {
|
|
1043
|
+
const metadata = await this.storage.getVerbMetadata(id);
|
|
1044
|
+
if (metadata)
|
|
1045
|
+
verbMetadataBatch.set(id, metadata);
|
|
1046
|
+
}
|
|
1047
|
+
catch { }
|
|
1048
|
+
}
|
|
1049
|
+
}
|
|
1050
|
+
for (const verb of result.items) {
|
|
1051
|
+
const metadata = verbMetadataBatch.get(verb.id);
|
|
1052
|
+
if (metadata) {
|
|
1053
|
+
await this.addToIndex(verb.id, metadata, true, true);
|
|
1054
|
+
}
|
|
1055
|
+
}
|
|
1056
|
+
totalVerbsProcessed += result.items.length;
|
|
1057
|
+
if (totalVerbsProcessed % 5000 === 0) {
|
|
1058
|
+
await this.flushRebuildDirty();
|
|
1059
|
+
}
|
|
1060
|
+
hasMoreVerbs = result.hasMore;
|
|
1061
|
+
verbOffset += verbLimit;
|
|
1062
|
+
await this.yieldToEventLoop();
|
|
1063
|
+
}
|
|
1064
|
+
}
|
|
1065
|
+
// Final flush
|
|
1066
|
+
await this.flushRebuildDirty();
|
|
1067
|
+
await this.flush();
|
|
1068
|
+
prodLog.info(`Metadata index rebuild completed! Processed ${totalNounsProcessed} nouns and ${totalVerbsProcessed} verbs`);
|
|
1069
|
+
}
|
|
1070
|
+
finally {
|
|
1071
|
+
this.isRebuilding = false;
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
/**
|
|
1075
|
+
* During rebuild, persist all dirty data accumulated so far.
|
|
1076
|
+
*/
|
|
1077
|
+
async flushRebuildDirty() {
|
|
1078
|
+
// Save all dirty data currently in Rust
|
|
1079
|
+
const promises = [];
|
|
1080
|
+
// Save all loaded sparse indices
|
|
1081
|
+
for (const field of this.loadedFields) {
|
|
1082
|
+
const json = this.native.saveSparseIndex(field);
|
|
1083
|
+
if (json) {
|
|
1084
|
+
const indexPath = `__sparse_index__${field}`;
|
|
1085
|
+
promises.push(this.storage.saveMetadata(indexPath, JSON.parse(json)));
|
|
1086
|
+
}
|
|
1087
|
+
// Save all chunks for this field
|
|
1088
|
+
const chunkIds = this.native.getSparseIndexChunkIds(field);
|
|
1089
|
+
for (const chunkId of chunkIds) {
|
|
1090
|
+
const chunkJson = this.native.saveChunk(field, chunkId);
|
|
1091
|
+
if (chunkJson) {
|
|
1092
|
+
const chunkPath = `__chunk__${field}_${chunkId}`;
|
|
1093
|
+
promises.push(this.storage.saveMetadata(chunkPath, JSON.parse(chunkJson)));
|
|
1094
|
+
}
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
// Save entity ID mapper
|
|
1098
|
+
promises.push(this.saveEntityIdMapper());
|
|
1099
|
+
await Promise.all(promises);
|
|
1100
|
+
}
|
|
1101
|
+
// ==========================================================================
|
|
1102
|
+
// Clear
|
|
1103
|
+
// ==========================================================================
|
|
1104
|
+
async clearAllIndexData() {
|
|
1105
|
+
prodLog.warn('Clearing ALL metadata index data from storage...');
|
|
1106
|
+
const fields = await this.getPersistedFieldList();
|
|
1107
|
+
for (const field of fields) {
|
|
1108
|
+
await this.deleteFieldChunks(field);
|
|
1109
|
+
}
|
|
1110
|
+
try {
|
|
1111
|
+
await this.storage.saveMetadata('__metadata_field_registry__', null);
|
|
1112
|
+
}
|
|
1113
|
+
catch { }
|
|
1114
|
+
// Clear in-memory state
|
|
1115
|
+
this.native.clear();
|
|
1116
|
+
this.loadedFields.clear();
|
|
1117
|
+
this.knownFields.clear();
|
|
1118
|
+
this.dirtyFields.clear();
|
|
1119
|
+
this.totalEntitiesByType.clear();
|
|
1120
|
+
this.entityCountsByTypeFixed.fill(0);
|
|
1121
|
+
this.verbCountsByTypeFixed.fill(0);
|
|
1122
|
+
this.typeFieldAffinity.clear();
|
|
1123
|
+
this.unifiedCache.clear('metadata');
|
|
1124
|
+
// Clear entity ID mapper in storage
|
|
1125
|
+
try {
|
|
1126
|
+
await this.storage.saveMetadata('brainy:entityIdMapper', null);
|
|
1127
|
+
}
|
|
1128
|
+
catch { }
|
|
1129
|
+
prodLog.info(`Cleared ${fields.length} field indexes and all in-memory state`);
|
|
1130
|
+
}
|
|
1131
|
+
async getPersistedFieldList() {
|
|
1132
|
+
try {
|
|
1133
|
+
const registry = await this.storage.getMetadata('__metadata_field_registry__');
|
|
1134
|
+
if (!registry?.fields || !Array.isArray(registry.fields))
|
|
1135
|
+
return [];
|
|
1136
|
+
return registry.fields.filter((f) => typeof f === 'string' && f.length > 0);
|
|
1137
|
+
}
|
|
1138
|
+
catch {
|
|
1139
|
+
return [];
|
|
1140
|
+
}
|
|
1141
|
+
}
|
|
1142
|
+
async deleteFieldChunks(field) {
|
|
1143
|
+
try {
|
|
1144
|
+
const indexPath = `__sparse_index__${field}`;
|
|
1145
|
+
const sparseData = await this.storage.getMetadata(indexPath);
|
|
1146
|
+
if (sparseData) {
|
|
1147
|
+
// Parse chunk IDs from sparse index
|
|
1148
|
+
let chunkIds = [];
|
|
1149
|
+
if (sparseData.chunks && Array.isArray(sparseData.chunks)) {
|
|
1150
|
+
chunkIds = sparseData.chunks.map((c) => c.chunkId).filter((id) => typeof id === 'number');
|
|
1151
|
+
}
|
|
1152
|
+
// Delete all chunk files
|
|
1153
|
+
for (const chunkId of chunkIds) {
|
|
1154
|
+
const chunkPath = `__chunk__${field}_${chunkId}`;
|
|
1155
|
+
try {
|
|
1156
|
+
await this.storage.saveMetadata(chunkPath, null);
|
|
1157
|
+
}
|
|
1158
|
+
catch { }
|
|
1159
|
+
}
|
|
1160
|
+
// Delete sparse index
|
|
1161
|
+
try {
|
|
1162
|
+
await this.storage.saveMetadata(indexPath, null);
|
|
1163
|
+
}
|
|
1164
|
+
catch { }
|
|
1165
|
+
}
|
|
1166
|
+
// Delete field index
|
|
1167
|
+
try {
|
|
1168
|
+
await this.storage.saveMetadata(`__metadata_field_index__field_${field}`, null);
|
|
1169
|
+
}
|
|
1170
|
+
catch { }
|
|
1171
|
+
}
|
|
1172
|
+
catch {
|
|
1173
|
+
prodLog.debug(`Could not clear chunks for field '${field}'`);
|
|
1174
|
+
}
|
|
1175
|
+
}
|
|
1176
|
+
// ==========================================================================
|
|
1177
|
+
// Field statistics (TS-only analytics)
|
|
1178
|
+
// ==========================================================================
|
|
1179
|
+
async getFieldStatistics() {
|
|
1180
|
+
for (const field of this.knownFields) {
|
|
1181
|
+
if (!this.fieldStats.has(field)) {
|
|
1182
|
+
this.fieldStats.set(field, {
|
|
1183
|
+
cardinality: {
|
|
1184
|
+
uniqueValues: 0,
|
|
1185
|
+
totalValues: 0,
|
|
1186
|
+
distribution: 'uniform',
|
|
1187
|
+
updateFrequency: 0,
|
|
1188
|
+
lastAnalyzed: Date.now()
|
|
1189
|
+
},
|
|
1190
|
+
queryCount: 0,
|
|
1191
|
+
rangeQueryCount: 0,
|
|
1192
|
+
exactQueryCount: 0,
|
|
1193
|
+
avgQueryTime: 0,
|
|
1194
|
+
indexType: 'hash'
|
|
1195
|
+
});
|
|
1196
|
+
}
|
|
1197
|
+
}
|
|
1198
|
+
return new Map(this.fieldStats);
|
|
1199
|
+
}
|
|
1200
|
+
async getFieldCardinality(field) {
|
|
1201
|
+
const stats = this.fieldStats.get(field);
|
|
1202
|
+
return stats ? stats.cardinality : null;
|
|
1203
|
+
}
|
|
1204
|
+
async getFieldsWithCardinality() {
|
|
1205
|
+
const fields = [];
|
|
1206
|
+
for (const [field, stats] of this.fieldStats) {
|
|
1207
|
+
fields.push({
|
|
1208
|
+
field,
|
|
1209
|
+
cardinality: stats.cardinality.uniqueValues,
|
|
1210
|
+
distribution: stats.cardinality.distribution
|
|
1211
|
+
});
|
|
1212
|
+
}
|
|
1213
|
+
fields.sort((a, b) => a.cardinality - b.cardinality);
|
|
1214
|
+
return fields;
|
|
1215
|
+
}
|
|
1216
|
+
async getOptimalQueryPlan(filters) {
|
|
1217
|
+
const fieldOrder = [];
|
|
1218
|
+
let hasRangeQueries = false;
|
|
1219
|
+
let totalEstimatedCost = 0;
|
|
1220
|
+
for (const [field, value] of Object.entries(filters)) {
|
|
1221
|
+
const stats = this.fieldStats.get(field);
|
|
1222
|
+
if (!stats)
|
|
1223
|
+
continue;
|
|
1224
|
+
if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
|
|
1225
|
+
hasRangeQueries = true;
|
|
1226
|
+
}
|
|
1227
|
+
const cardinality = stats.cardinality.uniqueValues;
|
|
1228
|
+
totalEstimatedCost += Math.log2(Math.max(1, cardinality));
|
|
1229
|
+
fieldOrder.push(field);
|
|
1230
|
+
}
|
|
1231
|
+
fieldOrder.sort((a, b) => {
|
|
1232
|
+
const statsA = this.fieldStats.get(a);
|
|
1233
|
+
const statsB = this.fieldStats.get(b);
|
|
1234
|
+
if (!statsA || !statsB)
|
|
1235
|
+
return 0;
|
|
1236
|
+
return statsA.cardinality.uniqueValues - statsB.cardinality.uniqueValues;
|
|
1237
|
+
});
|
|
1238
|
+
return {
|
|
1239
|
+
strategy: hasRangeQueries ? 'hybrid' : 'exact',
|
|
1240
|
+
fieldOrder,
|
|
1241
|
+
estimatedCost: totalEstimatedCost
|
|
1242
|
+
};
|
|
1243
|
+
}
|
|
1244
|
+
async getFieldsForType(nounType) {
|
|
1245
|
+
const typeFields = this.typeFieldAffinity.get(nounType);
|
|
1246
|
+
const totalEntities = this.totalEntitiesByType.get(nounType);
|
|
1247
|
+
if (!typeFields || !totalEntities)
|
|
1248
|
+
return [];
|
|
1249
|
+
const result = [];
|
|
1250
|
+
for (const [field, count] of typeFields.entries()) {
|
|
1251
|
+
result.push({
|
|
1252
|
+
field,
|
|
1253
|
+
affinity: count / totalEntities,
|
|
1254
|
+
occurrences: count,
|
|
1255
|
+
totalEntities
|
|
1256
|
+
});
|
|
1257
|
+
}
|
|
1258
|
+
result.sort((a, b) => b.affinity - a.affinity);
|
|
1259
|
+
return result;
|
|
1260
|
+
}
|
|
1261
|
+
// ==========================================================================
|
|
1262
|
+
// Deprecated / compatibility
|
|
1263
|
+
// ==========================================================================
|
|
1264
|
+
async getIdsForCriteria(criteria) {
|
|
1265
|
+
return this.getIdsForFilter(criteria);
|
|
1266
|
+
}
|
|
1267
|
+
// ==========================================================================
|
|
1268
|
+
// Private helpers
|
|
1269
|
+
// ==========================================================================
|
|
1270
|
+
async yieldToEventLoop() {
|
|
1271
|
+
return new Promise(resolve => setImmediate(resolve));
|
|
1272
|
+
}
|
|
1273
|
+
}
|
|
1274
|
+
//# sourceMappingURL=NativeMetadataIndex.js.map
|