@soulcraft/cortex 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/LICENSE +16 -0
  2. package/README.md +125 -0
  3. package/dist/graph/NativeGraphAdjacencyIndex.d.ts +92 -0
  4. package/dist/graph/NativeGraphAdjacencyIndex.js +671 -0
  5. package/dist/index.d.ts +22 -0
  6. package/dist/index.js +23 -0
  7. package/dist/license.d.ts +18 -0
  8. package/dist/license.js +172 -0
  9. package/dist/native/NativeEmbeddingEngine.d.ts +79 -0
  10. package/dist/native/NativeEmbeddingEngine.js +302 -0
  11. package/dist/native/NativeRoaringBitmap32.d.ts +114 -0
  12. package/dist/native/NativeRoaringBitmap32.js +221 -0
  13. package/dist/native/ffi.d.ts +20 -0
  14. package/dist/native/ffi.js +48 -0
  15. package/dist/native/index.d.ts +30 -0
  16. package/dist/native/index.js +58 -0
  17. package/dist/native/napi.d.ts +21 -0
  18. package/dist/native/napi.js +88 -0
  19. package/dist/native/types.d.ts +710 -0
  20. package/dist/native/types.js +16 -0
  21. package/dist/plugin.d.ts +22 -0
  22. package/dist/plugin.js +115 -0
  23. package/dist/storage/mmapFileSystemStorage.d.ts +24 -0
  24. package/dist/storage/mmapFileSystemStorage.js +73 -0
  25. package/dist/utils/NativeMetadataIndex.d.ts +185 -0
  26. package/dist/utils/NativeMetadataIndex.js +1274 -0
  27. package/dist/utils/nativeEntityIdMapper.d.ts +84 -0
  28. package/dist/utils/nativeEntityIdMapper.js +134 -0
  29. package/native/brainy-native.darwin-arm64.node +0 -0
  30. package/native/brainy-native.darwin-x64.node +0 -0
  31. package/native/brainy-native.linux-arm64-gnu.node +0 -0
  32. package/native/brainy-native.linux-x64-gnu.node +0 -0
  33. package/native/brainy-native.win32-x64-msvc.node +0 -0
  34. package/native/index.d.ts +1068 -0
  35. package/package.json +66 -0
@@ -0,0 +1,1274 @@
1
+ /**
2
+ * NativeMetadataIndex — TypeScript wrapper around the Rust NativeMetadataIndex.
3
+ *
4
+ * Implements the same public API as the old MetadataIndexManager (3,721 lines),
5
+ * delegating core operations (query, mutation, normalization) to Rust.
6
+ *
7
+ * Architecture:
8
+ * - Rust owns: bitmap operations, filter evaluation, value normalization,
9
+ * field extraction, chunk management, text search, entity ID mapping
10
+ * - TS owns: async storage I/O, lazy field loading, rebuild orchestration,
11
+ * flush, getSortedIdsForFilter (loads sort values from storage)
12
+ *
13
+ * Buffer exchange pattern: TS loads data from storage, passes JSON to Rust.
14
+ * Rust operates in-memory, returns serialized state for TS to persist.
15
+ */
16
+ import { prodLog, getGlobalCache, FieldTypeInference } from '@soulcraft/brainy/internals';
17
+ import { TypeUtils, NOUN_TYPE_COUNT, VERB_TYPE_COUNT } from '@soulcraft/brainy/types/graphTypes';
18
+ import { loadNativeModule } from '../native/index.js';
19
+ /**
20
+ * MetadataIndexManager — native Rust implementation with TS storage bridge.
21
+ *
22
+ * Drop-in replacement for the old pure-TS MetadataIndexManager.
23
+ * All bitmap AND/OR/NOT operations execute in Rust without crossing FFI.
24
+ */
25
+ export class MetadataIndexManager {
26
+ storage;
27
+ config;
28
+ native;
29
+ isRebuilding = false;
30
+ lastFlushTime = Date.now();
31
+ autoFlushThreshold = 10;
32
+ dirtyFields = new Set();
33
+ // Lazy field loading: tracks which fields have been loaded into Rust
34
+ loadedFields = new Set();
35
+ // Tracks which field indexes are known (from field registry)
36
+ knownFields = new Set();
37
+ // TS-only analytics (not in Rust)
38
+ fieldStats = new Map();
39
+ typeFieldAffinity = new Map();
40
+ totalEntitiesByType = new Map();
41
+ entityCountsByTypeFixed = new Uint32Array(NOUN_TYPE_COUNT);
42
+ verbCountsByTypeFixed = new Uint32Array(VERB_TYPE_COUNT);
43
+ // Unified cache for coordinated memory management
44
+ unifiedCache;
45
+ // Field Type Inference
46
+ fieldTypeInference;
47
+ constructor(storage, config = {}) {
48
+ this.storage = storage;
49
+ this.config = {
50
+ maxIndexSize: config.maxIndexSize ?? 10000,
51
+ rebuildThreshold: config.rebuildThreshold ?? 0.1,
52
+ autoOptimize: config.autoOptimize ?? true,
53
+ indexedFields: config.indexedFields ?? [],
54
+ excludeFields: config.excludeFields ?? [
55
+ 'embedding', 'vector', 'embeddings', 'vectors',
56
+ 'content', 'data', 'originalData', '_data',
57
+ 'id'
58
+ ]
59
+ };
60
+ // Build Rust config JSON
61
+ const nativeConfig = {};
62
+ if (this.config.excludeFields.length > 0) {
63
+ nativeConfig.excludeFields = this.config.excludeFields;
64
+ }
65
+ if (this.config.indexedFields.length > 0) {
66
+ nativeConfig.indexedFields = this.config.indexedFields;
67
+ }
68
+ const bindings = loadNativeModule();
69
+ this.native = new bindings.NativeMetadataIndex(Object.keys(nativeConfig).length > 0 ? JSON.stringify(nativeConfig) : null);
70
+ this.unifiedCache = getGlobalCache();
71
+ this.fieldTypeInference = new FieldTypeInference(storage);
72
+ }
73
+ // ==========================================================================
74
+ // Initialization
75
+ // ==========================================================================
76
+ async init() {
77
+ // Load field registry to discover persisted indices
78
+ await this.loadFieldRegistry();
79
+ // Load entity ID mapper
80
+ await this.loadEntityIdMapper();
81
+ const hasFields = this.knownFields.size > 0;
82
+ if (hasFields) {
83
+ await this.warmCache();
84
+ await this.lazyLoadCounts();
85
+ this.syncTypeCountsToFixed();
86
+ }
87
+ }
88
+ // ==========================================================================
89
+ // Storage I/O helpers
90
+ // ==========================================================================
91
+ async loadEntityIdMapper() {
92
+ try {
93
+ const data = await this.storage.getMetadata('brainy:entityIdMapper');
94
+ if (data && data.nextId !== undefined) {
95
+ this.native.loadEntityIdMapper(JSON.stringify(data));
96
+ }
97
+ }
98
+ catch {
99
+ // First time — mapper starts empty
100
+ }
101
+ }
102
+ async saveEntityIdMapper() {
103
+ if (!this.native.isEntityIdMapperDirty())
104
+ return;
105
+ const json = this.native.saveEntityIdMapper();
106
+ const data = JSON.parse(json);
107
+ await this.storage.saveMetadata('brainy:entityIdMapper', data);
108
+ // Reload to clear dirty flag
109
+ this.native.loadEntityIdMapper(json);
110
+ }
111
+ async loadFieldRegistry() {
112
+ try {
113
+ const registry = await this.storage.getMetadata('__metadata_field_registry__');
114
+ if (!registry?.fields || !Array.isArray(registry.fields)) {
115
+ prodLog.debug('No field registry found - will build on first flush');
116
+ return;
117
+ }
118
+ // Load into Rust
119
+ this.native.loadFieldRegistry(JSON.stringify(registry));
120
+ // Track known fields
121
+ for (const field of registry.fields) {
122
+ if (typeof field === 'string' && field.length > 0) {
123
+ this.knownFields.add(field);
124
+ }
125
+ }
126
+ prodLog.info(`Loaded field registry: ${registry.fields.length} persisted fields discovered\n` +
127
+ ` Fields: ${registry.fields.slice(0, 5).join(', ')}${registry.fields.length > 5 ? '...' : ''}`);
128
+ }
129
+ catch (error) {
130
+ prodLog.debug('Could not load field registry:', error);
131
+ }
132
+ }
133
+ async saveFieldRegistry() {
134
+ if (this.knownFields.size === 0)
135
+ return;
136
+ try {
137
+ const json = this.native.saveFieldRegistry();
138
+ const data = JSON.parse(json);
139
+ await this.storage.saveMetadata('__metadata_field_registry__', data);
140
+ }
141
+ catch (error) {
142
+ prodLog.warn('Failed to save field registry:', error);
143
+ }
144
+ }
145
+ /**
146
+ * Ensure a field's sparse index + all chunks are loaded into Rust.
147
+ * Matches the lazy-loading pattern of the old UnifiedCache approach.
148
+ */
149
+ async ensureFieldLoaded(field) {
150
+ if (this.loadedFields.has(field))
151
+ return;
152
+ const indexPath = `__sparse_index__${field}`;
153
+ try {
154
+ const sparseData = await this.storage.getMetadata(indexPath);
155
+ if (sparseData) {
156
+ this.native.loadSparseIndex(field, JSON.stringify(sparseData));
157
+ const chunkIds = this.native.getSparseIndexChunkIds(field);
158
+ // Load all chunks in parallel
159
+ await Promise.all(chunkIds.map(async (chunkId) => {
160
+ if (!this.native.isChunkLoaded(field, chunkId)) {
161
+ const chunkPath = `__chunk__${field}_${chunkId}`;
162
+ const chunkData = await this.storage.getMetadata(chunkPath);
163
+ if (chunkData) {
164
+ this.native.loadChunk(field, chunkId, JSON.stringify(chunkData));
165
+ }
166
+ }
167
+ }));
168
+ }
169
+ }
170
+ catch (error) {
171
+ prodLog.debug(`Failed to load field '${field}':`, error);
172
+ }
173
+ this.loadedFields.add(field);
174
+ }
175
+ /**
176
+ * Ensure multiple fields are loaded (parallel).
177
+ */
178
+ async ensureFieldsLoaded(fields) {
179
+ const unloaded = fields.filter(f => !this.loadedFields.has(f));
180
+ if (unloaded.length === 0)
181
+ return;
182
+ await Promise.all(unloaded.map(f => this.ensureFieldLoaded(f)));
183
+ }
184
+ /**
185
+ * Extract filter field names for lazy loading.
186
+ */
187
+ extractFilterFields(filter) {
188
+ if (!filter || typeof filter !== 'object')
189
+ return [];
190
+ const fields = [];
191
+ if (filter.allOf && Array.isArray(filter.allOf)) {
192
+ for (const sub of filter.allOf) {
193
+ fields.push(...this.extractFilterFields(sub));
194
+ }
195
+ }
196
+ if (filter.anyOf && Array.isArray(filter.anyOf)) {
197
+ for (const sub of filter.anyOf) {
198
+ fields.push(...this.extractFilterFields(sub));
199
+ }
200
+ }
201
+ for (const key of Object.keys(filter)) {
202
+ if (key !== 'allOf' && key !== 'anyOf' && key !== 'not') {
203
+ fields.push(key);
204
+ }
205
+ }
206
+ return [...new Set(fields)];
207
+ }
208
+ // ==========================================================================
209
+ // Dirty data persistence
210
+ // ==========================================================================
211
+ /**
212
+ * Persist dirty chunks, sparse indices, field indexes from a MutationResult.
213
+ */
214
+ async persistMutationResult(result) {
215
+ const promises = [];
216
+ // Save dirty chunks
217
+ for (const { field, chunkId } of result.dirtyChunks) {
218
+ const json = this.native.saveChunk(field, chunkId);
219
+ if (json) {
220
+ const chunkPath = `__chunk__${field}_${chunkId}`;
221
+ promises.push(this.storage.saveMetadata(chunkPath, JSON.parse(json)));
222
+ }
223
+ }
224
+ // Save dirty sparse indices
225
+ for (const field of result.dirtySparseIndices) {
226
+ const json = this.native.saveSparseIndex(field);
227
+ if (json) {
228
+ const indexPath = `__sparse_index__${field}`;
229
+ promises.push(this.storage.saveMetadata(indexPath, JSON.parse(json)));
230
+ }
231
+ }
232
+ // Save dirty field indexes
233
+ for (const field of result.dirtyFieldIndexes) {
234
+ const json = this.native.saveFieldIndex(field);
235
+ if (json) {
236
+ const indexId = `__metadata_field_index__field_${field}`;
237
+ promises.push(this.storage.saveMetadata(indexId, JSON.parse(json)));
238
+ }
239
+ }
240
+ // Track new fields
241
+ for (const field of result.newFields) {
242
+ this.knownFields.add(field);
243
+ this.dirtyFields.add(field);
244
+ }
245
+ if (result.dirtyFieldRegistry) {
246
+ promises.push(this.saveFieldRegistry());
247
+ }
248
+ if (result.dirtyEntityIdMapper) {
249
+ promises.push(this.saveEntityIdMapper());
250
+ }
251
+ await Promise.all(promises);
252
+ }
253
+ // ==========================================================================
254
+ // Cache warming
255
+ // ==========================================================================
256
+ async warmCache() {
257
+ const commonFields = ['noun', 'type', 'service', 'createdAt'];
258
+ await Promise.all(commonFields.map(async (field) => {
259
+ try {
260
+ await this.ensureFieldLoaded(field);
261
+ }
262
+ catch {
263
+ prodLog.debug(`Cache warming: field '${field}' not yet indexed`);
264
+ }
265
+ }));
266
+ await this.warmCacheForTopTypes(3);
267
+ }
268
+ async warmCacheForTopTypes(topN = 3) {
269
+ const topTypes = this.getTopNounTypes(topN);
270
+ if (topTypes.length === 0)
271
+ return;
272
+ for (const type of topTypes) {
273
+ const typeFields = this.typeFieldAffinity.get(type);
274
+ if (!typeFields)
275
+ continue;
276
+ const topFields = Array.from(typeFields.entries())
277
+ .sort((a, b) => b[1] - a[1])
278
+ .slice(0, 5)
279
+ .map(([field]) => field);
280
+ if (topFields.length === 0)
281
+ continue;
282
+ await Promise.all(topFields.map(async (field) => {
283
+ try {
284
+ await this.ensureFieldLoaded(field);
285
+ }
286
+ catch {
287
+ // Silently ignore
288
+ }
289
+ }));
290
+ }
291
+ }
292
+ // ==========================================================================
293
+ // Lazy load counts
294
+ // ==========================================================================
295
+ async lazyLoadCounts() {
296
+ try {
297
+ this.totalEntitiesByType.clear();
298
+ this.entityCountsByTypeFixed.fill(0);
299
+ this.verbCountsByTypeFixed.fill(0);
300
+ // Ensure 'noun' field is loaded into Rust
301
+ await this.ensureFieldLoaded('noun');
302
+ // Use Rust to get type counts via the noun field
303
+ const nounValues = this.native.getFilterValues('noun');
304
+ for (const typeName of nounValues) {
305
+ const ids = this.native.getIds('noun', JSON.stringify(typeName));
306
+ if (ids.length > 0) {
307
+ this.totalEntitiesByType.set(typeName, ids.length);
308
+ }
309
+ }
310
+ prodLog.debug(`Loaded type counts: ${this.totalEntitiesByType.size} types`);
311
+ }
312
+ catch (error) {
313
+ prodLog.debug('Could not load type counts:', error);
314
+ }
315
+ }
316
+ syncTypeCountsToFixed() {
317
+ for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
318
+ const type = TypeUtils.getNounFromIndex(i);
319
+ const count = this.totalEntitiesByType.get(type) || 0;
320
+ this.entityCountsByTypeFixed[i] = count;
321
+ }
322
+ for (let i = 0; i < VERB_TYPE_COUNT; i++) {
323
+ const type = TypeUtils.getVerbFromIndex(i);
324
+ const count = this.totalEntitiesByType.get(type) || 0;
325
+ this.verbCountsByTypeFixed[i] = count;
326
+ }
327
+ }
328
+ syncTypeCountsFromFixed() {
329
+ for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
330
+ const count = this.entityCountsByTypeFixed[i];
331
+ if (count > 0) {
332
+ const type = TypeUtils.getNounFromIndex(i);
333
+ this.totalEntitiesByType.set(type, count);
334
+ }
335
+ }
336
+ for (let i = 0; i < VERB_TYPE_COUNT; i++) {
337
+ const count = this.verbCountsByTypeFixed[i];
338
+ if (count > 0) {
339
+ const type = TypeUtils.getVerbFromIndex(i);
340
+ this.totalEntitiesByType.set(type, count);
341
+ }
342
+ }
343
+ }
344
+ // ==========================================================================
345
+ // Core query methods
346
+ // ==========================================================================
347
+ async getIds(field, value) {
348
+ await this.ensureFieldLoaded(field);
349
+ return this.native.getIds(field, JSON.stringify(value));
350
+ }
351
+ async getIdsForFilter(filter) {
352
+ if (!filter || Object.keys(filter).length === 0)
353
+ return [];
354
+ // Extract fields and ensure loaded
355
+ const fields = this.extractFilterFields(filter);
356
+ await this.ensureFieldsLoaded(fields);
357
+ // For ne/exists:false/missing operators, we need allIds
358
+ const needsAllIds = this.filterNeedsAllIds(filter);
359
+ let allIdsJson = null;
360
+ if (needsAllIds) {
361
+ const allIds = await this.getAllIds();
362
+ allIdsJson = JSON.stringify(allIds);
363
+ }
364
+ return this.native.getIdsForFilter(JSON.stringify(filter), allIdsJson);
365
+ }
366
+ filterNeedsAllIds(filter) {
367
+ if (!filter || typeof filter !== 'object')
368
+ return false;
369
+ if (filter.allOf && Array.isArray(filter.allOf)) {
370
+ return filter.allOf.some((sub) => this.filterNeedsAllIds(sub));
371
+ }
372
+ if (filter.anyOf && Array.isArray(filter.anyOf)) {
373
+ return filter.anyOf.some((sub) => this.filterNeedsAllIds(sub));
374
+ }
375
+ for (const [_field, condition] of Object.entries(filter)) {
376
+ if (_field === 'allOf' || _field === 'anyOf' || _field === 'not')
377
+ continue;
378
+ if (condition && typeof condition === 'object' && !Array.isArray(condition)) {
379
+ for (const op of Object.keys(condition)) {
380
+ if (op === 'ne' || op === 'isNot' || op === 'notEquals')
381
+ return true;
382
+ if (op === 'exists' && !condition[op])
383
+ return true;
384
+ if (op === 'missing' && condition[op])
385
+ return true;
386
+ }
387
+ }
388
+ }
389
+ return false;
390
+ }
391
+ async getIdsForMultipleFields(fieldValuePairs) {
392
+ if (fieldValuePairs.length === 0)
393
+ return [];
394
+ const fields = fieldValuePairs.map(p => p.field);
395
+ await this.ensureFieldsLoaded(fields);
396
+ const pairsJson = JSON.stringify(fieldValuePairs.map(p => ({
397
+ field: p.field,
398
+ value: p.value
399
+ })));
400
+ return this.native.getIdsForMultipleFields(pairsJson);
401
+ }
402
+ async getIdsForTextQuery(query) {
403
+ await this.ensureFieldLoaded('__words__');
404
+ const resultJson = this.native.getIdsForTextQuery(query);
405
+ return JSON.parse(resultJson);
406
+ }
407
+ async getSortedIdsForFilter(filter, orderBy, order = 'asc') {
408
+ const filteredIds = await this.getIdsForFilter(filter);
409
+ if (filteredIds.length === 0)
410
+ return [];
411
+ const idValuePairs = [];
412
+ for (const id of filteredIds) {
413
+ const value = await this.getFieldValueForEntity(id, orderBy);
414
+ idValuePairs.push({ id, value });
415
+ }
416
+ idValuePairs.sort((a, b) => {
417
+ if (a.value == null && b.value == null)
418
+ return 0;
419
+ if (a.value == null)
420
+ return order === 'asc' ? 1 : -1;
421
+ if (b.value == null)
422
+ return order === 'asc' ? -1 : 1;
423
+ if (a.value === b.value)
424
+ return 0;
425
+ const comparison = a.value < b.value ? -1 : 1;
426
+ return order === 'asc' ? comparison : -comparison;
427
+ });
428
+ return idValuePairs.map(p => p.id);
429
+ }
430
+ async getFieldValueForEntity(entityId, field) {
431
+ // For timestamp fields, load actual value from entity metadata
432
+ if (field === 'createdAt' || field === 'updatedAt' || field === 'accessed' || field === 'modified') {
433
+ try {
434
+ const noun = await this.storage.getNoun(entityId);
435
+ if (noun && noun.metadata) {
436
+ return noun.metadata[field];
437
+ }
438
+ }
439
+ catch {
440
+ // Fall back to index value
441
+ }
442
+ }
443
+ // For non-timestamp fields, scan chunks for this entity's value
444
+ await this.ensureFieldLoaded(field);
445
+ const ids = this.native.getIds(field, JSON.stringify(entityId));
446
+ // getIds returns IDs matching field=value, but we need value for field+entityId
447
+ // We need to scan through values — use getFilterValues and check membership
448
+ const values = this.native.getFilterValues(field);
449
+ for (const val of values) {
450
+ const matchIds = this.native.getIds(field, JSON.stringify(val));
451
+ if (matchIds.includes(entityId)) {
452
+ return this.denormalizeValue(val, field);
453
+ }
454
+ }
455
+ return undefined;
456
+ }
457
+ denormalizeValue(normalized, _field) {
458
+ const asNumber = Number(normalized);
459
+ if (!isNaN(asNumber))
460
+ return asNumber;
461
+ return normalized;
462
+ }
463
+ // ==========================================================================
464
+ // Filter discovery
465
+ // ==========================================================================
466
+ async getFilterValues(field) {
467
+ await this.ensureFieldLoaded(field);
468
+ return this.native.getFilterValues(field);
469
+ }
470
+ async getFilterFields() {
471
+ return this.native.getFilterFields();
472
+ }
473
+ // ==========================================================================
474
+ // Mutation methods
475
+ // ==========================================================================
476
+ async addToIndex(id, entityOrMetadata, skipFlush = false, deferWrites = false) {
477
+ // Extract field names from entity to know which fields to load
478
+ const fieldNames = this.native.extractFieldNames(JSON.stringify(entityOrMetadata));
479
+ await this.ensureFieldsLoaded(fieldNames);
480
+ // Execute mutation in Rust (single FFI call)
481
+ const resultJson = this.native.addToIndex(id, JSON.stringify(entityOrMetadata));
482
+ const result = JSON.parse(resultJson);
483
+ // Track new fields in loadedFields
484
+ for (const field of result.newFields) {
485
+ this.loadedFields.add(field);
486
+ this.knownFields.add(field);
487
+ }
488
+ // Update TS-side type tracking
489
+ this.updateTypeTracking(id, entityOrMetadata, 'add');
490
+ // Persist dirty data (unless deferred for batch operations)
491
+ if (!deferWrites) {
492
+ await this.persistMutationResult(result);
493
+ }
494
+ // Adaptive auto-flush
495
+ if (!skipFlush) {
496
+ for (const field of fieldNames) {
497
+ this.dirtyFields.add(field);
498
+ }
499
+ const timeSinceLastFlush = Date.now() - this.lastFlushTime;
500
+ const shouldAutoFlush = this.dirtyFields.size >= this.autoFlushThreshold ||
501
+ (this.dirtyFields.size > 10 && timeSinceLastFlush > 5000);
502
+ if (shouldAutoFlush) {
503
+ const startTime = Date.now();
504
+ await this.flush();
505
+ const flushTime = Date.now() - startTime;
506
+ if (flushTime < 50) {
507
+ this.autoFlushThreshold = Math.min(200, this.autoFlushThreshold * 1.2);
508
+ }
509
+ else if (flushTime > 200) {
510
+ this.autoFlushThreshold = Math.max(20, this.autoFlushThreshold * 0.8);
511
+ }
512
+ }
513
+ }
514
+ }
515
+ async removeFromIndex(id, metadata) {
516
+ if (metadata) {
517
+ const fieldNames = this.native.extractFieldNames(JSON.stringify(metadata));
518
+ await this.ensureFieldsLoaded(fieldNames);
519
+ const resultJson = this.native.removeFromIndex(id, JSON.stringify(metadata));
520
+ const result = JSON.parse(resultJson);
521
+ this.updateTypeTracking(id, metadata, 'remove');
522
+ await this.persistMutationResult(result);
523
+ }
524
+ else {
525
+ // Remove from all loaded fields (slower path)
526
+ prodLog.warn(`Removing ID ${id} without metadata requires scanning all fields (slow)`);
527
+ const loadedFields = this.native.getLoadedFields();
528
+ for (const field of loadedFields) {
529
+ await this.ensureFieldLoaded(field);
530
+ }
531
+ const resultJson = this.native.removeFromIndex(id);
532
+ const result = JSON.parse(resultJson);
533
+ await this.persistMutationResult(result);
534
+ }
535
+ }
536
+ updateTypeTracking(id, entityOrMetadata, operation) {
537
+ const nounValue = entityOrMetadata?.type || entityOrMetadata?.noun;
538
+ if (!nounValue)
539
+ return;
540
+ const entityType = String(nounValue).toLowerCase().trim();
541
+ if (operation === 'add') {
542
+ const current = this.totalEntitiesByType.get(entityType) || 0;
543
+ const newCount = current + 1;
544
+ this.totalEntitiesByType.set(entityType, newCount);
545
+ try {
546
+ const nounTypeIndex = TypeUtils.getNounIndex(entityType);
547
+ this.entityCountsByTypeFixed[nounTypeIndex] = newCount;
548
+ }
549
+ catch {
550
+ // Not a recognized noun type
551
+ }
552
+ // Update type-field affinity
553
+ if (!this.typeFieldAffinity.has(entityType)) {
554
+ this.typeFieldAffinity.set(entityType, new Map());
555
+ }
556
+ const typeFields = this.typeFieldAffinity.get(entityType);
557
+ const fieldNames = this.native.extractFieldNames(JSON.stringify(entityOrMetadata));
558
+ for (const field of fieldNames) {
559
+ const count = typeFields.get(field) || 0;
560
+ typeFields.set(field, count + 1);
561
+ }
562
+ }
563
+ else if (operation === 'remove') {
564
+ const current = this.totalEntitiesByType.get(entityType) || 0;
565
+ if (current > 1) {
566
+ const newCount = current - 1;
567
+ this.totalEntitiesByType.set(entityType, newCount);
568
+ try {
569
+ const nounTypeIndex = TypeUtils.getNounIndex(entityType);
570
+ this.entityCountsByTypeFixed[nounTypeIndex] = newCount;
571
+ }
572
+ catch { }
573
+ }
574
+ else {
575
+ this.totalEntitiesByType.delete(entityType);
576
+ this.typeFieldAffinity.delete(entityType);
577
+ try {
578
+ const nounTypeIndex = TypeUtils.getNounIndex(entityType);
579
+ this.entityCountsByTypeFixed[nounTypeIndex] = 0;
580
+ }
581
+ catch { }
582
+ }
583
+ }
584
+ }
585
+ // ==========================================================================
586
+ // Tokenization (delegates to Rust via extractFieldNames, but also exposed)
587
+ // ==========================================================================
588
+ tokenize(text) {
589
+ if (!text)
590
+ return [];
591
+ return text
592
+ .toLowerCase()
593
+ .replace(/[^\w\s]/g, ' ')
594
+ .split(/\s+/)
595
+ .filter(w => w.length >= 2 && w.length <= 50)
596
+ .filter((w, i, arr) => arr.indexOf(w) === i);
597
+ }
598
+ hashWord(word) {
599
+ let hash = 2166136261;
600
+ for (let i = 0; i < word.length; i++) {
601
+ hash ^= word.charCodeAt(i);
602
+ hash = Math.imul(hash, 16777619);
603
+ }
604
+ return hash | 0;
605
+ }
606
+ extractTextContent(data) {
607
+ if (data === null || data === undefined)
608
+ return '';
609
+ if (typeof data === 'string')
610
+ return data;
611
+ if (typeof data === 'number' || typeof data === 'boolean')
612
+ return String(data);
613
+ if (Array.isArray(data)) {
614
+ if (data.length > 0 && typeof data[0] === 'number')
615
+ return '';
616
+ return data.map(d => this.extractTextContent(d)).filter(Boolean).join(' ');
617
+ }
618
+ if (typeof data === 'object') {
619
+ const skipKeys = new Set(['vector', 'embedding', 'embeddings', 'connections', 'level', 'id']);
620
+ const texts = [];
621
+ for (const [key, value] of Object.entries(data)) {
622
+ if (skipKeys.has(key) || /^\d+$/.test(key))
623
+ continue;
624
+ const text = this.extractTextContent(value);
625
+ if (text)
626
+ texts.push(text);
627
+ }
628
+ return texts.join(' ');
629
+ }
630
+ return '';
631
+ }
632
+ // ==========================================================================
633
+ // Entity counting
634
+ // ==========================================================================
635
+ getEntityCountByType(type) {
636
+ return this.totalEntitiesByType.get(type) || 0;
637
+ }
638
+ getTotalEntityCount() {
639
+ let total = 0;
640
+ for (const count of this.totalEntitiesByType.values()) {
641
+ total += count;
642
+ }
643
+ return total;
644
+ }
645
+ getAllEntityCounts() {
646
+ return new Map(this.totalEntitiesByType);
647
+ }
648
+ getEntityCountByTypeEnum(type) {
649
+ const index = TypeUtils.getNounIndex(type);
650
+ return this.entityCountsByTypeFixed[index];
651
+ }
652
+ getVerbCountByTypeEnum(type) {
653
+ const index = TypeUtils.getVerbIndex(type);
654
+ return this.verbCountsByTypeFixed[index];
655
+ }
656
+ getTopNounTypes(n) {
657
+ const types = [];
658
+ for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
659
+ const count = this.entityCountsByTypeFixed[i];
660
+ if (count > 0) {
661
+ const type = TypeUtils.getNounFromIndex(i);
662
+ types.push({ type, count });
663
+ }
664
+ }
665
+ return types.sort((a, b) => b.count - a.count).slice(0, n).map(t => t.type);
666
+ }
667
+ getTopVerbTypes(n) {
668
+ const types = [];
669
+ for (let i = 0; i < VERB_TYPE_COUNT; i++) {
670
+ const count = this.verbCountsByTypeFixed[i];
671
+ if (count > 0) {
672
+ const type = TypeUtils.getVerbFromIndex(i);
673
+ types.push({ type, count });
674
+ }
675
+ }
676
+ return types.sort((a, b) => b.count - a.count).slice(0, n).map(t => t.type);
677
+ }
678
+ getAllNounTypeCounts() {
679
+ const counts = new Map();
680
+ for (let i = 0; i < NOUN_TYPE_COUNT; i++) {
681
+ const count = this.entityCountsByTypeFixed[i];
682
+ if (count > 0) {
683
+ counts.set(TypeUtils.getNounFromIndex(i), count);
684
+ }
685
+ }
686
+ return counts;
687
+ }
688
+ getAllVerbTypeCounts() {
689
+ const counts = new Map();
690
+ for (let i = 0; i < VERB_TYPE_COUNT; i++) {
691
+ const count = this.verbCountsByTypeFixed[i];
692
+ if (count > 0) {
693
+ counts.set(TypeUtils.getVerbFromIndex(i), count);
694
+ }
695
+ }
696
+ return counts;
697
+ }
698
+ // ==========================================================================
699
+ // VFS Statistics (roaring bitmap intersection via Rust)
700
+ // ==========================================================================
701
+ async getVFSEntityCountByType(type) {
702
+ await this.ensureFieldsLoaded(['isVFSEntity', 'noun']);
703
+ const vfsIds = this.native.getIds('isVFSEntity', JSON.stringify(true));
704
+ const typeIds = this.native.getIds('noun', JSON.stringify(type));
705
+ const vfsSet = new Set(vfsIds);
706
+ return typeIds.filter(id => vfsSet.has(id)).length;
707
+ }
708
+ async getAllVFSEntityCounts() {
709
+ await this.ensureFieldsLoaded(['isVFSEntity', 'noun']);
710
+ const vfsIds = this.native.getIds('isVFSEntity', JSON.stringify(true));
711
+ if (vfsIds.length === 0)
712
+ return new Map();
713
+ const vfsSet = new Set(vfsIds);
714
+ const result = new Map();
715
+ for (const type of this.totalEntitiesByType.keys()) {
716
+ const typeIds = this.native.getIds('noun', JSON.stringify(type));
717
+ const count = typeIds.filter(id => vfsSet.has(id)).length;
718
+ if (count > 0)
719
+ result.set(type, count);
720
+ }
721
+ return result;
722
+ }
723
+ async getTotalVFSEntityCount() {
724
+ await this.ensureFieldLoaded('isVFSEntity');
725
+ const vfsIds = this.native.getIds('isVFSEntity', JSON.stringify(true));
726
+ return vfsIds.length;
727
+ }
728
+ // ==========================================================================
729
+ // Criteria-based counting
730
+ // ==========================================================================
731
+ async getCountForCriteria(field, value) {
732
+ const ids = await this.getIds(field, value);
733
+ return ids.length;
734
+ }
735
+ // ==========================================================================
736
+ // getAllIds
737
+ // ==========================================================================
738
+ async getAllIds() {
739
+ const allIds = new Set();
740
+ if (this.storage && typeof this.storage.getNouns === 'function') {
741
+ try {
742
+ const result = await this.storage.getNouns({
743
+ pagination: { limit: 100000 }
744
+ });
745
+ if (result && result.items) {
746
+ result.items.forEach((item) => {
747
+ if (item.id)
748
+ allIds.add(item.id);
749
+ });
750
+ }
751
+ }
752
+ catch (e) {
753
+ prodLog.warn('Failed to get all IDs from storage:', e);
754
+ return [];
755
+ }
756
+ }
757
+ return Array.from(allIds);
758
+ }
759
+ // ==========================================================================
760
+ // Flush
761
+ // ==========================================================================
762
+ async flush() {
763
+ if (this.dirtyFields.size === 0)
764
+ return;
765
+ // Save dirty field indexes
766
+ const promises = [];
767
+ for (const field of this.dirtyFields) {
768
+ const json = this.native.saveFieldIndex(field);
769
+ if (json) {
770
+ const indexId = `__metadata_field_index__field_${field}`;
771
+ promises.push(this.storage.saveMetadata(indexId, JSON.parse(json)));
772
+ }
773
+ }
774
+ await Promise.all(promises);
775
+ // Save entity ID mapper
776
+ await this.saveEntityIdMapper();
777
+ // Save field registry
778
+ await this.saveFieldRegistry();
779
+ this.dirtyFields.clear();
780
+ this.lastFlushTime = Date.now();
781
+ }
782
+ // ==========================================================================
783
+ // Stats
784
+ // ==========================================================================
785
+ async getStats() {
786
+ const statsJson = this.native.getStats();
787
+ const stats = JSON.parse(statsJson);
788
+ return {
789
+ totalEntries: stats.totalEntries || 0,
790
+ totalIds: 0,
791
+ fieldsIndexed: stats.fieldsIndexed || [],
792
+ lastRebuild: Date.now(),
793
+ indexSize: (stats.totalEntries || 0) * 100
794
+ };
795
+ }
796
+ // ==========================================================================
797
+ // Validation
798
+ // ==========================================================================
799
+ async validateConsistency() {
800
+ const entityCount = this.native.entityIdMapperSize();
801
+ if (entityCount === 0) {
802
+ return {
803
+ healthy: true,
804
+ avgEntriesPerEntity: 0,
805
+ entityCount: 0,
806
+ indexEntryCount: 0,
807
+ recommendation: null
808
+ };
809
+ }
810
+ // Load all fields to count entries
811
+ for (const field of this.knownFields) {
812
+ await this.ensureFieldLoaded(field);
813
+ }
814
+ const statsJson = this.native.getStats();
815
+ const stats = JSON.parse(statsJson);
816
+ const indexEntryCount = stats.totalEntries || 0;
817
+ const avgEntriesPerEntity = indexEntryCount / entityCount;
818
+ const CORRUPTION_THRESHOLD = 100;
819
+ const healthy = avgEntriesPerEntity <= CORRUPTION_THRESHOLD;
820
+ let recommendation = null;
821
+ if (!healthy) {
822
+ recommendation = `Index corruption detected (${avgEntriesPerEntity.toFixed(1)} avg entries/entity, expected ~30). ` +
823
+ `Run brain.index.clearAllIndexData() followed by brain.index.rebuild() to repair.`;
824
+ }
825
+ return { healthy, avgEntriesPerEntity, entityCount, indexEntryCount, recommendation };
826
+ }
827
+ async detectAndRepairCorruption() {
828
+ const validation = await this.validateConsistency();
829
+ if (!validation.healthy) {
830
+ prodLog.warn(`Index corruption detected (${validation.avgEntriesPerEntity.toFixed(1)} avg entries/entity)`);
831
+ prodLog.warn('Auto-rebuilding index to repair...');
832
+ await this.clearAllIndexData();
833
+ await this.rebuild();
834
+ const postRebuild = await this.validateConsistency();
835
+ if (postRebuild.healthy) {
836
+ prodLog.info(`Index rebuilt successfully (${postRebuild.avgEntriesPerEntity.toFixed(1)} avg entries/entity)`);
837
+ }
838
+ else {
839
+ prodLog.error(`Index still appears corrupted after rebuild (${postRebuild.avgEntriesPerEntity.toFixed(1)} avg entries/entity).`);
840
+ }
841
+ }
842
+ }
843
+ // ==========================================================================
844
+ // Rebuild
845
+ // ==========================================================================
846
+ async rebuild() {
847
+ if (this.isRebuilding)
848
+ return;
849
+ this.isRebuilding = true;
850
+ try {
851
+ prodLog.info('Starting metadata index rebuild with batch processing...');
852
+ // Clear in-memory state
853
+ this.native.clear();
854
+ this.loadedFields.clear();
855
+ this.knownFields.clear();
856
+ this.dirtyFields.clear();
857
+ this.totalEntitiesByType.clear();
858
+ this.entityCountsByTypeFixed.fill(0);
859
+ this.verbCountsByTypeFixed.fill(0);
860
+ this.typeFieldAffinity.clear();
861
+ this.unifiedCache.clear('metadata');
862
+ // Delete existing chunk files from storage
863
+ prodLog.info('Clearing existing metadata index chunks from storage...');
864
+ const existingFields = await this.getPersistedFieldList();
865
+ if (existingFields.length > 0) {
866
+ for (const field of existingFields) {
867
+ await this.deleteFieldChunks(field);
868
+ }
869
+ try {
870
+ await this.storage.saveMetadata('__metadata_field_registry__', null);
871
+ }
872
+ catch { }
873
+ prodLog.info(`Cleared ${existingFields.length} field indexes from storage`);
874
+ }
875
+ // Clear entity ID mapper in storage
876
+ try {
877
+ await this.storage.saveMetadata('brainy:entityIdMapper', null);
878
+ }
879
+ catch { }
880
+ // Adaptive rebuild strategy
881
+ const storageType = this.storage.constructor.name;
882
+ const isLocalStorage = storageType === 'FileSystemStorage' || storageType === 'MemoryStorage';
883
+ let totalNounsProcessed = 0;
884
+ if (isLocalStorage) {
885
+ const result = await this.storage.getNouns({
886
+ pagination: { offset: 0, limit: 1000000 }
887
+ });
888
+ prodLog.info(`Loading ${result.items.length} nouns with metadata...`);
889
+ const nounIds = result.items.map(noun => noun.id);
890
+ let metadataBatch;
891
+ if (this.storage.getMetadataBatch) {
892
+ metadataBatch = await this.storage.getMetadataBatch(nounIds);
893
+ }
894
+ else {
895
+ metadataBatch = new Map();
896
+ for (const id of nounIds) {
897
+ try {
898
+ const metadata = await this.storage.getNounMetadata(id);
899
+ if (metadata)
900
+ metadataBatch.set(id, metadata);
901
+ }
902
+ catch { }
903
+ }
904
+ }
905
+ let localCount = 0;
906
+ for (const noun of result.items) {
907
+ const metadata = metadataBatch.get(noun.id);
908
+ if (metadata) {
909
+ await this.addToIndex(noun.id, metadata, true, true);
910
+ localCount++;
911
+ if (localCount % 5000 === 0) {
912
+ await this.flushRebuildDirty();
913
+ }
914
+ }
915
+ }
916
+ totalNounsProcessed = result.items.length;
917
+ prodLog.info(`Indexed ${totalNounsProcessed} nouns`);
918
+ }
919
+ else {
920
+ // Cloud storage: conservative batching
921
+ const nounLimit = 25;
922
+ let nounOffset = 0;
923
+ let hasMoreNouns = true;
924
+ let consecutiveEmptyBatches = 0;
925
+ const MAX_ITERATIONS = 10000;
926
+ let iterations = 0;
927
+ while (hasMoreNouns && iterations < MAX_ITERATIONS) {
928
+ iterations++;
929
+ const result = await this.storage.getNouns({
930
+ pagination: { offset: nounOffset, limit: nounLimit }
931
+ });
932
+ if (result.items.length === 0) {
933
+ consecutiveEmptyBatches++;
934
+ if (consecutiveEmptyBatches >= 3)
935
+ break;
936
+ if (result.hasMore) {
937
+ hasMoreNouns = false;
938
+ break;
939
+ }
940
+ }
941
+ else {
942
+ consecutiveEmptyBatches = 0;
943
+ }
944
+ const nounIds = result.items.map(noun => noun.id);
945
+ let metadataBatch;
946
+ if (this.storage.getMetadataBatch) {
947
+ metadataBatch = await this.storage.getMetadataBatch(nounIds);
948
+ }
949
+ else {
950
+ metadataBatch = new Map();
951
+ for (const id of nounIds) {
952
+ try {
953
+ const metadata = await this.storage.getNounMetadata(id);
954
+ if (metadata)
955
+ metadataBatch.set(id, metadata);
956
+ }
957
+ catch { }
958
+ }
959
+ }
960
+ for (const noun of result.items) {
961
+ const metadata = metadataBatch.get(noun.id);
962
+ if (metadata) {
963
+ await this.addToIndex(noun.id, metadata, true, true);
964
+ }
965
+ }
966
+ totalNounsProcessed += result.items.length;
967
+ if (totalNounsProcessed % 5000 === 0) {
968
+ await this.flushRebuildDirty();
969
+ }
970
+ hasMoreNouns = result.hasMore;
971
+ nounOffset += nounLimit;
972
+ await this.yieldToEventLoop();
973
+ }
974
+ }
975
+ // Rebuild verb metadata indexes
976
+ let totalVerbsProcessed = 0;
977
+ if (isLocalStorage) {
978
+ const result = await this.storage.getVerbs({
979
+ pagination: { offset: 0, limit: 1000000 }
980
+ });
981
+ const verbIds = result.items.map(verb => verb.id);
982
+ let verbMetadataBatch;
983
+ if (this.storage.getVerbMetadataBatch) {
984
+ verbMetadataBatch = await this.storage.getVerbMetadataBatch(verbIds);
985
+ }
986
+ else {
987
+ verbMetadataBatch = new Map();
988
+ for (const id of verbIds) {
989
+ try {
990
+ const metadata = await this.storage.getVerbMetadata(id);
991
+ if (metadata)
992
+ verbMetadataBatch.set(id, metadata);
993
+ }
994
+ catch { }
995
+ }
996
+ }
997
+ let verbLocalCount = 0;
998
+ for (const verb of result.items) {
999
+ const metadata = verbMetadataBatch.get(verb.id);
1000
+ if (metadata) {
1001
+ await this.addToIndex(verb.id, metadata, true, true);
1002
+ verbLocalCount++;
1003
+ if (verbLocalCount % 5000 === 0) {
1004
+ await this.flushRebuildDirty();
1005
+ }
1006
+ }
1007
+ }
1008
+ totalVerbsProcessed = result.items.length;
1009
+ }
1010
+ else {
1011
+ let verbOffset = 0;
1012
+ const verbLimit = 25;
1013
+ let hasMoreVerbs = true;
1014
+ let consecutiveEmptyVerbBatches = 0;
1015
+ let verbIterations = 0;
1016
+ const MAX_ITERATIONS = 10000;
1017
+ while (hasMoreVerbs && verbIterations < MAX_ITERATIONS) {
1018
+ verbIterations++;
1019
+ const result = await this.storage.getVerbs({
1020
+ pagination: { offset: verbOffset, limit: verbLimit }
1021
+ });
1022
+ if (result.items.length === 0) {
1023
+ consecutiveEmptyVerbBatches++;
1024
+ if (consecutiveEmptyVerbBatches >= 3)
1025
+ break;
1026
+ if (result.hasMore) {
1027
+ hasMoreVerbs = false;
1028
+ break;
1029
+ }
1030
+ }
1031
+ else {
1032
+ consecutiveEmptyVerbBatches = 0;
1033
+ }
1034
+ const verbIds = result.items.map(verb => verb.id);
1035
+ let verbMetadataBatch;
1036
+ if (this.storage.getVerbMetadataBatch) {
1037
+ verbMetadataBatch = await this.storage.getVerbMetadataBatch(verbIds);
1038
+ }
1039
+ else {
1040
+ verbMetadataBatch = new Map();
1041
+ for (const id of verbIds) {
1042
+ try {
1043
+ const metadata = await this.storage.getVerbMetadata(id);
1044
+ if (metadata)
1045
+ verbMetadataBatch.set(id, metadata);
1046
+ }
1047
+ catch { }
1048
+ }
1049
+ }
1050
+ for (const verb of result.items) {
1051
+ const metadata = verbMetadataBatch.get(verb.id);
1052
+ if (metadata) {
1053
+ await this.addToIndex(verb.id, metadata, true, true);
1054
+ }
1055
+ }
1056
+ totalVerbsProcessed += result.items.length;
1057
+ if (totalVerbsProcessed % 5000 === 0) {
1058
+ await this.flushRebuildDirty();
1059
+ }
1060
+ hasMoreVerbs = result.hasMore;
1061
+ verbOffset += verbLimit;
1062
+ await this.yieldToEventLoop();
1063
+ }
1064
+ }
1065
+ // Final flush
1066
+ await this.flushRebuildDirty();
1067
+ await this.flush();
1068
+ prodLog.info(`Metadata index rebuild completed! Processed ${totalNounsProcessed} nouns and ${totalVerbsProcessed} verbs`);
1069
+ }
1070
+ finally {
1071
+ this.isRebuilding = false;
1072
+ }
1073
+ }
1074
+ /**
1075
+ * During rebuild, persist all dirty data accumulated so far.
1076
+ */
1077
+ async flushRebuildDirty() {
1078
+ // Save all dirty data currently in Rust
1079
+ const promises = [];
1080
+ // Save all loaded sparse indices
1081
+ for (const field of this.loadedFields) {
1082
+ const json = this.native.saveSparseIndex(field);
1083
+ if (json) {
1084
+ const indexPath = `__sparse_index__${field}`;
1085
+ promises.push(this.storage.saveMetadata(indexPath, JSON.parse(json)));
1086
+ }
1087
+ // Save all chunks for this field
1088
+ const chunkIds = this.native.getSparseIndexChunkIds(field);
1089
+ for (const chunkId of chunkIds) {
1090
+ const chunkJson = this.native.saveChunk(field, chunkId);
1091
+ if (chunkJson) {
1092
+ const chunkPath = `__chunk__${field}_${chunkId}`;
1093
+ promises.push(this.storage.saveMetadata(chunkPath, JSON.parse(chunkJson)));
1094
+ }
1095
+ }
1096
+ }
1097
+ // Save entity ID mapper
1098
+ promises.push(this.saveEntityIdMapper());
1099
+ await Promise.all(promises);
1100
+ }
1101
+ // ==========================================================================
1102
+ // Clear
1103
+ // ==========================================================================
1104
+ async clearAllIndexData() {
1105
+ prodLog.warn('Clearing ALL metadata index data from storage...');
1106
+ const fields = await this.getPersistedFieldList();
1107
+ for (const field of fields) {
1108
+ await this.deleteFieldChunks(field);
1109
+ }
1110
+ try {
1111
+ await this.storage.saveMetadata('__metadata_field_registry__', null);
1112
+ }
1113
+ catch { }
1114
+ // Clear in-memory state
1115
+ this.native.clear();
1116
+ this.loadedFields.clear();
1117
+ this.knownFields.clear();
1118
+ this.dirtyFields.clear();
1119
+ this.totalEntitiesByType.clear();
1120
+ this.entityCountsByTypeFixed.fill(0);
1121
+ this.verbCountsByTypeFixed.fill(0);
1122
+ this.typeFieldAffinity.clear();
1123
+ this.unifiedCache.clear('metadata');
1124
+ // Clear entity ID mapper in storage
1125
+ try {
1126
+ await this.storage.saveMetadata('brainy:entityIdMapper', null);
1127
+ }
1128
+ catch { }
1129
+ prodLog.info(`Cleared ${fields.length} field indexes and all in-memory state`);
1130
+ }
1131
+ async getPersistedFieldList() {
1132
+ try {
1133
+ const registry = await this.storage.getMetadata('__metadata_field_registry__');
1134
+ if (!registry?.fields || !Array.isArray(registry.fields))
1135
+ return [];
1136
+ return registry.fields.filter((f) => typeof f === 'string' && f.length > 0);
1137
+ }
1138
+ catch {
1139
+ return [];
1140
+ }
1141
+ }
1142
+ async deleteFieldChunks(field) {
1143
+ try {
1144
+ const indexPath = `__sparse_index__${field}`;
1145
+ const sparseData = await this.storage.getMetadata(indexPath);
1146
+ if (sparseData) {
1147
+ // Parse chunk IDs from sparse index
1148
+ let chunkIds = [];
1149
+ if (sparseData.chunks && Array.isArray(sparseData.chunks)) {
1150
+ chunkIds = sparseData.chunks.map((c) => c.chunkId).filter((id) => typeof id === 'number');
1151
+ }
1152
+ // Delete all chunk files
1153
+ for (const chunkId of chunkIds) {
1154
+ const chunkPath = `__chunk__${field}_${chunkId}`;
1155
+ try {
1156
+ await this.storage.saveMetadata(chunkPath, null);
1157
+ }
1158
+ catch { }
1159
+ }
1160
+ // Delete sparse index
1161
+ try {
1162
+ await this.storage.saveMetadata(indexPath, null);
1163
+ }
1164
+ catch { }
1165
+ }
1166
+ // Delete field index
1167
+ try {
1168
+ await this.storage.saveMetadata(`__metadata_field_index__field_${field}`, null);
1169
+ }
1170
+ catch { }
1171
+ }
1172
+ catch {
1173
+ prodLog.debug(`Could not clear chunks for field '${field}'`);
1174
+ }
1175
+ }
1176
+ // ==========================================================================
1177
+ // Field statistics (TS-only analytics)
1178
+ // ==========================================================================
1179
+ async getFieldStatistics() {
1180
+ for (const field of this.knownFields) {
1181
+ if (!this.fieldStats.has(field)) {
1182
+ this.fieldStats.set(field, {
1183
+ cardinality: {
1184
+ uniqueValues: 0,
1185
+ totalValues: 0,
1186
+ distribution: 'uniform',
1187
+ updateFrequency: 0,
1188
+ lastAnalyzed: Date.now()
1189
+ },
1190
+ queryCount: 0,
1191
+ rangeQueryCount: 0,
1192
+ exactQueryCount: 0,
1193
+ avgQueryTime: 0,
1194
+ indexType: 'hash'
1195
+ });
1196
+ }
1197
+ }
1198
+ return new Map(this.fieldStats);
1199
+ }
1200
+ async getFieldCardinality(field) {
1201
+ const stats = this.fieldStats.get(field);
1202
+ return stats ? stats.cardinality : null;
1203
+ }
1204
+ async getFieldsWithCardinality() {
1205
+ const fields = [];
1206
+ for (const [field, stats] of this.fieldStats) {
1207
+ fields.push({
1208
+ field,
1209
+ cardinality: stats.cardinality.uniqueValues,
1210
+ distribution: stats.cardinality.distribution
1211
+ });
1212
+ }
1213
+ fields.sort((a, b) => a.cardinality - b.cardinality);
1214
+ return fields;
1215
+ }
1216
+ async getOptimalQueryPlan(filters) {
1217
+ const fieldOrder = [];
1218
+ let hasRangeQueries = false;
1219
+ let totalEstimatedCost = 0;
1220
+ for (const [field, value] of Object.entries(filters)) {
1221
+ const stats = this.fieldStats.get(field);
1222
+ if (!stats)
1223
+ continue;
1224
+ if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
1225
+ hasRangeQueries = true;
1226
+ }
1227
+ const cardinality = stats.cardinality.uniqueValues;
1228
+ totalEstimatedCost += Math.log2(Math.max(1, cardinality));
1229
+ fieldOrder.push(field);
1230
+ }
1231
+ fieldOrder.sort((a, b) => {
1232
+ const statsA = this.fieldStats.get(a);
1233
+ const statsB = this.fieldStats.get(b);
1234
+ if (!statsA || !statsB)
1235
+ return 0;
1236
+ return statsA.cardinality.uniqueValues - statsB.cardinality.uniqueValues;
1237
+ });
1238
+ return {
1239
+ strategy: hasRangeQueries ? 'hybrid' : 'exact',
1240
+ fieldOrder,
1241
+ estimatedCost: totalEstimatedCost
1242
+ };
1243
+ }
1244
+ async getFieldsForType(nounType) {
1245
+ const typeFields = this.typeFieldAffinity.get(nounType);
1246
+ const totalEntities = this.totalEntitiesByType.get(nounType);
1247
+ if (!typeFields || !totalEntities)
1248
+ return [];
1249
+ const result = [];
1250
+ for (const [field, count] of typeFields.entries()) {
1251
+ result.push({
1252
+ field,
1253
+ affinity: count / totalEntities,
1254
+ occurrences: count,
1255
+ totalEntities
1256
+ });
1257
+ }
1258
+ result.sort((a, b) => b.affinity - a.affinity);
1259
+ return result;
1260
+ }
1261
+ // ==========================================================================
1262
+ // Deprecated / compatibility
1263
+ // ==========================================================================
1264
+ async getIdsForCriteria(criteria) {
1265
+ return this.getIdsForFilter(criteria);
1266
+ }
1267
+ // ==========================================================================
1268
+ // Private helpers
1269
+ // ==========================================================================
1270
+ async yieldToEventLoop() {
1271
+ return new Promise(resolve => setImmediate(resolve));
1272
+ }
1273
+ }
1274
+ //# sourceMappingURL=NativeMetadataIndex.js.map