@soulcraft/brainy 3.41.1 → 3.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/brainy.js CHANGED
@@ -100,6 +100,7 @@ export class Brainy {
100
100
  this.index = this.setupIndex();
101
101
  // Initialize core metadata index
102
102
  this.metadataIndex = new MetadataIndexManager(this.storage);
103
+ await this.metadataIndex.init();
103
104
  // Initialize core graph index
104
105
  this.graphIndex = new GraphAdjacencyIndex(this.storage);
105
106
  // Rebuild indexes if needed for existing data
@@ -0,0 +1,93 @@
1
+ /**
2
+ * EntityIdMapper - Bidirectional mapping between UUID strings and integer IDs for roaring bitmaps
3
+ *
4
+ * Roaring bitmaps require 32-bit unsigned integers, but Brainy uses UUID strings as entity IDs.
5
+ * This class provides efficient bidirectional mapping with persistence support.
6
+ *
7
+ * Features:
8
+ * - O(1) lookup in both directions
9
+ * - Persistent storage via storage adapter
10
+ * - Monotonically increasing counter for next ID (IDs are not handed out again after remove())
11
+ * - Serialization/deserialization support
12
+ *
13
+ * @module utils/entityIdMapper
14
+ */
15
+ import type { StorageAdapter } from '../coreTypes.js';
16
+ export interface EntityIdMapperOptions { // constructor options for EntityIdMapper
17
+ storage: StorageAdapter; // adapter whose getMetadata/saveMetadata calls load and persist the mapping
18
+ storageKey?: string; // metadata key for the persisted mapping; falls back to 'brainy:entityIdMapper' when omitted or empty
19
+ }
20
+ export interface EntityIdMapperData { // same shape as the plain object EntityIdMapper.flush() saves and init() reads back
21
+ nextId: number; // next integer ID the mapper will assign
22
+ uuidToInt: Record<string, number>; // UUID -> integer ID
23
+ intToUuid: Record<number, string>; // integer ID -> UUID (keys become strings once serialized)
24
+ }
25
+ /**
26
+ * Maps entity UUIDs to integer IDs for use with Roaring Bitmaps
27
+ */
28
+ export declare class EntityIdMapper {
29
+ private storage; // storage adapter taken from the constructor options
30
+ private storageKey; // metadata key under which the mapping is persisted
31
+ private uuidToInt; // Map: UUID -> integer ID
32
+ private intToUuid; // Map: integer ID -> UUID
33
+ private nextId; // next integer ID to assign; starts at 1
34
+ private dirty; // true while in-memory changes have not been flushed
35
+ constructor(options: EntityIdMapperOptions);
36
+ /**
37
+ * Initialize the mapper by loading previously persisted mappings from storage. Errors while loading are swallowed, so this is also safe on first use when nothing has been saved yet.
38
+ */
39
+ init(): Promise<void>;
40
+ /**
41
+ * Get integer ID for UUID. If the UUID has no ID yet, the next counter value is assigned, both directions are recorded and the mapper is marked dirty.
42
+ */
43
+ getOrAssign(uuid: string): number;
44
+ /**
45
+ * Get UUID for integer ID, or undefined when the ID is not mapped
46
+ */
47
+ getUuid(intId: number): string | undefined;
48
+ /**
49
+ * Get integer ID for UUID without assigning one; undefined when the UUID is not mapped
50
+ */
51
+ getInt(uuid: string): number | undefined;
52
+ /**
53
+ * Check if UUID has been assigned an integer ID
54
+ */
55
+ has(uuid: string): boolean;
56
+ /**
57
+ * Remove mapping for UUID in both directions. Returns false when the UUID was not mapped. The ID counter is not decremented, so the removed ID is not handed out again.
58
+ */
59
+ remove(uuid: string): boolean;
60
+ /**
61
+ * Get total number of mappings
62
+ */
63
+ get size(): number;
64
+ /**
65
+ * Convert array of UUIDs to array of integers. Uses getOrAssign, so UUIDs without an ID receive one as a side effect.
66
+ */
67
+ uuidsToInts(uuids: string[]): number[];
68
+ /**
69
+ * Convert array of integers to array of UUIDs. Integers with no mapping are skipped, so the result can be shorter than the input.
70
+ */
71
+ intsToUuids(ints: number[]): string[];
72
+ /**
73
+ * Convert iterable of integers to array of UUIDs (for roaring bitmap iteration). Integers with no mapping are skipped.
74
+ */
75
+ intsIterableToUuids(ints: Iterable<number>): string[];
76
+ /**
77
+ * Flush mappings to storage. Does nothing when there are no unsaved changes.
78
+ */
79
+ flush(): Promise<void>;
80
+ /**
81
+ * Clear all mappings, reset the ID counter to 1 and persist the now-empty state via flush()
82
+ */
83
+ clear(): Promise<void>;
84
+ /**
85
+ * Get statistics about the mapper. memoryEstimate is a rough figure of 56 per mapping (presumably bytes: uuid string + int + map overhead).
86
+ */
87
+ getStats(): {
88
+ mappings: number;
89
+ nextId: number;
90
+ dirty: boolean;
91
+ memoryEstimate: number;
92
+ };
93
+ }
@@ -0,0 +1,169 @@
1
+ /**
2
+ * EntityIdMapper - Bidirectional mapping between UUID strings and integer IDs for roaring bitmaps
3
+ *
4
+ * Roaring bitmaps require 32-bit unsigned integers, but Brainy uses UUID strings as entity IDs.
5
+ * This class provides efficient bidirectional mapping with persistence support.
6
+ *
7
+ * Features:
8
+ * - O(1) lookup in both directions
9
+ * - Persistent storage via storage adapter
10
+ * - Monotonically increasing counter for next ID (IDs are not handed out again after remove())
11
+ * - Serialization/deserialization support
12
+ *
13
+ * @module utils/entityIdMapper
14
+ */
15
+ /**
16
+ * Maps entity UUIDs to integer IDs for use with Roaring Bitmaps
17
+ */
18
+ export class EntityIdMapper {
19
+ constructor(options) {
20
+ // Bidirectional maps
21
+ this.uuidToInt = new Map();
22
+ this.intToUuid = new Map();
23
+ // Atomic counter for next ID
24
+ this.nextId = 1;
25
+ // Dirty flag for persistence
26
+ this.dirty = false;
27
+ this.storage = options.storage;
28
+ this.storageKey = options.storageKey || 'brainy:entityIdMapper';
29
+ }
30
+ /**
31
+ * Initialize the mapper by loading from storage
32
+ */
33
+ async init() {
34
+ try {
35
+ const data = await this.storage.getMetadata(this.storageKey);
36
+ if (data) {
37
+ this.nextId = data.nextId;
38
+ // Rebuild maps from serialized data
39
+ this.uuidToInt = new Map(Object.entries(data.uuidToInt).map(([k, v]) => [k, Number(v)]));
40
+ this.intToUuid = new Map(Object.entries(data.intToUuid).map(([k, v]) => [Number(k), v]));
41
+ }
42
+ }
43
+ catch (error) {
44
+ // First time initialization - maps are empty, nextId = 1
45
+ }
46
+ }
47
+ /**
48
+ * Get integer ID for UUID, assigning a new ID if not exists
49
+ */
50
+ getOrAssign(uuid) {
51
+ const existing = this.uuidToInt.get(uuid);
52
+ if (existing !== undefined) {
53
+ return existing;
54
+ }
55
+ // Assign new ID
56
+ const newId = this.nextId++;
57
+ this.uuidToInt.set(uuid, newId);
58
+ this.intToUuid.set(newId, uuid);
59
+ this.dirty = true;
60
+ return newId;
61
+ }
62
+ /**
63
+ * Get UUID for integer ID
64
+ */
65
+ getUuid(intId) {
66
+ return this.intToUuid.get(intId);
67
+ }
68
+ /**
69
+ * Get integer ID for UUID (without assigning if not exists)
70
+ */
71
+ getInt(uuid) {
72
+ return this.uuidToInt.get(uuid);
73
+ }
74
+ /**
75
+ * Check if UUID has been assigned an integer ID
76
+ */
77
+ has(uuid) {
78
+ return this.uuidToInt.has(uuid);
79
+ }
80
+ /**
81
+ * Remove mapping for UUID
82
+ */
83
+ remove(uuid) {
84
+ const intId = this.uuidToInt.get(uuid);
85
+ if (intId === undefined) {
86
+ return false;
87
+ }
88
+ this.uuidToInt.delete(uuid);
89
+ this.intToUuid.delete(intId);
90
+ this.dirty = true;
91
+ return true;
92
+ }
93
+ /**
94
+ * Get total number of mappings
95
+ */
96
+ get size() {
97
+ return this.uuidToInt.size;
98
+ }
99
+ /**
100
+ * Convert array of UUIDs to array of integers
101
+ */
102
+ uuidsToInts(uuids) {
103
+ return uuids.map(uuid => this.getOrAssign(uuid));
104
+ }
105
+ /**
106
+ * Convert array of integers to array of UUIDs
107
+ */
108
+ intsToUuids(ints) {
109
+ const result = [];
110
+ for (const intId of ints) {
111
+ const uuid = this.intToUuid.get(intId);
112
+ if (uuid) {
113
+ result.push(uuid);
114
+ }
115
+ }
116
+ return result;
117
+ }
118
+ /**
119
+ * Convert iterable of integers to array of UUIDs (for roaring bitmap iteration)
120
+ */
121
+ intsIterableToUuids(ints) {
122
+ const result = [];
123
+ for (const intId of ints) {
124
+ const uuid = this.intToUuid.get(intId);
125
+ if (uuid) {
126
+ result.push(uuid);
127
+ }
128
+ }
129
+ return result;
130
+ }
131
+ /**
132
+ * Flush mappings to storage
133
+ */
134
+ async flush() {
135
+ if (!this.dirty) {
136
+ return;
137
+ }
138
+ // Convert maps to plain objects for serialization
139
+ const data = {
140
+ nextId: this.nextId,
141
+ uuidToInt: Object.fromEntries(this.uuidToInt),
142
+ intToUuid: Object.fromEntries(this.intToUuid)
143
+ };
144
+ await this.storage.saveMetadata(this.storageKey, data);
145
+ this.dirty = false;
146
+ }
147
+ /**
148
+ * Clear all mappings
149
+ */
150
+ async clear() {
151
+ this.uuidToInt.clear();
152
+ this.intToUuid.clear();
153
+ this.nextId = 1;
154
+ this.dirty = true;
155
+ await this.flush();
156
+ }
157
+ /**
158
+ * Get statistics about the mapper
159
+ */
160
+ getStats() {
161
+ return {
162
+ mappings: this.uuidToInt.size,
163
+ nextId: this.nextId,
164
+ dirty: this.dirty,
165
+ memoryEstimate: this.uuidToInt.size * (36 + 8 + 4 + 8) // uuid string + map overhead + int + map overhead
166
+ };
167
+ }
168
+ }
169
+ //# sourceMappingURL=entityIdMapper.js.map
@@ -29,6 +29,10 @@ export interface MetadataIndexConfig {
29
29
  indexedFields?: string[];
30
30
  excludeFields?: string[];
31
31
  }
32
+ /**
33
+ * MetadataIndexManager (declared below) manages metadata indexes for fast filtering
34
+ * Maintains inverted indexes: field+value -> list of IDs
35
+ */
32
36
  interface CardinalityInfo {
33
37
  uniqueValues: number;
34
38
  totalValues: number;
@@ -42,22 +46,18 @@ interface FieldStats {
42
46
  rangeQueryCount: number;
43
47
  exactQueryCount: number;
44
48
  avgQueryTime: number;
45
- indexType: 'hash' | 'sorted' | 'both';
49
+ indexType: 'hash';
46
50
  normalizationStrategy?: 'none' | 'precision' | 'bucket';
47
51
  }
48
52
  export declare class MetadataIndexManager {
49
53
  private storage;
50
54
  private config;
51
- private indexCache;
52
- private dirtyEntries;
53
55
  private isRebuilding;
54
56
  private metadataCache;
55
57
  private fieldIndexes;
56
58
  private dirtyFields;
57
59
  private lastFlushTime;
58
60
  private autoFlushThreshold;
59
- private sortedIndices;
60
- private numericFields;
61
61
  private fieldStats;
62
62
  private cardinalityUpdateInterval;
63
63
  private operationCount;
@@ -70,7 +70,16 @@ export declare class MetadataIndexManager {
70
70
  private activeLocks;
71
71
  private lockPromises;
72
72
  private lockTimers;
73
+ private sparseIndices;
74
+ private chunkManager;
75
+ private chunkingStrategy;
76
+ private idMapper;
73
77
  constructor(storage: StorageAdapter, config?: MetadataIndexConfig);
78
+ /**
79
+ * Initialize the metadata index manager
80
+ * This must be called after construction and before any queries
81
+ */
82
+ init(): Promise<void>;
74
83
  /**
75
84
  * Acquire an in-memory lock for coordinating concurrent metadata index writes
76
85
  * Uses in-memory locks since MetadataIndexManager doesn't have direct file system access
@@ -92,60 +101,68 @@ export declare class MetadataIndexManager {
92
101
  */
93
102
  private lazyLoadCounts;
94
103
  /**
95
- * Get index key for field and value
96
- */
97
- private getIndexKey;
98
- /**
99
- * Ensure sorted index exists for a field (for range queries)
100
- */
101
- private ensureSortedIndex;
102
- /**
103
- * Build sorted index for a field from hash index
104
+ * Update cardinality statistics for a field
104
105
  */
105
- private buildSortedIndex;
106
+ private updateCardinalityStats;
106
107
  /**
107
- * Detect field type from value
108
+ * Analyze field distribution for optimization
108
109
  */
109
- private detectFieldType;
110
+ private analyzeFieldDistribution;
110
111
  /**
111
- * Compare two values based on field type for sorting
112
+ * Update index strategy based on field statistics
112
113
  */
113
- private compareValues;
114
+ private updateIndexStrategy;
114
115
  /**
115
- * Binary search to find insertion position for a value
116
- * Returns the index where the value should be inserted to maintain sorted order
116
+ * Load sparse index from storage
117
117
  */
118
- private findInsertPosition;
118
+ private loadSparseIndex;
119
119
  /**
120
- * Incrementally update sorted index when adding an ID
120
+ * Save sparse index to storage
121
121
  */
122
- private updateSortedIndexAdd;
122
+ private saveSparseIndex;
123
123
  /**
124
- * Incrementally update sorted index when removing an ID
124
+ * Get IDs for a value using chunked sparse index with roaring bitmaps (v3.43.0)
125
125
  */
126
- private updateSortedIndexRemove;
126
+ private getIdsFromChunks;
127
127
  /**
128
- * Binary search for range start (inclusive or exclusive)
128
+ * Get IDs for a range using chunked sparse index with zone maps and roaring bitmaps (v3.43.0)
129
129
  */
130
- private binarySearchStart;
130
+ private getIdsFromChunksForRange;
131
131
  /**
132
- * Binary search for range end (inclusive or exclusive)
132
+ * Get roaring bitmap for a field-value pair without converting to UUIDs (v3.43.0)
133
+ * This is used for fast multi-field intersection queries using hardware-accelerated bitmap AND
134
+ * @returns RoaringBitmap32 containing integer IDs, or null if no matches
133
135
  */
134
- private binarySearchEnd;
136
+ private getBitmapFromChunks;
135
137
  /**
136
- * Update cardinality statistics for a field
138
+ * Get IDs for multiple field-value pairs using fast roaring bitmap intersection (v3.43.0)
139
+ *
140
+ * This method provides 500-900x faster multi-field queries by:
141
+ * - Using hardware-accelerated bitmap AND operations (SIMD: AVX2/SSE4.2)
142
+ * - Avoiding intermediate UUID array allocations
143
+ * - Converting integers to UUIDs only once at the end
144
+ *
145
+ * Example: { status: 'active', role: 'admin', verified: true }
146
+ * Instead of: fetch 3 UUID arrays → convert to Sets → filter intersection
147
+ * We do: fetch 3 bitmaps → hardware AND → convert final bitmap to UUIDs
148
+ *
149
+ * @param fieldValuePairs Array of field-value pairs to intersect
150
+ * @returns Array of UUID strings matching ALL criteria
137
151
  */
138
- private updateCardinalityStats;
152
+ getIdsForMultipleFields(fieldValuePairs: Array<{
153
+ field: string;
154
+ value: any;
155
+ }>): Promise<string[]>;
139
156
  /**
140
- * Analyze field distribution for optimization
157
+ * Add value-ID mapping to chunked index
141
158
  */
142
- private analyzeFieldDistribution;
159
+ private addToChunkedIndex;
143
160
  /**
144
- * Update index strategy based on field statistics
161
+ * Remove ID from chunked index
145
162
  */
146
- private updateIndexStrategy;
163
+ private removeFromChunkedIndex;
147
164
  /**
148
- * Get IDs matching a range query
165
+ * Get IDs matching a range query using zone maps
149
166
  */
150
167
  private getIdsForRange;
151
168
  /**
@@ -193,7 +210,7 @@ export declare class MetadataIndexManager {
193
210
  */
194
211
  getAllIds(): Promise<string[]>;
195
212
  /**
196
- * Get IDs for a specific field-value combination with caching
213
+ * Get IDs for a specific field-value combination using chunked sparse index
197
214
  */
198
215
  getIds(field: string, value: any): Promise<string[]>;
199
216
  /**
@@ -223,6 +240,7 @@ export declare class MetadataIndexManager {
223
240
  getIdsForCriteria(criteria: Record<string, any>): Promise<string[]>;
224
241
  /**
225
242
  * Flush dirty entries to storage (non-blocking version)
243
+ * NOTE (v3.42.0): Sparse indices are flushed immediately in add/remove operations
226
244
  */
227
245
  flush(): Promise<void>;
228
246
  /**
@@ -238,14 +256,6 @@ export declare class MetadataIndexManager {
238
256
  * Save field index to storage with file locking
239
257
  */
240
258
  private saveFieldIndex;
241
- /**
242
- * Save sorted index to storage for range queries with file locking
243
- */
244
- private saveSortedIndex;
245
- /**
246
- * Load sorted index from storage
247
- */
248
- private loadSortedIndex;
249
259
  /**
250
260
  * Get count of entities by type - O(1) operation using existing tracking
251
261
  * This exposes the production-ready counting that's already maintained
@@ -260,7 +270,7 @@ export declare class MetadataIndexManager {
260
270
  */
261
271
  getAllEntityCounts(): Map<string, number>;
262
272
  /**
263
- * Get count of entities matching field-value criteria - O(1) lookup from existing indexes
273
+ * Get count of entities matching field-value criteria - queries chunked sparse index
264
274
  */
265
275
  getCountForCriteria(field: string, value: any): Promise<number>;
266
276
  /**
@@ -272,18 +282,6 @@ export declare class MetadataIndexManager {
272
282
  * Non-blocking version that yields control back to event loop
273
283
  */
274
284
  rebuild(): Promise<void>;
275
- /**
276
- * Load index entry from storage using safe filenames
277
- */
278
- private loadIndexEntry;
279
- /**
280
- * Save index entry to storage using safe filenames with file locking
281
- */
282
- private saveIndexEntry;
283
- /**
284
- * Delete index entry from storage using safe filenames
285
- */
286
- private deleteIndexEntry;
287
285
  /**
288
286
  * Get field statistics for optimization and discovery
289
287
  */