jexidb 2.0.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/.babelrc +13 -0
  2. package/.gitattributes +2 -0
  3. package/CHANGELOG.md +132 -101
  4. package/LICENSE +21 -21
  5. package/README.md +301 -639
  6. package/babel.config.json +5 -0
  7. package/dist/Database.cjs +3896 -0
  8. package/docs/API.md +1051 -390
  9. package/docs/EXAMPLES.md +701 -177
  10. package/docs/README.md +194 -184
  11. package/examples/iterate-usage-example.js +157 -0
  12. package/examples/simple-iterate-example.js +115 -0
  13. package/jest.config.js +24 -0
  14. package/package.json +63 -54
  15. package/scripts/README.md +47 -0
  16. package/scripts/clean-test-files.js +75 -0
  17. package/scripts/prepare.js +31 -0
  18. package/scripts/run-tests.js +80 -0
  19. package/src/Database.mjs +4130 -0
  20. package/src/FileHandler.mjs +1101 -0
  21. package/src/OperationQueue.mjs +279 -0
  22. package/src/SchemaManager.mjs +268 -0
  23. package/src/Serializer.mjs +511 -0
  24. package/src/managers/ConcurrencyManager.mjs +257 -0
  25. package/src/managers/IndexManager.mjs +1403 -0
  26. package/src/managers/QueryManager.mjs +1273 -0
  27. package/src/managers/StatisticsManager.mjs +262 -0
  28. package/src/managers/StreamingProcessor.mjs +429 -0
  29. package/src/managers/TermManager.mjs +278 -0
  30. package/test/$not-operator-with-and.test.js +282 -0
  31. package/test/README.md +8 -0
  32. package/test/close-init-cycle.test.js +256 -0
  33. package/test/critical-bugs-fixes.test.js +1069 -0
  34. package/test/index-persistence.test.js +306 -0
  35. package/test/index-serialization.test.js +314 -0
  36. package/test/indexed-query-mode.test.js +360 -0
  37. package/test/iterate-method.test.js +272 -0
  38. package/test/query-operators.test.js +238 -0
  39. package/test/regex-array-fields.test.js +129 -0
  40. package/test/score-method.test.js +238 -0
  41. package/test/setup.js +17 -0
  42. package/test/term-mapping-minimal.test.js +154 -0
  43. package/test/term-mapping-simple.test.js +257 -0
  44. package/test/term-mapping.test.js +514 -0
  45. package/test/writebuffer-flush-resilience.test.js +204 -0
  46. package/dist/FileHandler.js +0 -688
  47. package/dist/IndexManager.js +0 -353
  48. package/dist/IntegrityChecker.js +0 -364
  49. package/dist/JSONLDatabase.js +0 -1333
  50. package/dist/index.js +0 -617
  51. package/docs/MIGRATION.md +0 -295
  52. package/examples/auto-save-example.js +0 -158
  53. package/examples/cjs-usage.cjs +0 -82
  54. package/examples/close-vs-delete-example.js +0 -71
  55. package/examples/esm-usage.js +0 -113
  56. package/examples/example-columns.idx.jdb +0 -0
  57. package/examples/example-columns.jdb +0 -9
  58. package/examples/example-options.idx.jdb +0 -0
  59. package/examples/example-options.jdb +0 -0
  60. package/examples/example-users.idx.jdb +0 -0
  61. package/examples/example-users.jdb +0 -5
  62. package/examples/simple-test.js +0 -55
  63. package/src/FileHandler.js +0 -674
  64. package/src/IndexManager.js +0 -363
  65. package/src/IntegrityChecker.js +0 -379
  66. package/src/JSONLDatabase.js +0 -1391
  67. package/src/index.js +0 -608
@@ -0,0 +1,1403 @@
1
+ import { Mutex } from 'async-mutex'
2
+
3
+ export default class IndexManager {
4
+ constructor(opts, databaseMutex = null, database = null) {
5
+ this.opts = Object.assign({}, opts)
6
+ this.index = Object.assign({data: {}}, this.opts.index)
7
+ this.totalLines = 0
8
+ this.rangeThreshold = 10 // Sensible threshold: 10+ consecutive numbers justify ranges
9
+ this.binarySearchThreshold = 32 // Much higher for better performance
10
+ this.database = database // Reference to database for term manager access
11
+
12
+ // CRITICAL: Use database mutex to prevent deadlocks
13
+ // If no database mutex provided, create a local one (for backward compatibility)
14
+ this.mutex = databaseMutex || new Mutex()
15
+
16
+ // Initialize empty structures for each field ONLY if not already present
17
+ if (this.opts.indexes) {
18
+ this.indexedFields = Object.keys(this.opts.indexes)
19
+ Object.keys(this.opts.indexes).forEach(field => {
20
+ if (!this.index.data[field]) {
21
+ this.index.data[field] = {}
22
+ }
23
+ })
24
+ } else {
25
+ this.indexedFields = []
26
+ }
27
+ }
28
+
29
+ setTotalLines(total) {
30
+ this.totalLines = total
31
+ }
32
+
33
+ // Ultra-fast range conversion - only for very large datasets
34
+ _toRanges(numbers) {
35
+ if (numbers.length === 0) return []
36
+ if (numbers.length < this.rangeThreshold) return numbers // Keep as-is for small arrays
37
+
38
+ const sorted = numbers.sort((a, b) => a - b) // Sort in-place
39
+ const ranges = []
40
+ let start = sorted[0]
41
+ let count = 1
42
+
43
+ for (let i = 1; i < sorted.length; i++) {
44
+ if (sorted[i] === sorted[i-1] + 1) {
45
+ count++
46
+ } else {
47
+ // End of consecutive sequence
48
+ if (count >= this.rangeThreshold) {
49
+ ranges.push({start, count})
50
+ } else {
51
+ // Add individual numbers for small sequences
52
+ for (let j = start; j < start + count; j++) {
53
+ ranges.push(j)
54
+ }
55
+ }
56
+ start = sorted[i]
57
+ count = 1
58
+ }
59
+ }
60
+
61
+ // Handle last sequence
62
+ if (count >= this.rangeThreshold) {
63
+ ranges.push({start, count})
64
+ } else {
65
+ for (let j = start; j < start + count; j++) {
66
+ ranges.push(j)
67
+ }
68
+ }
69
+
70
+ return ranges
71
+ }
72
+
73
+ // Ultra-fast range expansion
74
+ _fromRanges(ranges) {
75
+ if (!ranges || ranges.length === 0) return []
76
+
77
+ const numbers = []
78
+ for (const item of ranges) {
79
+ if (typeof item === 'object' && item.start !== undefined) {
80
+ // It's a range - use direct loop for maximum speed
81
+ const end = item.start + item.count
82
+ for (let i = item.start; i < end; i++) {
83
+ numbers.push(i)
84
+ }
85
+ } else {
86
+ // It's an individual number
87
+ numbers.push(item)
88
+ }
89
+ }
90
+ return numbers
91
+ }
92
+
93
+ // Ultra-fast lookup - optimized for Set operations
94
+ _hasLineNumber(hybridData, lineNumber) {
95
+ if (!hybridData) return false
96
+
97
+ // Check in Set first (O(1)) - most common case
98
+ if (hybridData.set && hybridData.set.has(lineNumber)) {
99
+ return true
100
+ }
101
+
102
+ // Check in ranges only if necessary
103
+ if (hybridData.ranges && hybridData.ranges.length > 0) {
104
+ return this._searchInRanges(hybridData.ranges, lineNumber)
105
+ }
106
+
107
+ return false
108
+ }
109
+
110
+ // Optimized search strategy
111
+ _searchInRanges(ranges, lineNumber) {
112
+ if (ranges.length < this.binarySearchThreshold) {
113
+ // Linear search for small ranges
114
+ return this._linearSearchRanges(ranges, lineNumber)
115
+ } else {
116
+ // Binary search for large ranges
117
+ return this._binarySearchRanges(ranges, lineNumber)
118
+ }
119
+ }
120
+
121
+ // Ultra-fast linear search
122
+ _linearSearchRanges(ranges, lineNumber) {
123
+ for (const item of ranges) {
124
+ if (typeof item === 'object' && item.start !== undefined) {
125
+ // It's a range
126
+ if (lineNumber >= item.start && lineNumber < item.start + item.count) {
127
+ return true
128
+ }
129
+ } else if (item === lineNumber) {
130
+ // It's an individual number
131
+ return true
132
+ }
133
+ }
134
+ return false
135
+ }
136
+
137
+ // Optimized binary search
138
+ _binarySearchRanges(ranges, lineNumber) {
139
+ let left = 0
140
+ let right = ranges.length - 1
141
+
142
+ while (left <= right) {
143
+ const mid = Math.floor((left + right) / 2)
144
+ const range = ranges[mid]
145
+
146
+ if (typeof range === 'object' && range.start !== undefined) {
147
+ // It's a range
148
+ if (lineNumber >= range.start && lineNumber < range.start + range.count) {
149
+ return true
150
+ } else if (lineNumber < range.start) {
151
+ right = mid - 1
152
+ } else {
153
+ left = mid + 1
154
+ }
155
+ } else {
156
+ // It's an individual number
157
+ if (range === lineNumber) {
158
+ return true
159
+ } else if (range < lineNumber) {
160
+ left = mid + 1
161
+ } else {
162
+ right = mid - 1
163
+ }
164
+ }
165
+ }
166
+
167
+ return false
168
+ }
169
+
170
+ // Ultra-fast add operation - minimal overhead
171
+ _addLineNumber(hybridData, lineNumber) {
172
+ // Initialize structure if needed
173
+ if (!hybridData) {
174
+ hybridData = { set: new Set(), ranges: [] }
175
+ }
176
+
177
+ // Add to Set directly (fastest path)
178
+ if (!hybridData.set) {
179
+ hybridData.set = new Set()
180
+ }
181
+ hybridData.set.add(lineNumber)
182
+
183
+ // Optimize to ranges when Set gets reasonably large
184
+ if (hybridData.set.size >= this.rangeThreshold * 2) { // 20 elements
185
+ if (this.opts.debugMode) {
186
+ console.log(`🔧 Triggering range optimization: Set size ${hybridData.set.size} >= threshold ${this.rangeThreshold * 2}`)
187
+ }
188
+ this._optimizeToRanges(hybridData)
189
+ }
190
+
191
+ return hybridData
192
+ }
193
+
194
+ // Ultra-fast remove operation
195
+ _removeLineNumber(hybridData, lineNumber) {
196
+ if (!hybridData) {
197
+ return hybridData
198
+ }
199
+
200
+ // Remove from Set (fast path)
201
+ if (hybridData.set) {
202
+ hybridData.set.delete(lineNumber)
203
+ }
204
+
205
+ // Remove from ranges (less common)
206
+ if (hybridData.ranges) {
207
+ hybridData.ranges = this._removeFromRanges(hybridData.ranges, lineNumber)
208
+ }
209
+
210
+ return hybridData
211
+ }
212
+
213
+ // Optimized range removal
214
+ _removeFromRanges(ranges, lineNumber) {
215
+ if (!ranges || ranges.length === 0) return ranges
216
+
217
+ const newRanges = []
218
+
219
+ for (const item of ranges) {
220
+ if (typeof item === 'object' && item.start !== undefined) {
221
+ // It's a range
222
+ if (lineNumber >= item.start && lineNumber < item.start + item.count) {
223
+ // Split range if needed
224
+ if (lineNumber === item.start) {
225
+ // Remove first element
226
+ if (item.count > 1) {
227
+ newRanges.push({ start: item.start + 1, count: item.count - 1 })
228
+ }
229
+ } else if (lineNumber === item.start + item.count - 1) {
230
+ // Remove last element
231
+ if (item.count > 1) {
232
+ newRanges.push({ start: item.start, count: item.count - 1 })
233
+ }
234
+ } else {
235
+ // Remove from middle - split into two ranges
236
+ const beforeCount = lineNumber - item.start
237
+ const afterCount = item.count - beforeCount - 1
238
+
239
+ if (beforeCount >= this.rangeThreshold) {
240
+ newRanges.push({ start: item.start, count: beforeCount })
241
+ } else {
242
+ // Add individual numbers for small sequences
243
+ for (let i = item.start; i < lineNumber; i++) {
244
+ newRanges.push(i)
245
+ }
246
+ }
247
+
248
+ if (afterCount >= this.rangeThreshold) {
249
+ newRanges.push({ start: lineNumber + 1, count: afterCount })
250
+ } else {
251
+ // Add individual numbers for small sequences
252
+ for (let i = lineNumber + 1; i < item.start + item.count; i++) {
253
+ newRanges.push(i)
254
+ }
255
+ }
256
+ }
257
+ } else {
258
+ newRanges.push(item)
259
+ }
260
+ } else if (item !== lineNumber) {
261
+ // It's an individual number
262
+ newRanges.push(item)
263
+ }
264
+ }
265
+
266
+ return newRanges
267
+ }
268
+
269
+ // Ultra-lazy range conversion - only when absolutely necessary
270
+ _optimizeToRanges(hybridData) {
271
+ if (!hybridData.set || hybridData.set.size === 0) {
272
+ return
273
+ }
274
+
275
+ if (this.opts.debugMode) {
276
+ console.log(`🔧 Starting range optimization for Set with ${hybridData.set.size} elements`)
277
+ }
278
+
279
+ // Only convert if we have enough data to make it worthwhile
280
+ if (hybridData.set.size < this.rangeThreshold) {
281
+ return
282
+ }
283
+
284
+ // Convert Set to array and find consecutive sequences
285
+ const numbers = Array.from(hybridData.set).sort((a, b) => a - b)
286
+ const ranges = []
287
+
288
+ let start = numbers[0]
289
+ let count = 1
290
+
291
+ for (let i = 1; i < numbers.length; i++) {
292
+ if (numbers[i] === numbers[i-1] + 1) {
293
+ count++
294
+ } else {
295
+ // End of consecutive sequence
296
+ if (count >= this.rangeThreshold) {
297
+ ranges.push({start, count})
298
+ // Remove these numbers from Set
299
+ for (let j = start; j < start + count; j++) {
300
+ hybridData.set.delete(j)
301
+ }
302
+ }
303
+ start = numbers[i]
304
+ count = 1
305
+ }
306
+ }
307
+
308
+ // Handle last sequence
309
+ if (count >= this.rangeThreshold) {
310
+ ranges.push({start, count})
311
+ for (let j = start; j < start + count; j++) {
312
+ hybridData.set.delete(j)
313
+ }
314
+ }
315
+
316
+ // Add new ranges to existing ranges
317
+ if (ranges.length > 0) {
318
+ if (!hybridData.ranges) {
319
+ hybridData.ranges = []
320
+ }
321
+ hybridData.ranges.push(...ranges)
322
+ // Keep ranges sorted for efficient binary search
323
+ hybridData.ranges.sort((a, b) => {
324
+ const aStart = typeof a === 'object' ? a.start : a
325
+ const bStart = typeof b === 'object' ? b.start : b
326
+ return aStart - bStart
327
+ })
328
+ }
329
+ }
330
+
331
+ // Ultra-fast get all line numbers
332
+ _getAllLineNumbers(hybridData) {
333
+ if (!hybridData) return []
334
+
335
+ // Use generator for lazy evaluation and better memory efficiency
336
+ return Array.from(this._getAllLineNumbersGenerator(hybridData))
337
+ }
338
+
339
+ // OPTIMIZATION: Generator-based approach for better memory efficiency
340
+ *_getAllLineNumbersGenerator(hybridData) {
341
+ // Yield from Set (fastest path)
342
+ if (hybridData.set) {
343
+ for (const num of hybridData.set) {
344
+ yield num
345
+ }
346
+ }
347
+
348
+ // Yield from ranges (optimized)
349
+ if (hybridData.ranges) {
350
+ for (const item of hybridData.ranges) {
351
+ if (typeof item === 'object' && item.start !== undefined) {
352
+ // It's a range - use direct loop for better performance
353
+ const end = item.start + item.count
354
+ for (let i = item.start; i < end; i++) {
355
+ yield i
356
+ }
357
+ } else {
358
+ // It's an individual number
359
+ yield item
360
+ }
361
+ }
362
+ }
363
+ }
364
+
365
+ // OPTIMIZATION 6: Ultra-fast add operation with incremental index updates
366
+ async add(row, lineNumber) {
367
+ if (typeof row !== 'object' || !row) {
368
+ throw new Error('Invalid \'row\' parameter, it must be an object')
369
+ }
370
+ if (typeof lineNumber !== 'number') {
371
+ throw new Error('Invalid line number')
372
+ }
373
+
374
+ // OPTIMIZATION 6: Use direct field access with minimal operations
375
+ const data = this.index.data
376
+
377
+ // OPTIMIZATION 6: Pre-allocate field structures for better performance
378
+ const fields = Object.keys(this.opts.indexes || {})
379
+ for (const field of fields) {
380
+ const value = row[field]
381
+ if (value !== undefined && value !== null) {
382
+ // OPTIMIZATION 6: Initialize field structure if it doesn't exist
383
+ if (!data[field]) {
384
+ data[field] = {}
385
+ }
386
+
387
+ const values = Array.isArray(value) ? value : [value]
388
+ for (const val of values) {
389
+ let key
390
+
391
+ // Check if this is a term mapping field (array:string fields only)
392
+ const isTermMappingField = this.database?.termManager &&
393
+ this.database.termManager.termMappingFields &&
394
+ this.database.termManager.termMappingFields.includes(field)
395
+
396
+ if (isTermMappingField && typeof val === 'number') {
397
+ // For term mapping fields (array:string), the values are already term IDs
398
+ key = String(val)
399
+ if (this.database.opts.debugMode) {
400
+ console.log(`🔍 IndexManager.add: Using term ID ${val} directly for field "${field}"`)
401
+ }
402
+ } else if (isTermMappingField && typeof val === 'string') {
403
+ // For term mapping fields (array:string), convert string to term ID
404
+ const termId = this.database.termManager.getTermIdWithoutIncrement(val)
405
+ key = String(termId)
406
+ if (this.database.opts.debugMode) {
407
+ console.log(`🔍 IndexManager.add: Using term ID ${termId} for term "${val}"`)
408
+ }
409
+ } else {
410
+ // For non-term-mapping fields (including array:number), use values directly
411
+ key = String(val)
412
+ if (this.database?.opts?.debugMode) {
413
+ console.log(`🔍 IndexManager.add: Using value "${val}" directly for field "${field}"`)
414
+ }
415
+ }
416
+
417
+ // OPTIMIZATION 6: Use direct assignment for better performance
418
+ if (!data[field][key]) {
419
+ data[field][key] = { set: new Set(), ranges: [] }
420
+ }
421
+
422
+ // OPTIMIZATION 6: Direct Set operation - fastest possible
423
+ data[field][key].set.add(lineNumber)
424
+
425
+ // OPTIMIZATION 6: Lazy range optimization - only when beneficial
426
+ if (data[field][key].set.size >= this.rangeThreshold * 3) {
427
+ this._optimizeToRanges(data[field][key])
428
+ }
429
+ }
430
+ }
431
+ }
432
+ }
433
+
434
+ /**
435
+ * OPTIMIZATION 6: Add multiple records to the index in batch with optimized operations
436
+ * @param {Array} records - Records to add
437
+ * @param {number} startLineNumber - Starting line number
438
+ */
439
+ async addBatch(records, startLineNumber) {
440
+ if (!records || !records.length) return
441
+
442
+ // OPTIMIZATION 6: Pre-allocate index structures for better performance
443
+ const data = this.index.data
444
+ const fields = Object.keys(this.opts.indexes || {})
445
+
446
+ for (const field of fields) {
447
+ if (!data[field]) {
448
+ data[field] = {}
449
+ }
450
+ }
451
+
452
+ // OPTIMIZATION 6: Use Map for batch processing to reduce lookups
453
+ const fieldUpdates = new Map()
454
+
455
+ // OPTIMIZATION 6: Process all records in batch with optimized data structures
456
+ for (let i = 0; i < records.length; i++) {
457
+ const row = records[i]
458
+ const lineNumber = startLineNumber + i
459
+
460
+ for (const field of fields) {
461
+ const value = row[field]
462
+ if (value !== undefined && value !== null) {
463
+ const values = Array.isArray(value) ? value : [value]
464
+ for (const val of values) {
465
+ let key
466
+
467
+ // Check if this is a term mapping field (array:string fields only)
468
+ const isTermMappingField = this.database?.termManager &&
469
+ this.database.termManager.termMappingFields &&
470
+ this.database.termManager.termMappingFields.includes(field)
471
+
472
+ if (isTermMappingField && typeof val === 'number') {
473
+ // For term mapping fields (array:string), the values are already term IDs
474
+ key = String(val)
475
+ if (this.database.opts.debugMode) {
476
+ console.log(`🔍 IndexManager.addBatch: Using term ID ${val} directly for field "${field}"`)
477
+ }
478
+ } else if (isTermMappingField && typeof val === 'string') {
479
+ // For term mapping fields (array:string), convert string to term ID
480
+ const termId = this.database.termManager.getTermIdWithoutIncrement(val)
481
+ key = String(termId)
482
+ if (this.database.opts.debugMode) {
483
+ console.log(`🔍 IndexManager.addBatch: Using term ID ${termId} for term "${val}"`)
484
+ }
485
+ } else {
486
+ // For non-term-mapping fields (including array:number), use values directly
487
+ key = String(val)
488
+ if (this.database?.opts?.debugMode) {
489
+ console.log(`🔍 IndexManager.addBatch: Using value "${val}" directly for field "${field}"`)
490
+ }
491
+ }
492
+
493
+ // OPTIMIZATION 6: Use Map for efficient batch updates
494
+ if (!fieldUpdates.has(field)) {
495
+ fieldUpdates.set(field, new Map())
496
+ }
497
+
498
+ const fieldMap = fieldUpdates.get(field)
499
+ if (!fieldMap.has(key)) {
500
+ fieldMap.set(key, new Set())
501
+ }
502
+
503
+ fieldMap.get(key).add(lineNumber)
504
+ }
505
+ }
506
+ }
507
+ }
508
+
509
+ // OPTIMIZATION 6: Apply all updates in batch for better performance
510
+ for (const [field, fieldMap] of fieldUpdates) {
511
+ for (const [key, lineNumbers] of fieldMap) {
512
+ if (!data[field][key]) {
513
+ data[field][key] = { set: new Set(), ranges: [] }
514
+ }
515
+
516
+ // OPTIMIZATION 6: Add all line numbers at once
517
+ for (const lineNumber of lineNumbers) {
518
+ data[field][key].set.add(lineNumber)
519
+ }
520
+
521
+ // OPTIMIZATION 6: Lazy range optimization - only when beneficial
522
+ if (data[field][key].set.size >= this.rangeThreshold * 3) {
523
+ this._optimizeToRanges(data[field][key])
524
+ }
525
+ }
526
+ }
527
+ }
528
+
529
+ // Ultra-fast dry remove
530
+ dryRemove(ln) {
531
+ const data = this.index.data
532
+ for (const field in data) {
533
+ for (const value in data[field]) {
534
+ // Direct Set operation - fastest possible
535
+ if (data[field][value].set) {
536
+ data[field][value].set.delete(ln)
537
+ }
538
+ if (data[field][value].ranges) {
539
+ data[field][value].ranges = this._removeFromRanges(data[field][value].ranges, ln)
540
+ }
541
+ // Remove empty entries
542
+ if ((!data[field][value].set || data[field][value].set.size === 0) &&
543
+ (!data[field][value].ranges || data[field][value].ranges.length === 0)) {
544
+ delete data[field][value]
545
+ }
546
+ }
547
+ }
548
+ }
549
+
550
+
551
+ // Cleanup method to free memory
552
+ cleanup() {
553
+ const data = this.index.data
554
+ for (const field in data) {
555
+ for (const value in data[field]) {
556
+ if (data[field][value].set) {
557
+ if (typeof data[field][value].set.clearAll === 'function') {
558
+ data[field][value].set.clearAll()
559
+ } else if (typeof data[field][value].set.clear === 'function') {
560
+ data[field][value].set.clear()
561
+ }
562
+ }
563
+ if (data[field][value].ranges) {
564
+ data[field][value].ranges.length = 0
565
+ }
566
+ }
567
+ // Clear the entire field
568
+ data[field] = {}
569
+ }
570
+ // Clear all data
571
+ this.index.data = {}
572
+ this.totalLines = 0
573
+ }
574
+
575
+ // Clear all indexes
576
+ clear() {
577
+ this.index.data = {}
578
+ this.totalLines = 0
579
+ }
580
+
581
+
582
+
583
+ // Update a record in the index
584
+ async update(oldRecord, newRecord, lineNumber = null) {
585
+ if (!oldRecord || !newRecord) return
586
+
587
+ // Remove old record by ID
588
+ await this.remove([oldRecord.id])
589
+
590
+ // Add new record with provided line number or use hash of the ID
591
+ const actualLineNumber = lineNumber !== null ? lineNumber : this._getIdAsNumber(newRecord.id)
592
+ await this.add(newRecord, actualLineNumber)
593
+ }
594
+
595
+ // Convert string ID to number for line number
596
+ _getIdAsNumber(id) {
597
+ if (typeof id === 'number') return id
598
+ if (typeof id === 'string') {
599
+ // Simple hash function to convert string to number
600
+ let hash = 0
601
+ for (let i = 0; i < id.length; i++) {
602
+ const char = id.charCodeAt(i)
603
+ hash = ((hash << 5) - hash) + char
604
+ hash = hash & hash // Convert to 32-bit integer
605
+ }
606
+ return Math.abs(hash)
607
+ }
608
+ return 0
609
+ }
610
+
611
+ // Remove a record from the index
612
+ async remove(record) {
613
+ if (!record) return
614
+
615
+ // If record is an array of line numbers, use the original method
616
+ if (Array.isArray(record)) {
617
+ return this._removeLineNumbers(record)
618
+ }
619
+
620
+ // If record is an object, remove by record data
621
+ if (typeof record === 'object' && record.id) {
622
+ return this._removeRecord(record)
623
+ }
624
+ }
625
+
626
+ // Remove a specific record from the index
627
+ _removeRecord(record) {
628
+ if (!record) return
629
+
630
+ const data = this.index.data
631
+ for (const field in data) {
632
+ if (record[field] !== undefined && record[field] !== null) {
633
+ const values = Array.isArray(record[field]) ? record[field] : [record[field]]
634
+ for (const val of values) {
635
+ let key
636
+
637
+ // Check if this is a term mapping field (array:string fields only)
638
+ const isTermMappingField = this.database?.termManager &&
639
+ this.database.termManager.termMappingFields &&
640
+ this.database.termManager.termMappingFields.includes(field)
641
+
642
+ if (isTermMappingField && typeof val === 'number') {
643
+ // For term mapping fields (array:string), the values are already term IDs
644
+ key = String(val)
645
+ if (this.database.opts.debugMode) {
646
+ console.log(`🔍 IndexManager._removeRecord: Using term ID ${val} directly for field "${field}"`)
647
+ }
648
+ } else if (isTermMappingField && typeof val === 'string') {
649
+ // For term mapping fields (array:string), convert string to term ID
650
+ const termId = this.database.termManager.getTermIdWithoutIncrement(val)
651
+ key = String(termId)
652
+ if (this.database.opts.debugMode) {
653
+ console.log(`🔍 IndexManager._removeRecord: Using term ID ${termId} for term "${val}"`)
654
+ }
655
+ } else {
656
+ // For non-term-mapping fields (including array:number), use values directly
657
+ key = String(val)
658
+ if (this.database?.opts?.debugMode) {
659
+ console.log(`🔍 IndexManager._removeRecord: Using value "${val}" directly for field "${field}"`)
660
+ }
661
+ }
662
+
663
+ // Note: TermManager notification is handled by Database.mjs
664
+ // to avoid double decrementation during updates
665
+
666
+ if (data[field][key]) {
667
+ const lineNumbers = this._getAllLineNumbers(data[field][key])
668
+ // Find and remove the specific record's line number
669
+ const recordLineNumber = this._getIdAsNumber(record.id)
670
+ const filteredLineNumbers = lineNumbers.filter(ln => ln !== recordLineNumber)
671
+
672
+ if (filteredLineNumbers.length === 0) {
673
+ delete data[field][key]
674
+ } else {
675
+ // Rebuild the index value with filtered line numbers
676
+ data[field][key].set = new Set(filteredLineNumbers)
677
+ data[field][key].ranges = []
678
+ }
679
+ }
680
+ }
681
+ }
682
+ }
683
+ }
684
+
685
+ // Ultra-fast remove with batch processing (renamed from remove)
686
+ _removeLineNumbers(lineNumbers) {
687
+ if (!lineNumbers || lineNumbers.length === 0) return
688
+
689
+ lineNumbers.sort((a, b) => a - b) // Sort ascending for efficient processing
690
+
691
+ const data = this.index.data
692
+ for (const field in data) {
693
+ for (const value in data[field]) {
694
+ const numbers = this._getAllLineNumbers(data[field][value])
695
+ const newNumbers = []
696
+
697
+ for (const ln of numbers) {
698
+ let offset = 0
699
+ for (const lineNumber of lineNumbers) {
700
+ if (lineNumber < ln) {
701
+ offset++
702
+ } else if (lineNumber === ln) {
703
+ offset = -1 // Mark for removal
704
+ break
705
+ }
706
+ }
707
+ if (offset >= 0) {
708
+ newNumbers.push(ln - offset) // Update the value
709
+ }
710
+ }
711
+
712
+ if (newNumbers.length > 0) {
713
+ // Rebuild hybrid structure with new numbers
714
+ data[field][value] = { set: new Set(), ranges: [] }
715
+ for (const num of newNumbers) {
716
+ data[field][value] = this._addLineNumber(data[field][value], num)
717
+ }
718
+ } else {
719
+ delete data[field][value]
720
+ }
721
+ }
722
+ }
723
+ }
724
+
725
+ // Ultra-fast replace with batch processing
726
+ replace(map) {
727
+ if (!map || map.size === 0) return
728
+
729
+ const data = this.index.data
730
+ for (const field in data) {
731
+ for (const value in data[field]) {
732
+ const numbers = this._getAllLineNumbers(data[field][value])
733
+ const newNumbers = []
734
+
735
+ for (const lineNumber of numbers) {
736
+ if (map.has(lineNumber)) {
737
+ newNumbers.push(map.get(lineNumber))
738
+ } else {
739
+ newNumbers.push(lineNumber)
740
+ }
741
+ }
742
+
743
+ // Rebuild hybrid structure with new numbers
744
+ data[field][value] = { set: new Set(), ranges: [] }
745
+ for (const num of newNumbers) {
746
+ data[field][value] = this._addLineNumber(data[field][value], num)
747
+ }
748
+ }
749
+ }
750
+ }
751
+
752
+ // Ultra-fast query with early exit and smart processing
753
+ query(criteria, options = {}) {
754
+ if (typeof options === 'boolean') {
755
+ options = { matchAny: options };
756
+ }
757
+ const { matchAny = false, caseInsensitive = false } = options;
758
+
759
+ if (!criteria) {
760
+ // Return all line numbers when no criteria provided
761
+ return new Set(Array.from({ length: this.totalLines || 0 }, (_, i) => i));
762
+ }
763
+
764
+ // Handle $not operator
765
+ if (criteria.$not && typeof criteria.$not === 'object') {
766
+ // Get all possible line numbers from database offsets or totalLines
767
+ const totalRecords = this.database?.offsets?.length || this.totalLines || 0;
768
+ const allLines = new Set(Array.from({ length: totalRecords }, (_, i) => i));
769
+
770
+ // Get line numbers matching the $not condition
771
+ const notLines = this.query(criteria.$not, options);
772
+
773
+ // Return complement (all lines except those matching $not condition)
774
+ const result = new Set([...allLines].filter(x => !notLines.has(x)));
775
+
776
+ // If there are other conditions besides $not, we need to intersect with them
777
+ const otherCriteria = { ...criteria };
778
+ delete otherCriteria.$not;
779
+
780
+ if (Object.keys(otherCriteria).length > 0) {
781
+ const otherResults = this.query(otherCriteria, options);
782
+ return new Set([...result].filter(x => otherResults.has(x)));
783
+ }
784
+
785
+ return result;
786
+ }
787
+
788
+ // Handle $and queries with parallel processing optimization
789
+ if (criteria.$and && Array.isArray(criteria.$and)) {
790
+ // OPTIMIZATION: Process conditions in parallel for better performance
791
+ if (criteria.$and.length > 1) {
792
+ // Process all conditions in parallel (synchronous since query is not async)
793
+ const conditionResults = criteria.$and.map(andCondition =>
794
+ this.query(andCondition, options)
795
+ );
796
+
797
+ // Intersect all results for AND logic
798
+ let result = conditionResults[0];
799
+ for (let i = 1; i < conditionResults.length; i++) {
800
+ result = new Set([...result].filter(x => conditionResults[i].has(x)));
801
+ }
802
+
803
+ // IMPORTANT: Check if there are other fields besides $and at the root level
804
+ // If so, we need to intersect with them too
805
+ const otherCriteria = { ...criteria };
806
+ delete otherCriteria.$and;
807
+
808
+ if (Object.keys(otherCriteria).length > 0) {
809
+ const otherResults = this.query(otherCriteria, options);
810
+ result = new Set([...result].filter(x => otherResults.has(x)));
811
+ }
812
+
813
+ return result || new Set();
814
+ } else {
815
+ // Single condition - check for other criteria at root level
816
+ const andResult = this.query(criteria.$and[0], options);
817
+
818
+ const otherCriteria = { ...criteria };
819
+ delete otherCriteria.$and;
820
+
821
+ if (Object.keys(otherCriteria).length > 0) {
822
+ const otherResults = this.query(otherCriteria, options);
823
+ return new Set([...andResult].filter(x => otherResults.has(x)));
824
+ }
825
+
826
+ return andResult;
827
+ }
828
+ }
829
+
830
+ const fields = Object.keys(criteria);
831
+ if (!fields.length) {
832
+ // Return all line numbers when criteria is empty object
833
+ return new Set(Array.from({ length: this.totalLines || 0 }, (_, i) => i));
834
+ }
835
+
836
+ let matchingLines = matchAny ? new Set() : null;
837
+ const data = this.index.data
838
+
839
+ for (const field of fields) {
840
+ // Skip logical operators - they are handled separately
841
+ if (field.startsWith('$')) continue;
842
+
843
+ if (typeof data[field] === 'undefined') continue;
844
+
845
+ const criteriaValue = criteria[field];
846
+ let lineNumbersForField = new Set();
847
+ const isNumericField = this.opts.indexes[field] === 'number';
848
+
849
+ // Handle RegExp values directly (MUST check before object check since RegExp is an object)
850
+ if (criteriaValue instanceof RegExp) {
851
+ // RegExp cannot be efficiently queried using indices - fall back to streaming
852
+ // This will be handled by the QueryManager's streaming strategy
853
+ continue;
854
+ }
855
+
856
+ if (typeof criteriaValue === 'object' && !Array.isArray(criteriaValue)) {
857
+ const fieldIndex = data[field];
858
+
859
+ // Handle $in operator for array queries
860
+ if (criteriaValue.$in !== undefined) {
861
+ const inValues = Array.isArray(criteriaValue.$in) ? criteriaValue.$in : [criteriaValue.$in];
862
+ for (const inValue of inValues) {
863
+ // SPACE OPTIMIZATION: Convert search term to term ID for lookup
864
+ let searchTermId
865
+
866
+ // Check if this is a term mapping field
867
+ const isTermMappingField = this.database?.termManager &&
868
+ this.database.termManager.termMappingFields &&
869
+ this.database.termManager.termMappingFields.includes(field)
870
+
871
+ if (isTermMappingField && typeof inValue === 'number') {
872
+ // For term mapping fields (array:string), the search value is already a term ID
873
+ searchTermId = String(inValue)
874
+ } else if (isTermMappingField && typeof inValue === 'string') {
875
+ // For term mapping fields (array:string), convert string to term ID
876
+ searchTermId = this.database?.termManager?.getTermIdWithoutIncrement(String(inValue)) || String(inValue)
877
+ } else {
878
+ // For non-term-mapping fields (including array:number), use values directly
879
+ searchTermId = String(inValue)
880
+ }
881
+
882
+ // Handle case-insensitive for $in
883
+ if (caseInsensitive && typeof inValue === 'string') {
884
+ for (const value in fieldIndex) {
885
+ if (value.toLowerCase() === searchTermId.toLowerCase()) {
886
+ const numbers = this._getAllLineNumbers(fieldIndex[value]);
887
+ for (const lineNumber of numbers) {
888
+ lineNumbersForField.add(lineNumber);
889
+ }
890
+ }
891
+ }
892
+ } else {
893
+ if (fieldIndex[searchTermId]) {
894
+ const numbers = this._getAllLineNumbers(fieldIndex[searchTermId]);
895
+ for (const lineNumber of numbers) {
896
+ lineNumbersForField.add(lineNumber);
897
+ }
898
+ }
899
+ }
900
+ }
901
+ }
902
+ // Handle $nin operator (not in) - returns complement of $in
903
+ else if (criteriaValue.$nin !== undefined) {
904
+ const ninValues = Array.isArray(criteriaValue.$nin) ? criteriaValue.$nin : [criteriaValue.$nin];
905
+
906
+ // Get all possible line numbers
907
+ const totalRecords = this.database?.offsets?.length || this.totalLines || 0;
908
+ const allLines = new Set(Array.from({ length: totalRecords }, (_, i) => i));
909
+
910
+ // Get line numbers that match any of the $nin values
911
+ const matchingLines = new Set();
912
+
913
+ for (const ninValue of ninValues) {
914
+ // SPACE OPTIMIZATION: Convert search term to term ID for lookup
915
+ let searchTermId
916
+
917
+ // Check if this is a term mapping field
918
+ const isTermMappingField = this.database?.termManager &&
919
+ this.database.termManager.termMappingFields &&
920
+ this.database.termManager.termMappingFields.includes(field)
921
+
922
+ if (isTermMappingField && typeof ninValue === 'number') {
923
+ // For term mapping fields (array:string), the search value is already a term ID
924
+ searchTermId = String(ninValue)
925
+ } else if (isTermMappingField && typeof ninValue === 'string') {
926
+ // For term mapping fields (array:string), convert string to term ID
927
+ searchTermId = this.database?.termManager?.getTermIdWithoutIncrement(String(ninValue)) || String(ninValue)
928
+ } else {
929
+ // For non-term-mapping fields (including array:number), use values directly
930
+ searchTermId = String(ninValue)
931
+ }
932
+
933
+ // Handle case-insensitive for $nin
934
+ if (caseInsensitive && typeof ninValue === 'string') {
935
+ for (const value in fieldIndex) {
936
+ if (value.toLowerCase() === searchTermId.toLowerCase()) {
937
+ const numbers = this._getAllLineNumbers(fieldIndex[value]);
938
+ for (const lineNumber of numbers) {
939
+ matchingLines.add(lineNumber);
940
+ }
941
+ }
942
+ }
943
+ } else {
944
+ if (fieldIndex[searchTermId]) {
945
+ const numbers = this._getAllLineNumbers(fieldIndex[searchTermId]);
946
+ for (const lineNumber of numbers) {
947
+ matchingLines.add(lineNumber);
948
+ }
949
+ }
950
+ }
951
+ }
952
+
953
+ // Return complement: all lines EXCEPT those matching $nin values
954
+ lineNumbersForField = new Set([...allLines].filter(x => !matchingLines.has(x)));
955
+ }
956
+ // Handle $contains operator for array queries
957
+ else if (criteriaValue.$contains !== undefined) {
958
+ const containsValue = criteriaValue.$contains;
959
+ // Handle case-insensitive for $contains
960
+ if (caseInsensitive && typeof containsValue === 'string') {
961
+ for (const value in fieldIndex) {
962
+ if (value.toLowerCase() === containsValue.toLowerCase()) {
963
+ const numbers = this._getAllLineNumbers(fieldIndex[value]);
964
+ for (const lineNumber of numbers) {
965
+ lineNumbersForField.add(lineNumber);
966
+ }
967
+ }
968
+ }
969
+ } else {
970
+ if (fieldIndex[containsValue]) {
971
+ const numbers = this._getAllLineNumbers(fieldIndex[containsValue]);
972
+ for (const lineNumber of numbers) {
973
+ lineNumbersForField.add(lineNumber);
974
+ }
975
+ }
976
+ }
977
+ }
978
+ // Handle $all operator for array queries - FIXED FOR TERM MAPPING
979
+ else if (criteriaValue.$all !== undefined) {
980
+ const allValues = Array.isArray(criteriaValue.$all) ? criteriaValue.$all : [criteriaValue.$all];
981
+
982
+ // Early exit optimization
983
+ if (allValues.length === 0) {
984
+ // Empty $all matches everything
985
+ for (const value in fieldIndex) {
986
+ const numbers = this._getAllLineNumbers(fieldIndex[value]);
987
+ for (const lineNumber of numbers) {
988
+ lineNumbersForField.add(lineNumber);
989
+ }
990
+ }
991
+ } else {
992
+ // For term mapping, we need to find records that contain ALL specified terms
993
+ // This requires a different approach than simple field matching
994
+
995
+ // First, get all line numbers that contain each individual term
996
+ const termLineNumbers = new Map();
997
+ for (const term of allValues) {
998
+ if (fieldIndex[term]) {
999
+ termLineNumbers.set(term, new Set(this._getAllLineNumbers(fieldIndex[term])));
1000
+ } else {
1001
+ // If any term doesn't exist, no records can match $all
1002
+ termLineNumbers.set(term, new Set());
1003
+ }
1004
+ }
1005
+
1006
+ // Find intersection of all term line numbers
1007
+ if (termLineNumbers.size > 0) {
1008
+ const allTermSets = Array.from(termLineNumbers.values());
1009
+ let intersection = allTermSets[0];
1010
+
1011
+ for (let i = 1; i < allTermSets.length; i++) {
1012
+ intersection = new Set([...intersection].filter(x => allTermSets[i].has(x)));
1013
+ }
1014
+
1015
+ // Add all line numbers from intersection
1016
+ for (const lineNumber of intersection) {
1017
+ lineNumbersForField.add(lineNumber);
1018
+ }
1019
+ }
1020
+ }
1021
+ }
1022
+ // Handle other operators
1023
+ else {
1024
+ for (const value in fieldIndex) {
1025
+ let includeValue = true;
1026
+ if (isNumericField) {
1027
+ const numericValue = parseFloat(value);
1028
+ if (!isNaN(numericValue)) {
1029
+ if (criteriaValue['>'] !== undefined && numericValue <= criteriaValue['>']) {
1030
+ includeValue = false;
1031
+ }
1032
+ if (criteriaValue['>='] !== undefined && numericValue < criteriaValue['>=']) {
1033
+ includeValue = false;
1034
+ }
1035
+ if (criteriaValue['<'] !== undefined && numericValue >= criteriaValue['<']) {
1036
+ includeValue = false;
1037
+ }
1038
+ if (criteriaValue['<='] !== undefined && numericValue > criteriaValue['<=']) {
1039
+ includeValue = false;
1040
+ }
1041
+ if (criteriaValue['!='] !== undefined) {
1042
+ const excludeValues = Array.isArray(criteriaValue['!='])
1043
+ ? criteriaValue['!=']
1044
+ : [criteriaValue['!=']];
1045
+ if (excludeValues.includes(numericValue)) {
1046
+ includeValue = false;
1047
+ }
1048
+ }
1049
+ }
1050
+ } else {
1051
+ if (criteriaValue['contains'] !== undefined && typeof value === 'string') {
1052
+ const term = String(criteriaValue['contains']);
1053
+ if (caseInsensitive) {
1054
+ if (!value.toLowerCase().includes(term.toLowerCase())) {
1055
+ includeValue = false;
1056
+ }
1057
+ } else {
1058
+ if (!value.includes(term)) {
1059
+ includeValue = false;
1060
+ }
1061
+ }
1062
+ }
1063
+ if (criteriaValue['regex'] !== undefined) {
1064
+ let regex;
1065
+ if (typeof criteriaValue['regex'] === 'string') {
1066
+ regex = new RegExp(criteriaValue['regex'], caseInsensitive ? 'i' : '');
1067
+ } else if (criteriaValue['regex'] instanceof RegExp) {
1068
+ if (caseInsensitive && !criteriaValue['regex'].ignoreCase) {
1069
+ const flags = criteriaValue['regex'].flags.includes('i')
1070
+ ? criteriaValue['regex'].flags
1071
+ : criteriaValue['regex'].flags + 'i';
1072
+ regex = new RegExp(criteriaValue['regex'].source, flags);
1073
+ } else {
1074
+ regex = criteriaValue['regex'];
1075
+ }
1076
+ }
1077
+ if (regex) {
1078
+ // For array fields, test regex against each element
1079
+ if (Array.isArray(value)) {
1080
+ if (!value.some(element => regex.test(String(element)))) {
1081
+ includeValue = false;
1082
+ }
1083
+ } else {
1084
+ // For non-array fields, test regex against the value directly
1085
+ if (!regex.test(String(value))) {
1086
+ includeValue = false;
1087
+ }
1088
+ }
1089
+ }
1090
+ }
1091
+ if (criteriaValue['!='] !== undefined) {
1092
+ const excludeValues = Array.isArray(criteriaValue['!='])
1093
+ ? criteriaValue['!=']
1094
+ : [criteriaValue['!=']];
1095
+ if (excludeValues.includes(value)) {
1096
+ includeValue = false;
1097
+ }
1098
+ }
1099
+ }
1100
+
1101
+ if (includeValue) {
1102
+ const numbers = this._getAllLineNumbers(fieldIndex[value]);
1103
+ for (const lineNumber of numbers) {
1104
+ lineNumbersForField.add(lineNumber);
1105
+ }
1106
+ }
1107
+ }
1108
+ }
1109
+ } else {
1110
+ // Simple equality comparison - handle array queries
1111
+ const values = Array.isArray(criteriaValue) ? criteriaValue : [criteriaValue];
1112
+ const fieldData = data[field];
1113
+ for (const searchValue of values) {
1114
+ // SPACE OPTIMIZATION: Convert search term to term ID for lookup
1115
+ let searchTermId
1116
+
1117
+ // Check if this is a term mapping field
1118
+ const isTermMappingField = this.database?.termManager &&
1119
+ this.database.termManager.termMappingFields &&
1120
+ this.database.termManager.termMappingFields.includes(field)
1121
+
1122
+ if (isTermMappingField && typeof searchValue === 'number') {
1123
+ // For term mapping fields (array:string), the search value is already a term ID
1124
+ searchTermId = String(searchValue)
1125
+ } else if (isTermMappingField && typeof searchValue === 'string') {
1126
+ // For term mapping fields (array:string), convert string to term ID
1127
+ searchTermId = this.database?.termManager?.getTermIdWithoutIncrement(String(searchValue)) || String(searchValue)
1128
+ } else {
1129
+ // For non-term-mapping fields (including array:number), use values directly
1130
+ searchTermId = String(searchValue)
1131
+ }
1132
+
1133
+ for (const key in fieldData) {
1134
+ let match = false;
1135
+ if (isNumericField) {
1136
+ // Convert both parts to number
1137
+ match = Number(key) === Number(searchValue);
1138
+ } else {
1139
+ // SPACE OPTIMIZATION: Compare term IDs instead of full terms
1140
+ if (caseInsensitive) {
1141
+ // For case-insensitive, we need to check if the search term ID matches any key
1142
+ match = key === String(searchTermId);
1143
+ } else {
1144
+ match = key === String(searchTermId);
1145
+ }
1146
+ }
1147
+ if (match) {
1148
+ const numbers = this._getAllLineNumbers(fieldData[key]);
1149
+ for (const lineNumber of numbers) {
1150
+ lineNumbersForField.add(lineNumber);
1151
+ }
1152
+ }
1153
+ }
1154
+ }
1155
+ }
1156
+
1157
+ // Consolidate results from each field
1158
+ if (matchAny) {
1159
+ matchingLines = new Set([...matchingLines, ...lineNumbersForField]);
1160
+ } else {
1161
+ if (matchingLines === null) {
1162
+ matchingLines = lineNumbersForField;
1163
+ } else {
1164
+ matchingLines = new Set([...matchingLines].filter(n => lineNumbersForField.has(n)));
1165
+ }
1166
+ if (!matchingLines.size) {
1167
+ return new Set();
1168
+ }
1169
+ }
1170
+ }
1171
+ return matchingLines || new Set();
1172
+ }
1173
+
1174
+ // Ultra-fast load with minimal conversions
1175
+ load(index) {
1176
+ // CRITICAL FIX: Check if index is already loaded by looking for actual data, not just empty field structures
1177
+ if (this.index && this.index.data) {
1178
+ let hasActualData = false
1179
+ for (const field in this.index.data) {
1180
+ const fieldData = this.index.data[field]
1181
+ if (fieldData && Object.keys(fieldData).length > 0) {
1182
+ // Check if any field has actual index entries with data
1183
+ for (const key in fieldData) {
1184
+ const entry = fieldData[key]
1185
+ if (entry && ((entry.set && entry.set.size > 0) || (entry.ranges && entry.ranges.length > 0))) {
1186
+ hasActualData = true
1187
+ break
1188
+ }
1189
+ }
1190
+ if (hasActualData) break
1191
+ }
1192
+ }
1193
+
1194
+ if (hasActualData) {
1195
+ if (this.opts.debugMode) {
1196
+ console.log('🔍 IndexManager.load: Index already loaded with actual data, skipping')
1197
+ }
1198
+ return
1199
+ }
1200
+ }
1201
+
1202
+ // CRITICAL FIX: Add comprehensive null/undefined validation
1203
+ if (!index || typeof index !== 'object') {
1204
+ if (this.opts.debugMode) {
1205
+ console.log(`🔍 IndexManager.load: Invalid index data provided (${typeof index}), using defaults`)
1206
+ }
1207
+ return this._initializeDefaults()
1208
+ }
1209
+
1210
+ if (!index.data || typeof index.data !== 'object') {
1211
+ if (this.opts.debugMode) {
1212
+ console.log(`🔍 IndexManager.load: Invalid index.data provided (${typeof index.data}), using defaults`)
1213
+ }
1214
+ return this._initializeDefaults()
1215
+ }
1216
+
1217
+ // CRITICAL FIX: Only log if there are actual fields to load
1218
+ if (this.opts.debugMode && Object.keys(index.data).length > 0) {
1219
+ console.log(`🔍 IndexManager.load: Loading index with fields: ${Object.keys(index.data).join(', ')}`)
1220
+ }
1221
+
1222
+ // Create a deep copy to avoid reference issues
1223
+ const processedIndex = {
1224
+ data: {}
1225
+ }
1226
+
1227
+ // CRITICAL FIX: Add null/undefined checks for field iteration
1228
+ const fields = Object.keys(index.data)
1229
+ for(const field of fields) {
1230
+ if (!field || typeof field !== 'string') {
1231
+ continue // Skip invalid field names
1232
+ }
1233
+
1234
+ const fieldData = index.data[field]
1235
+ if (!fieldData || typeof fieldData !== 'object') {
1236
+ continue // Skip invalid field data
1237
+ }
1238
+
1239
+ processedIndex.data[field] = {}
1240
+
1241
+ const terms = Object.keys(fieldData)
1242
+ for(const term of terms) {
1243
+ if (!term || typeof term !== 'string') {
1244
+ continue // Skip invalid term names
1245
+ }
1246
+
1247
+ const termData = fieldData[term]
1248
+
1249
+ // Convert various formats to new hybrid format
1250
+ if (Array.isArray(termData)) {
1251
+ // Check if it's the new compact format [setArray, rangesArray]
1252
+ if (termData.length === 2 && Array.isArray(termData[0]) && Array.isArray(termData[1])) {
1253
+ // New compact format: [setArray, rangesArray]
1254
+ // Convert ultra-compact ranges [start, count] back to {start, count}
1255
+ const ranges = termData[1].map(range => {
1256
+ if (Array.isArray(range) && range.length === 2) {
1257
+ // Ultra-compact format: [start, count]
1258
+ return { start: range[0], count: range[1] }
1259
+ } else {
1260
+ // Legacy format: {start, count}
1261
+ return range
1262
+ }
1263
+ })
1264
+ processedIndex.data[field][term] = {
1265
+ set: new Set(termData[0]),
1266
+ ranges: ranges
1267
+ }
1268
+ } else {
1269
+ // Legacy array format (just set data)
1270
+ processedIndex.data[field][term] = { set: new Set(termData), ranges: [] }
1271
+ }
1272
+ } else if (termData && typeof termData === 'object') {
1273
+ if (termData.set || termData.ranges) {
1274
+ // Legacy hybrid format - convert set array back to Set
1275
+ const hybridData = termData
1276
+ let setObject
1277
+ if (Array.isArray(hybridData.set)) {
1278
+ // Convert array back to Set
1279
+ setObject = new Set(hybridData.set)
1280
+ } else {
1281
+ // Fallback to empty Set
1282
+ setObject = new Set()
1283
+ }
1284
+ processedIndex.data[field][term] = {
1285
+ set: setObject,
1286
+ ranges: hybridData.ranges || []
1287
+ }
1288
+ } else {
1289
+ // Convert from Set format to hybrid
1290
+ const numbers = Array.from(termData || [])
1291
+ processedIndex.data[field][term] = { set: new Set(numbers), ranges: [] }
1292
+ }
1293
+ }
1294
+ }
1295
+ }
1296
+
1297
+ // Preserve initialized fields if no data was loaded
1298
+ if (!processedIndex.data || Object.keys(processedIndex.data).length === 0) {
1299
+ // CRITICAL FIX: Only log if debug mode is enabled and there are actual fields
1300
+ if (this.opts.debugMode && this.index.data && Object.keys(this.index.data).length > 0) {
1301
+ console.log(`🔍 IndexManager.load: No data loaded, preserving initialized fields: ${Object.keys(this.index.data).join(', ')}`)
1302
+ }
1303
+ // Keep the current index with initialized fields
1304
+ return
1305
+ }
1306
+
1307
+ this.index = processedIndex
1308
+ }
1309
+
1310
+ /**
1311
+ * CRITICAL FIX: Initialize default index structure when invalid data is provided
1312
+ * This prevents TypeError when Object.keys() is called on null/undefined
1313
+ */
1314
+ _initializeDefaults() {
1315
+ if (this.opts.debugMode) {
1316
+ console.log(`🔍 IndexManager._initializeDefaults: Initializing default index structure`)
1317
+ }
1318
+
1319
+ // Initialize empty index structure
1320
+ this.index = { data: {} }
1321
+
1322
+ // Initialize fields from options if available
1323
+ if (this.opts.indexes && typeof this.opts.indexes === 'object') {
1324
+ const fields = Object.keys(this.opts.indexes)
1325
+ for (const field of fields) {
1326
+ if (field && typeof field === 'string') {
1327
+ this.index.data[field] = {}
1328
+ }
1329
+ }
1330
+ }
1331
+
1332
+ if (this.opts.debugMode) {
1333
+ console.log(`🔍 IndexManager._initializeDefaults: Initialized with fields: ${Object.keys(this.index.data).join(', ')}`)
1334
+ }
1335
+ }
1336
+
1337
+ readColumnIndex(column) {
1338
+ return new Set((this.index.data && this.index.data[column]) ? Object.keys(this.index.data[column]) : [])
1339
+ }
1340
+
1341
+ /**
1342
+ * Convert index to JSON-serializable format for debugging and export
1343
+ * This resolves the issue where Sets appear as empty objects in JSON.stringify
1344
+ */
1345
+ toJSON() {
1346
+ const serializable = { data: {} }
1347
+
1348
+ for (const field in this.index.data) {
1349
+ serializable.data[field] = {}
1350
+
1351
+ for (const term in this.index.data[field]) {
1352
+ const hybridData = this.index.data[field][term]
1353
+
1354
+ // OPTIMIZATION: Create ranges before serialization if beneficial
1355
+ if (hybridData.set && hybridData.set.size >= this.rangeThreshold) {
1356
+ this._optimizeToRanges(hybridData)
1357
+ }
1358
+
1359
+ // Convert hybrid structure to serializable format
1360
+ let setArray = []
1361
+ if (hybridData.set) {
1362
+ if (typeof hybridData.set.size !== 'undefined') {
1363
+ // Regular Set
1364
+ setArray = Array.from(hybridData.set)
1365
+ }
1366
+ }
1367
+
1368
+ // Use ultra-compact format: [setArray, rangesArray] to save space
1369
+ const ranges = hybridData.ranges || []
1370
+ if (ranges.length > 0) {
1371
+ // Convert ranges to ultra-compact format: [start, count] instead of {start, count}
1372
+ const compactRanges = ranges.map(range => [range.start, range.count])
1373
+ serializable.data[field][term] = [setArray, compactRanges]
1374
+ } else {
1375
+ // CRITICAL FIX: Always use the [setArray, []] format for consistency
1376
+ // This ensures the load() method can properly deserialize the data
1377
+ serializable.data[field][term] = [setArray, []]
1378
+ }
1379
+ }
1380
+ }
1381
+
1382
+ return serializable
1383
+ }
1384
+
1385
+ /**
1386
+ * Get a JSON string representation of the index
1387
+ * This properly handles Sets unlike the default JSON.stringify
1388
+ */
1389
+ toString() {
1390
+ return JSON.stringify(this.toJSON(), null, 2)
1391
+ }
1392
+
1393
+ // Simplified term mapping methods - just basic functionality
1394
+
1395
+ /**
1396
+ * Rebuild index (stub for compatibility)
1397
+ */
1398
+ async rebuild() {
1399
+ // Stub implementation for compatibility
1400
+ return Promise.resolve()
1401
+ }
1402
+ }
1403
+