jexidb 2.1.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Database.cjs +1642 -334
- package/docs/API.md +1057 -1051
- package/package.json +1 -1
- package/scripts/benchmark-array-serialization.js +108 -0
- package/scripts/score-mode-demo.js +45 -0
- package/src/Database.mjs +1362 -167
- package/src/FileHandler.mjs +83 -44
- package/src/OperationQueue.mjs +23 -23
- package/src/Serializer.mjs +214 -23
- package/src/managers/IndexManager.mjs +778 -87
- package/src/managers/QueryManager.mjs +266 -49
- package/src/managers/TermManager.mjs +7 -7
- package/src/utils/operatorNormalizer.mjs +116 -0
- package/test/coverage-method.test.js +93 -0
- package/test/deserialize-corruption-fixes.test.js +296 -0
- package/test/exists-method.test.js +318 -0
- package/test/explicit-indexes-comparison.test.js +219 -0
- package/test/filehandler-non-adjacent-ranges-bug.test.js +175 -0
- package/test/index-line-number-regression.test.js +100 -0
- package/test/index-missing-index-data.test.js +91 -0
- package/test/index-persistence.test.js +205 -20
- package/test/insert-session-auto-flush.test.js +353 -0
- package/test/legacy-operator-compat.test.js +154 -0
- package/test/score-method.test.js +60 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { Mutex } from 'async-mutex'
|
|
2
|
+
import { normalizeCriteriaOperators } from '../utils/operatorNormalizer.mjs'
|
|
2
3
|
|
|
3
4
|
export default class IndexManager {
|
|
4
5
|
constructor(opts, databaseMutex = null, database = null) {
|
|
@@ -13,21 +14,107 @@ export default class IndexManager {
|
|
|
13
14
|
// If no database mutex provided, create a local one (for backward compatibility)
|
|
14
15
|
this.mutex = databaseMutex || new Mutex()
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
this.indexedFields = []
|
|
18
|
+
this.setIndexesConfig(this.opts.indexes)
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
setTotalLines(total) {
|
|
22
|
+
this.totalLines = total
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Update indexes configuration and ensure internal structures stay in sync
|
|
27
|
+
* @param {Object|Array<string>} indexes
|
|
28
|
+
*/
|
|
29
|
+
setIndexesConfig(indexes) {
|
|
30
|
+
if (!indexes) {
|
|
31
|
+
this.opts.indexes = undefined
|
|
32
|
+
this.indexedFields = []
|
|
33
|
+
return
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
if (Array.isArray(indexes)) {
|
|
37
|
+
const fields = indexes.map(field => String(field))
|
|
38
|
+
this.indexedFields = fields
|
|
39
|
+
|
|
40
|
+
const normalizedConfig = {}
|
|
41
|
+
for (const field of fields) {
|
|
42
|
+
const existingConfig = (!Array.isArray(this.opts.indexes) && typeof this.opts.indexes === 'object') ? this.opts.indexes[field] : undefined
|
|
43
|
+
normalizedConfig[field] = existingConfig ?? 'auto'
|
|
44
|
+
if (!this.index.data[field]) {
|
|
45
|
+
this.index.data[field] = {}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
this.opts.indexes = normalizedConfig
|
|
49
|
+
return
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if (typeof indexes === 'object') {
|
|
53
|
+
this.opts.indexes = Object.assign({}, indexes)
|
|
18
54
|
this.indexedFields = Object.keys(this.opts.indexes)
|
|
19
|
-
|
|
55
|
+
|
|
56
|
+
for (const field of this.indexedFields) {
|
|
20
57
|
if (!this.index.data[field]) {
|
|
21
58
|
this.index.data[field] = {}
|
|
22
59
|
}
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
this.indexedFields = []
|
|
26
|
-
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
27
62
|
}
|
|
28
63
|
|
|
29
|
-
|
|
30
|
-
|
|
64
|
+
/**
|
|
65
|
+
* Check if a field is configured as an index
|
|
66
|
+
* @param {string} field - Field name
|
|
67
|
+
* @returns {boolean}
|
|
68
|
+
*/
|
|
69
|
+
isFieldIndexed(field) {
|
|
70
|
+
if (!field) return false
|
|
71
|
+
if (!Array.isArray(this.indexedFields)) {
|
|
72
|
+
return false
|
|
73
|
+
}
|
|
74
|
+
return this.indexedFields.includes(field)
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Determine whether the index has usable data for a given field
|
|
79
|
+
* @param {string} field - Field name
|
|
80
|
+
* @returns {boolean}
|
|
81
|
+
*/
|
|
82
|
+
hasUsableIndexData(field) {
|
|
83
|
+
if (!field) return false
|
|
84
|
+
const fieldData = this.index?.data?.[field]
|
|
85
|
+
if (!fieldData || typeof fieldData !== 'object') {
|
|
86
|
+
return false
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
for (const key in fieldData) {
|
|
90
|
+
if (!Object.prototype.hasOwnProperty.call(fieldData, key)) continue
|
|
91
|
+
const entry = fieldData[key]
|
|
92
|
+
if (!entry) continue
|
|
93
|
+
|
|
94
|
+
if (entry.set && typeof entry.set.size === 'number' && entry.set.size > 0) {
|
|
95
|
+
return true
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
if (Array.isArray(entry.ranges) && entry.ranges.length > 0) {
|
|
99
|
+
const hasRangeData = entry.ranges.some(range => {
|
|
100
|
+
if (range === null || typeof range === 'undefined') {
|
|
101
|
+
return false
|
|
102
|
+
}
|
|
103
|
+
if (typeof range === 'object') {
|
|
104
|
+
const count = typeof range.count === 'number' ? range.count : 0
|
|
105
|
+
return count > 0
|
|
106
|
+
}
|
|
107
|
+
// When ranges are stored as individual numbers
|
|
108
|
+
return true
|
|
109
|
+
})
|
|
110
|
+
|
|
111
|
+
if (hasRangeData) {
|
|
112
|
+
return true
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
return false
|
|
31
118
|
}
|
|
32
119
|
|
|
33
120
|
// Ultra-fast range conversion - only for very large datasets
|
|
@@ -338,10 +425,25 @@ export default class IndexManager {
|
|
|
338
425
|
|
|
339
426
|
// OPTIMIZATION: Generator-based approach for better memory efficiency
|
|
340
427
|
*_getAllLineNumbersGenerator(hybridData) {
|
|
428
|
+
const normalizeLineNumber = (value) => {
|
|
429
|
+
if (typeof value === 'number') {
|
|
430
|
+
return value
|
|
431
|
+
}
|
|
432
|
+
if (typeof value === 'string') {
|
|
433
|
+
const parsed = Number(value)
|
|
434
|
+
return Number.isNaN(parsed) ? value : parsed
|
|
435
|
+
}
|
|
436
|
+
if (typeof value === 'bigint') {
|
|
437
|
+
const maxSafe = BigInt(Number.MAX_SAFE_INTEGER)
|
|
438
|
+
return value <= maxSafe ? Number(value) : value
|
|
439
|
+
}
|
|
440
|
+
return value
|
|
441
|
+
}
|
|
442
|
+
|
|
341
443
|
// Yield from Set (fastest path)
|
|
342
444
|
if (hybridData.set) {
|
|
343
445
|
for (const num of hybridData.set) {
|
|
344
|
-
yield num
|
|
446
|
+
yield normalizeLineNumber(num)
|
|
345
447
|
}
|
|
346
448
|
}
|
|
347
449
|
|
|
@@ -352,11 +454,11 @@ export default class IndexManager {
|
|
|
352
454
|
// It's a range - use direct loop for better performance
|
|
353
455
|
const end = item.start + item.count
|
|
354
456
|
for (let i = item.start; i < end; i++) {
|
|
355
|
-
yield i
|
|
457
|
+
yield normalizeLineNumber(i)
|
|
356
458
|
}
|
|
357
459
|
} else {
|
|
358
460
|
// It's an individual number
|
|
359
|
-
yield item
|
|
461
|
+
yield normalizeLineNumber(item)
|
|
360
462
|
}
|
|
361
463
|
}
|
|
362
464
|
}
|
|
@@ -377,7 +479,29 @@ export default class IndexManager {
|
|
|
377
479
|
// OPTIMIZATION 6: Pre-allocate field structures for better performance
|
|
378
480
|
const fields = Object.keys(this.opts.indexes || {})
|
|
379
481
|
for (const field of fields) {
|
|
380
|
-
|
|
482
|
+
// PERFORMANCE: Check if this is a term mapping field once
|
|
483
|
+
const isTermMappingField = this.database?.termManager &&
|
|
484
|
+
this.database.termManager.termMappingFields &&
|
|
485
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
486
|
+
|
|
487
|
+
// CRITICAL FIX: For term mapping fields, prefer ${field}Ids if available
|
|
488
|
+
// Records processed by processTermMapping have term IDs in ${field}Ids
|
|
489
|
+
// Records loaded from file have term IDs directly in ${field} (after restoreTermIdsAfterDeserialization)
|
|
490
|
+
let value
|
|
491
|
+
if (isTermMappingField) {
|
|
492
|
+
const termIdsField = `${field}Ids`
|
|
493
|
+
const termIds = row[termIdsField]
|
|
494
|
+
if (termIds && Array.isArray(termIds) && termIds.length > 0) {
|
|
495
|
+
// Use term IDs from ${field}Ids (preferred - from processTermMapping)
|
|
496
|
+
value = termIds
|
|
497
|
+
} else {
|
|
498
|
+
// Fallback: use field directly (for records loaded from file that have term IDs in field)
|
|
499
|
+
value = row[field]
|
|
500
|
+
}
|
|
501
|
+
} else {
|
|
502
|
+
value = row[field]
|
|
503
|
+
}
|
|
504
|
+
|
|
381
505
|
if (value !== undefined && value !== null) {
|
|
382
506
|
// OPTIMIZATION 6: Initialize field structure if it doesn't exist
|
|
383
507
|
if (!data[field]) {
|
|
@@ -388,30 +512,18 @@ export default class IndexManager {
|
|
|
388
512
|
for (const val of values) {
|
|
389
513
|
let key
|
|
390
514
|
|
|
391
|
-
// Check if this is a term mapping field (array:string fields only)
|
|
392
|
-
const isTermMappingField = this.database?.termManager &&
|
|
393
|
-
this.database.termManager.termMappingFields &&
|
|
394
|
-
this.database.termManager.termMappingFields.includes(field)
|
|
395
|
-
|
|
396
515
|
if (isTermMappingField && typeof val === 'number') {
|
|
397
|
-
// For term mapping fields
|
|
516
|
+
// For term mapping fields, values are already term IDs
|
|
398
517
|
key = String(val)
|
|
399
|
-
if (this.database.opts.debugMode) {
|
|
400
|
-
console.log(`🔍 IndexManager.add: Using term ID ${val} directly for field "${field}"`)
|
|
401
|
-
}
|
|
402
518
|
} else if (isTermMappingField && typeof val === 'string') {
|
|
403
|
-
//
|
|
404
|
-
|
|
519
|
+
// Fallback: convert string to term ID
|
|
520
|
+
// CRITICAL: During indexing (add), we should use getTermId() to create IDs if needed
|
|
521
|
+
// This is different from queries where we use getTermIdWithoutIncrement() to avoid creating new IDs
|
|
522
|
+
const termId = this.database.termManager.getTermId(val)
|
|
405
523
|
key = String(termId)
|
|
406
|
-
if (this.database.opts.debugMode) {
|
|
407
|
-
console.log(`🔍 IndexManager.add: Using term ID ${termId} for term "${val}"`)
|
|
408
|
-
}
|
|
409
524
|
} else {
|
|
410
525
|
// For non-term-mapping fields (including array:number), use values directly
|
|
411
526
|
key = String(val)
|
|
412
|
-
if (this.database?.opts?.debugMode) {
|
|
413
|
-
console.log(`🔍 IndexManager.add: Using value "${val}" directly for field "${field}"`)
|
|
414
|
-
}
|
|
415
527
|
}
|
|
416
528
|
|
|
417
529
|
// OPTIMIZATION 6: Use direct assignment for better performance
|
|
@@ -458,36 +570,46 @@ export default class IndexManager {
|
|
|
458
570
|
const lineNumber = startLineNumber + i
|
|
459
571
|
|
|
460
572
|
for (const field of fields) {
|
|
461
|
-
|
|
573
|
+
// PERFORMANCE: Check if this is a term mapping field once
|
|
574
|
+
const isTermMappingField = this.database?.termManager &&
|
|
575
|
+
this.database.termManager.termMappingFields &&
|
|
576
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
577
|
+
|
|
578
|
+
// CRITICAL FIX: For term mapping fields, prefer ${field}Ids if available
|
|
579
|
+
// Records processed by processTermMapping have term IDs in ${field}Ids
|
|
580
|
+
// Records loaded from file have term IDs directly in ${field} (after restoreTermIdsAfterDeserialization)
|
|
581
|
+
let value
|
|
582
|
+
if (isTermMappingField) {
|
|
583
|
+
const termIdsField = `${field}Ids`
|
|
584
|
+
const termIds = row[termIdsField]
|
|
585
|
+
if (termIds && Array.isArray(termIds) && termIds.length > 0) {
|
|
586
|
+
// Use term IDs from ${field}Ids (preferred - from processTermMapping)
|
|
587
|
+
value = termIds
|
|
588
|
+
} else {
|
|
589
|
+
// Fallback: use field directly (for records loaded from file that have term IDs in field)
|
|
590
|
+
value = row[field]
|
|
591
|
+
}
|
|
592
|
+
} else {
|
|
593
|
+
value = row[field]
|
|
594
|
+
}
|
|
595
|
+
|
|
462
596
|
if (value !== undefined && value !== null) {
|
|
463
597
|
const values = Array.isArray(value) ? value : [value]
|
|
464
598
|
for (const val of values) {
|
|
465
599
|
let key
|
|
466
600
|
|
|
467
|
-
// Check if this is a term mapping field (array:string fields only)
|
|
468
|
-
const isTermMappingField = this.database?.termManager &&
|
|
469
|
-
this.database.termManager.termMappingFields &&
|
|
470
|
-
this.database.termManager.termMappingFields.includes(field)
|
|
471
|
-
|
|
472
601
|
if (isTermMappingField && typeof val === 'number') {
|
|
473
|
-
// For term mapping fields
|
|
602
|
+
// For term mapping fields, values are already term IDs
|
|
474
603
|
key = String(val)
|
|
475
|
-
if (this.database.opts.debugMode) {
|
|
476
|
-
console.log(`🔍 IndexManager.addBatch: Using term ID ${val} directly for field "${field}"`)
|
|
477
|
-
}
|
|
478
604
|
} else if (isTermMappingField && typeof val === 'string') {
|
|
479
|
-
//
|
|
480
|
-
|
|
605
|
+
// Fallback: convert string to term ID
|
|
606
|
+
// CRITICAL: During indexing (addBatch), we should use getTermId() to create IDs if needed
|
|
607
|
+
// This is different from queries where we use getTermIdWithoutIncrement() to avoid creating new IDs
|
|
608
|
+
const termId = this.database.termManager.getTermId(val)
|
|
481
609
|
key = String(termId)
|
|
482
|
-
if (this.database.opts.debugMode) {
|
|
483
|
-
console.log(`🔍 IndexManager.addBatch: Using term ID ${termId} for term "${val}"`)
|
|
484
|
-
}
|
|
485
610
|
} else {
|
|
486
611
|
// For non-term-mapping fields (including array:number), use values directly
|
|
487
612
|
key = String(val)
|
|
488
|
-
if (this.database?.opts?.debugMode) {
|
|
489
|
-
console.log(`🔍 IndexManager.addBatch: Using value "${val}" directly for field "${field}"`)
|
|
490
|
-
}
|
|
491
613
|
}
|
|
492
614
|
|
|
493
615
|
// OPTIMIZATION 6: Use Map for efficient batch updates
|
|
@@ -585,7 +707,7 @@ export default class IndexManager {
|
|
|
585
707
|
if (!oldRecord || !newRecord) return
|
|
586
708
|
|
|
587
709
|
// Remove old record by ID
|
|
588
|
-
await this.remove(
|
|
710
|
+
await this.remove(oldRecord)
|
|
589
711
|
|
|
590
712
|
// Add new record with provided line number or use hash of the ID
|
|
591
713
|
const actualLineNumber = lineNumber !== null ? lineNumber : this._getIdAsNumber(newRecord.id)
|
|
@@ -619,15 +741,56 @@ export default class IndexManager {
|
|
|
619
741
|
|
|
620
742
|
// If record is an object, remove by record data
|
|
621
743
|
if (typeof record === 'object' && record.id) {
|
|
622
|
-
return this._removeRecord(record)
|
|
744
|
+
return await this._removeRecord(record)
|
|
623
745
|
}
|
|
624
746
|
}
|
|
625
747
|
|
|
626
748
|
// Remove a specific record from the index
|
|
627
|
-
_removeRecord(record) {
|
|
749
|
+
async _removeRecord(record) {
|
|
628
750
|
if (!record) return
|
|
629
751
|
|
|
630
752
|
const data = this.index.data
|
|
753
|
+
const database = this.database
|
|
754
|
+
const persistedCount = Array.isArray(database?.offsets) ? database.offsets.length : 0
|
|
755
|
+
const lineMatchCache = new Map()
|
|
756
|
+
|
|
757
|
+
const doesLineNumberBelongToRecord = async (lineNumber) => {
|
|
758
|
+
if (lineMatchCache.has(lineNumber)) {
|
|
759
|
+
return lineMatchCache.get(lineNumber)
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
let belongs = false
|
|
763
|
+
|
|
764
|
+
try {
|
|
765
|
+
if (lineNumber >= persistedCount) {
|
|
766
|
+
const writeBufferIndex = lineNumber - persistedCount
|
|
767
|
+
const candidate = database?.writeBuffer?.[writeBufferIndex]
|
|
768
|
+
belongs = !!candidate && candidate.id === record.id
|
|
769
|
+
} else if (lineNumber >= 0) {
|
|
770
|
+
const range = database?.locate?.(lineNumber)
|
|
771
|
+
if (range && database.fileHandler && database.serializer) {
|
|
772
|
+
const [start, end] = range
|
|
773
|
+
const buffer = await database.fileHandler.readRange(start, end)
|
|
774
|
+
if (buffer && buffer.length > 0) {
|
|
775
|
+
let line = buffer.toString('utf8')
|
|
776
|
+
if (line) {
|
|
777
|
+
line = line.trim()
|
|
778
|
+
if (line.length > 0) {
|
|
779
|
+
const storedRecord = database.serializer.deserialize(line)
|
|
780
|
+
belongs = storedRecord && storedRecord.id === record.id
|
|
781
|
+
}
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
} catch (error) {
|
|
787
|
+
belongs = false
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
lineMatchCache.set(lineNumber, belongs)
|
|
791
|
+
return belongs
|
|
792
|
+
}
|
|
793
|
+
|
|
631
794
|
for (const field in data) {
|
|
632
795
|
if (record[field] !== undefined && record[field] !== null) {
|
|
633
796
|
const values = Array.isArray(record[field]) ? record[field] : [record[field]]
|
|
@@ -663,11 +826,16 @@ export default class IndexManager {
|
|
|
663
826
|
// Note: TermManager notification is handled by Database.mjs
|
|
664
827
|
// to avoid double decrementation during updates
|
|
665
828
|
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
const
|
|
670
|
-
|
|
829
|
+
const indexEntry = data[field][key]
|
|
830
|
+
if (indexEntry) {
|
|
831
|
+
const lineNumbers = this._getAllLineNumbers(indexEntry)
|
|
832
|
+
const filteredLineNumbers = []
|
|
833
|
+
|
|
834
|
+
for (const lineNumber of lineNumbers) {
|
|
835
|
+
if (!(await doesLineNumberBelongToRecord(lineNumber))) {
|
|
836
|
+
filteredLineNumbers.push(lineNumber)
|
|
837
|
+
}
|
|
838
|
+
}
|
|
671
839
|
|
|
672
840
|
if (filteredLineNumbers.length === 0) {
|
|
673
841
|
delete data[field][key]
|
|
@@ -842,7 +1010,8 @@ export default class IndexManager {
|
|
|
842
1010
|
|
|
843
1011
|
if (typeof data[field] === 'undefined') continue;
|
|
844
1012
|
|
|
845
|
-
const
|
|
1013
|
+
const originalCriteriaValue = criteria[field];
|
|
1014
|
+
const criteriaValue = normalizeCriteriaOperators(originalCriteriaValue, { target: 'legacy', preserveOriginal: true });
|
|
846
1015
|
let lineNumbersForField = new Set();
|
|
847
1016
|
const isNumericField = this.opts.indexes[field] === 'number';
|
|
848
1017
|
|
|
@@ -859,45 +1028,69 @@ export default class IndexManager {
|
|
|
859
1028
|
// Handle $in operator for array queries
|
|
860
1029
|
if (criteriaValue.$in !== undefined) {
|
|
861
1030
|
const inValues = Array.isArray(criteriaValue.$in) ? criteriaValue.$in : [criteriaValue.$in];
|
|
1031
|
+
|
|
1032
|
+
// PERFORMANCE: Cache term mapping field check once
|
|
1033
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1034
|
+
this.database.termManager.termMappingFields &&
|
|
1035
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
1036
|
+
|
|
1037
|
+
// PERFORMANCE: Track if any term was found and matched
|
|
1038
|
+
let foundAnyMatch = false
|
|
1039
|
+
|
|
862
1040
|
for (const inValue of inValues) {
|
|
863
1041
|
// SPACE OPTIMIZATION: Convert search term to term ID for lookup
|
|
864
1042
|
let searchTermId
|
|
865
1043
|
|
|
866
|
-
// Check if this is a term mapping field
|
|
867
|
-
const isTermMappingField = this.database?.termManager &&
|
|
868
|
-
this.database.termManager.termMappingFields &&
|
|
869
|
-
this.database.termManager.termMappingFields.includes(field)
|
|
870
|
-
|
|
871
1044
|
if (isTermMappingField && typeof inValue === 'number') {
|
|
872
1045
|
// For term mapping fields (array:string), the search value is already a term ID
|
|
873
1046
|
searchTermId = String(inValue)
|
|
874
1047
|
} else if (isTermMappingField && typeof inValue === 'string') {
|
|
875
1048
|
// For term mapping fields (array:string), convert string to term ID
|
|
876
|
-
|
|
1049
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(String(inValue))
|
|
1050
|
+
if (termId === undefined) {
|
|
1051
|
+
// Term not found in termManager - skip this search value
|
|
1052
|
+
// This means the term was never saved to the database
|
|
1053
|
+
if (this.opts?.debugMode) {
|
|
1054
|
+
console.log(`⚠️ Term "${inValue}" not found in termManager for field "${field}" - skipping`)
|
|
1055
|
+
}
|
|
1056
|
+
continue // Skip this value, no matches possible
|
|
1057
|
+
}
|
|
1058
|
+
searchTermId = String(termId)
|
|
877
1059
|
} else {
|
|
878
1060
|
// For non-term-mapping fields (including array:number), use values directly
|
|
879
1061
|
searchTermId = String(inValue)
|
|
880
1062
|
}
|
|
881
1063
|
|
|
882
|
-
//
|
|
1064
|
+
// PERFORMANCE: Direct lookup instead of iteration
|
|
1065
|
+
let matched = false
|
|
883
1066
|
if (caseInsensitive && typeof inValue === 'string') {
|
|
1067
|
+
const searchLower = searchTermId.toLowerCase()
|
|
884
1068
|
for (const value in fieldIndex) {
|
|
885
|
-
if (value.toLowerCase() ===
|
|
1069
|
+
if (value.toLowerCase() === searchLower) {
|
|
886
1070
|
const numbers = this._getAllLineNumbers(fieldIndex[value]);
|
|
887
1071
|
for (const lineNumber of numbers) {
|
|
888
1072
|
lineNumbersForField.add(lineNumber);
|
|
889
1073
|
}
|
|
1074
|
+
matched = true
|
|
1075
|
+
foundAnyMatch = true
|
|
890
1076
|
}
|
|
891
1077
|
}
|
|
892
1078
|
} else {
|
|
893
|
-
|
|
894
|
-
|
|
1079
|
+
const indexData = fieldIndex[searchTermId]
|
|
1080
|
+
if (indexData) {
|
|
1081
|
+
const numbers = this._getAllLineNumbers(indexData);
|
|
895
1082
|
for (const lineNumber of numbers) {
|
|
896
1083
|
lineNumbersForField.add(lineNumber);
|
|
897
1084
|
}
|
|
1085
|
+
matched = true
|
|
1086
|
+
foundAnyMatch = true
|
|
898
1087
|
}
|
|
899
1088
|
}
|
|
900
1089
|
}
|
|
1090
|
+
|
|
1091
|
+
// CRITICAL FIX: If no matches found at all (all terms were unknown or not in index),
|
|
1092
|
+
// lineNumbersForField remains empty which is correct (no results for $in)
|
|
1093
|
+
// This is handled correctly by the caller - empty Set means no matches
|
|
901
1094
|
}
|
|
902
1095
|
// Handle $nin operator (not in) - returns complement of $in
|
|
903
1096
|
else if (criteriaValue.$nin !== undefined) {
|
|
@@ -910,30 +1103,39 @@ export default class IndexManager {
|
|
|
910
1103
|
// Get line numbers that match any of the $nin values
|
|
911
1104
|
const matchingLines = new Set();
|
|
912
1105
|
|
|
1106
|
+
// PERFORMANCE: Cache term mapping field check once
|
|
1107
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1108
|
+
this.database.termManager.termMappingFields &&
|
|
1109
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
1110
|
+
|
|
913
1111
|
for (const ninValue of ninValues) {
|
|
914
1112
|
// SPACE OPTIMIZATION: Convert search term to term ID for lookup
|
|
915
1113
|
let searchTermId
|
|
916
1114
|
|
|
917
|
-
// Check if this is a term mapping field
|
|
918
|
-
const isTermMappingField = this.database?.termManager &&
|
|
919
|
-
this.database.termManager.termMappingFields &&
|
|
920
|
-
this.database.termManager.termMappingFields.includes(field)
|
|
921
|
-
|
|
922
1115
|
if (isTermMappingField && typeof ninValue === 'number') {
|
|
923
1116
|
// For term mapping fields (array:string), the search value is already a term ID
|
|
924
1117
|
searchTermId = String(ninValue)
|
|
925
1118
|
} else if (isTermMappingField && typeof ninValue === 'string') {
|
|
926
1119
|
// For term mapping fields (array:string), convert string to term ID
|
|
927
|
-
|
|
1120
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(String(ninValue))
|
|
1121
|
+
if (termId === undefined) {
|
|
1122
|
+
// Term not found - skip this value (can't exclude what doesn't exist)
|
|
1123
|
+
if (this.opts?.debugMode) {
|
|
1124
|
+
console.log(`⚠️ Term "${ninValue}" not found in termManager for field "${field}" - skipping`)
|
|
1125
|
+
}
|
|
1126
|
+
continue
|
|
1127
|
+
}
|
|
1128
|
+
searchTermId = String(termId)
|
|
928
1129
|
} else {
|
|
929
1130
|
// For non-term-mapping fields (including array:number), use values directly
|
|
930
1131
|
searchTermId = String(ninValue)
|
|
931
1132
|
}
|
|
932
1133
|
|
|
933
|
-
//
|
|
1134
|
+
// PERFORMANCE: Direct lookup instead of iteration
|
|
934
1135
|
if (caseInsensitive && typeof ninValue === 'string') {
|
|
1136
|
+
const searchLower = searchTermId.toLowerCase()
|
|
935
1137
|
for (const value in fieldIndex) {
|
|
936
|
-
if (value.toLowerCase() ===
|
|
1138
|
+
if (value.toLowerCase() === searchLower) {
|
|
937
1139
|
const numbers = this._getAllLineNumbers(fieldIndex[value]);
|
|
938
1140
|
for (const lineNumber of numbers) {
|
|
939
1141
|
matchingLines.add(lineNumber);
|
|
@@ -941,8 +1143,9 @@ export default class IndexManager {
|
|
|
941
1143
|
}
|
|
942
1144
|
}
|
|
943
1145
|
} else {
|
|
944
|
-
|
|
945
|
-
|
|
1146
|
+
const indexData = fieldIndex[searchTermId]
|
|
1147
|
+
if (indexData) {
|
|
1148
|
+
const numbers = this._getAllLineNumbers(indexData);
|
|
946
1149
|
for (const lineNumber of numbers) {
|
|
947
1150
|
matchingLines.add(lineNumber);
|
|
948
1151
|
}
|
|
@@ -978,9 +1181,40 @@ export default class IndexManager {
|
|
|
978
1181
|
// Handle $all operator for array queries - FIXED FOR TERM MAPPING
|
|
979
1182
|
else if (criteriaValue.$all !== undefined) {
|
|
980
1183
|
const allValues = Array.isArray(criteriaValue.$all) ? criteriaValue.$all : [criteriaValue.$all];
|
|
1184
|
+
|
|
1185
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1186
|
+
this.database.termManager.termMappingFields &&
|
|
1187
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
1188
|
+
|
|
1189
|
+
const normalizeValue = (value) => {
|
|
1190
|
+
if (isTermMappingField) {
|
|
1191
|
+
if (typeof value === 'number') {
|
|
1192
|
+
return String(value)
|
|
1193
|
+
}
|
|
1194
|
+
if (typeof value === 'string') {
|
|
1195
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(value)
|
|
1196
|
+
if (termId !== undefined) {
|
|
1197
|
+
return String(termId)
|
|
1198
|
+
}
|
|
1199
|
+
return null
|
|
1200
|
+
}
|
|
1201
|
+
return null
|
|
1202
|
+
}
|
|
1203
|
+
return String(value)
|
|
1204
|
+
}
|
|
1205
|
+
|
|
1206
|
+
const normalizedValues = []
|
|
1207
|
+
for (const value of allValues) {
|
|
1208
|
+
const normalized = normalizeValue(value)
|
|
1209
|
+
if (normalized === null) {
|
|
1210
|
+
// Term not found in term manager, no matches possible
|
|
1211
|
+
return lineNumbersForField
|
|
1212
|
+
}
|
|
1213
|
+
normalizedValues.push(normalized)
|
|
1214
|
+
}
|
|
981
1215
|
|
|
982
1216
|
// Early exit optimization
|
|
983
|
-
if (
|
|
1217
|
+
if (normalizedValues.length === 0) {
|
|
984
1218
|
// Empty $all matches everything
|
|
985
1219
|
for (const value in fieldIndex) {
|
|
986
1220
|
const numbers = this._getAllLineNumbers(fieldIndex[value]);
|
|
@@ -994,7 +1228,7 @@ export default class IndexManager {
|
|
|
994
1228
|
|
|
995
1229
|
// First, get all line numbers that contain each individual term
|
|
996
1230
|
const termLineNumbers = new Map();
|
|
997
|
-
for (const term of
|
|
1231
|
+
for (const term of normalizedValues) {
|
|
998
1232
|
if (fieldIndex[term]) {
|
|
999
1233
|
termLineNumbers.set(term, new Set(this._getAllLineNumbers(fieldIndex[term])));
|
|
1000
1234
|
} else {
|
|
@@ -1114,7 +1348,7 @@ export default class IndexManager {
|
|
|
1114
1348
|
// SPACE OPTIMIZATION: Convert search term to term ID for lookup
|
|
1115
1349
|
let searchTermId
|
|
1116
1350
|
|
|
1117
|
-
//
|
|
1351
|
+
// PERFORMANCE: Cache term mapping field check once per field
|
|
1118
1352
|
const isTermMappingField = this.database?.termManager &&
|
|
1119
1353
|
this.database.termManager.termMappingFields &&
|
|
1120
1354
|
this.database.termManager.termMappingFields.includes(field)
|
|
@@ -1124,7 +1358,15 @@ export default class IndexManager {
|
|
|
1124
1358
|
searchTermId = String(searchValue)
|
|
1125
1359
|
} else if (isTermMappingField && typeof searchValue === 'string') {
|
|
1126
1360
|
// For term mapping fields (array:string), convert string to term ID
|
|
1127
|
-
|
|
1361
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(String(searchValue))
|
|
1362
|
+
if (termId === undefined) {
|
|
1363
|
+
// Term not found - skip this value
|
|
1364
|
+
if (this.opts?.debugMode) {
|
|
1365
|
+
console.log(`⚠️ Term "${searchValue}" not found in termManager for field "${field}" - skipping`)
|
|
1366
|
+
}
|
|
1367
|
+
continue // Skip this value, no matches possible
|
|
1368
|
+
}
|
|
1369
|
+
searchTermId = String(termId)
|
|
1128
1370
|
} else {
|
|
1129
1371
|
// For non-term-mapping fields (including array:number), use values directly
|
|
1130
1372
|
searchTermId = String(searchValue)
|
|
@@ -1170,6 +1412,406 @@ export default class IndexManager {
|
|
|
1170
1412
|
}
|
|
1171
1413
|
return matchingLines || new Set();
|
|
1172
1414
|
}
|
|
1415
|
+
|
|
1416
|
+
/**
|
|
1417
|
+
* Check if any records exist for given field and terms (index-only, ultra-fast)
|
|
1418
|
+
* Stops at first match for maximum performance - no disk I/O required
|
|
1419
|
+
*
|
|
1420
|
+
* @param {string} fieldName - Indexed field name (e.g., 'nameTerms', 'groupTerms')
|
|
1421
|
+
* @param {string|Array<string>} terms - Single term or array of terms to check
|
|
1422
|
+
* @param {Object} options - Options: { $all: true/false, caseInsensitive: true/false, excludes: Array<string> }
|
|
1423
|
+
* @returns {boolean} - True if at least one match exists
|
|
1424
|
+
*
|
|
1425
|
+
* @example
|
|
1426
|
+
* // Check if any record has 'channel' in nameTerms
|
|
1427
|
+
* indexManager.exists('nameTerms', 'channel')
|
|
1428
|
+
*
|
|
1429
|
+
* @example
|
|
1430
|
+
* // Check if any record has ALL terms ['a', 'e'] in nameTerms ($all)
|
|
1431
|
+
* indexManager.exists('nameTerms', ['a', 'e'], { $all: true })
|
|
1432
|
+
*
|
|
1433
|
+
* @example
|
|
1434
|
+
* // Check if any record has ANY of the terms ['channel', 'tv'] in nameTerms
|
|
1435
|
+
* indexManager.exists('nameTerms', ['channel', 'tv'], { $all: false })
|
|
1436
|
+
*
|
|
1437
|
+
* @example
|
|
1438
|
+
* // Check if any record has 'tv' but NOT 'globo' in nameTerms
|
|
1439
|
+
* indexManager.exists('nameTerms', 'tv', { excludes: ['globo'] })
|
|
1440
|
+
*
|
|
1441
|
+
* @example
|
|
1442
|
+
* // Check if any record has ['tv', 'news'] but NOT 'sports' in nameTerms
|
|
1443
|
+
* indexManager.exists('nameTerms', ['tv', 'news'], { $all: true, excludes: ['sports'] })
|
|
1444
|
+
*/
|
|
1445
|
+
exists(fieldName, terms, options = {}) {
|
|
1446
|
+
// Early exit: validate fieldName
|
|
1447
|
+
if (!fieldName || typeof fieldName !== 'string') {
|
|
1448
|
+
return false;
|
|
1449
|
+
}
|
|
1450
|
+
|
|
1451
|
+
// Early exit: check if field is indexed
|
|
1452
|
+
if (!this.isFieldIndexed(fieldName)) {
|
|
1453
|
+
return false;
|
|
1454
|
+
}
|
|
1455
|
+
|
|
1456
|
+
const fieldIndex = this.index.data[fieldName];
|
|
1457
|
+
if (!fieldIndex || typeof fieldIndex !== 'object') {
|
|
1458
|
+
return false;
|
|
1459
|
+
}
|
|
1460
|
+
|
|
1461
|
+
// Normalize terms to array
|
|
1462
|
+
const termsArray = Array.isArray(terms) ? terms : [terms];
|
|
1463
|
+
if (termsArray.length === 0) {
|
|
1464
|
+
return false;
|
|
1465
|
+
}
|
|
1466
|
+
|
|
1467
|
+
const { $all = false, caseInsensitive = false, excludes = [] } = options;
|
|
1468
|
+
const hasExcludes = Array.isArray(excludes) && excludes.length > 0;
|
|
1469
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1470
|
+
this.database.termManager.termMappingFields &&
|
|
1471
|
+
this.database.termManager.termMappingFields.includes(fieldName);
|
|
1472
|
+
|
|
1473
|
+
// Helper: check if termData has any line numbers (ULTRA LIGHT - no expansion)
|
|
1474
|
+
const hasData = (termData) => {
|
|
1475
|
+
if (!termData) return false;
|
|
1476
|
+
// Check Set size (O(1))
|
|
1477
|
+
if (termData.set && termData.set.size > 0) {
|
|
1478
|
+
return true;
|
|
1479
|
+
}
|
|
1480
|
+
// Check ranges length (O(1))
|
|
1481
|
+
if (termData.ranges && termData.ranges.length > 0) {
|
|
1482
|
+
return true;
|
|
1483
|
+
}
|
|
1484
|
+
return false;
|
|
1485
|
+
};
|
|
1486
|
+
|
|
1487
|
+
// Helper: get term key with term mapping and case-insensitive support
|
|
1488
|
+
const getTermKey = (term, useCaseInsensitive = false) => {
|
|
1489
|
+
if (isTermMappingField && typeof term === 'string') {
|
|
1490
|
+
let termId;
|
|
1491
|
+
if (useCaseInsensitive) {
|
|
1492
|
+
// For case-insensitive, search termManager for case-insensitive match
|
|
1493
|
+
const searchLower = String(term).toLowerCase();
|
|
1494
|
+
termId = null;
|
|
1495
|
+
if (this.database?.termManager?.termToId) {
|
|
1496
|
+
for (const [termStr, id] of this.database.termManager.termToId.entries()) {
|
|
1497
|
+
if (termStr.toLowerCase() === searchLower) {
|
|
1498
|
+
termId = id;
|
|
1499
|
+
break;
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
} else {
|
|
1504
|
+
termId = this.database?.termManager?.getTermIdWithoutIncrement(String(term));
|
|
1505
|
+
}
|
|
1506
|
+
|
|
1507
|
+
if (termId === undefined || termId === null) {
|
|
1508
|
+
return null;
|
|
1509
|
+
}
|
|
1510
|
+
return String(termId);
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
// For non-term-mapping fields
|
|
1514
|
+
if (useCaseInsensitive && typeof term === 'string') {
|
|
1515
|
+
const searchLower = String(term).toLowerCase();
|
|
1516
|
+
for (const key in fieldIndex) {
|
|
1517
|
+
if (key.toLowerCase() === searchLower) {
|
|
1518
|
+
return key;
|
|
1519
|
+
}
|
|
1520
|
+
}
|
|
1521
|
+
return null;
|
|
1522
|
+
}
|
|
1523
|
+
|
|
1524
|
+
return String(term);
|
|
1525
|
+
};
|
|
1526
|
+
|
|
1527
|
+
// Handle $all (all terms must exist and have intersection)
|
|
1528
|
+
if ($all) {
|
|
1529
|
+
// Collect term data for all terms first (with early exit)
|
|
1530
|
+
const termDataArray = [];
|
|
1531
|
+
|
|
1532
|
+
for (const term of termsArray) {
|
|
1533
|
+
// Get term key (with term mapping if applicable)
|
|
1534
|
+
let termKey;
|
|
1535
|
+
if (isTermMappingField && typeof term === 'string') {
|
|
1536
|
+
let termId;
|
|
1537
|
+
if (caseInsensitive) {
|
|
1538
|
+
// For case-insensitive, search termManager for case-insensitive match
|
|
1539
|
+
const searchLower = String(term).toLowerCase();
|
|
1540
|
+
termId = null;
|
|
1541
|
+
for (const [termStr, id] of this.database.termManager.termToId.entries()) {
|
|
1542
|
+
if (termStr.toLowerCase() === searchLower) {
|
|
1543
|
+
termId = id;
|
|
1544
|
+
break;
|
|
1545
|
+
}
|
|
1546
|
+
}
|
|
1547
|
+
} else {
|
|
1548
|
+
termId = this.database?.termManager?.getTermIdWithoutIncrement(String(term));
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1551
|
+
if (termId === undefined || termId === null) {
|
|
1552
|
+
return false; // Early exit: term doesn't exist in mapping
|
|
1553
|
+
}
|
|
1554
|
+
termKey = String(termId);
|
|
1555
|
+
} else {
|
|
1556
|
+
termKey = String(term);
|
|
1557
|
+
// For non-term-mapping fields with case-insensitive, search index keys
|
|
1558
|
+
if (caseInsensitive && typeof term === 'string') {
|
|
1559
|
+
const searchLower = termKey.toLowerCase();
|
|
1560
|
+
let foundKey = null;
|
|
1561
|
+
for (const key in fieldIndex) {
|
|
1562
|
+
if (key.toLowerCase() === searchLower) {
|
|
1563
|
+
foundKey = key;
|
|
1564
|
+
break;
|
|
1565
|
+
}
|
|
1566
|
+
}
|
|
1567
|
+
if (foundKey === null) {
|
|
1568
|
+
return false; // Early exit: term doesn't exist
|
|
1569
|
+
}
|
|
1570
|
+
termKey = foundKey;
|
|
1571
|
+
}
|
|
1572
|
+
}
|
|
1573
|
+
|
|
1574
|
+
// Check if term exists in index
|
|
1575
|
+
const termData = fieldIndex[termKey];
|
|
1576
|
+
if (!termData || !hasData(termData)) {
|
|
1577
|
+
return false; // Early exit: term doesn't exist or has no data
|
|
1578
|
+
}
|
|
1579
|
+
|
|
1580
|
+
termDataArray.push(termData);
|
|
1581
|
+
}
|
|
1582
|
+
|
|
1583
|
+
// If we got here, all terms exist and have data
|
|
1584
|
+
// Now check if there's intersection (only if more than one term)
|
|
1585
|
+
if (termDataArray.length === 1) {
|
|
1586
|
+
// Single term - check excludes if any
|
|
1587
|
+
if (!hasExcludes) {
|
|
1588
|
+
return true; // Single term, already verified it has data, no excludes
|
|
1589
|
+
}
|
|
1590
|
+
// Need to check excludes - expand line numbers
|
|
1591
|
+
const lineNumbers = this._getAllLineNumbers(termDataArray[0]);
|
|
1592
|
+
const candidateLines = new Set(lineNumbers);
|
|
1593
|
+
|
|
1594
|
+
// Remove lines that have exclude terms
|
|
1595
|
+
for (const excludeTerm of excludes) {
|
|
1596
|
+
const excludeKey = getTermKey(excludeTerm, caseInsensitive);
|
|
1597
|
+
if (excludeKey === null) continue;
|
|
1598
|
+
|
|
1599
|
+
const excludeData = fieldIndex[excludeKey];
|
|
1600
|
+
if (!excludeData) continue;
|
|
1601
|
+
|
|
1602
|
+
const excludeLines = this._getAllLineNumbers(excludeData);
|
|
1603
|
+
for (const line of excludeLines) {
|
|
1604
|
+
candidateLines.delete(line);
|
|
1605
|
+
}
|
|
1606
|
+
|
|
1607
|
+
// Early exit if all candidates excluded
|
|
1608
|
+
if (candidateLines.size === 0) {
|
|
1609
|
+
return false;
|
|
1610
|
+
}
|
|
1611
|
+
}
|
|
1612
|
+
|
|
1613
|
+
return candidateLines.size > 0;
|
|
1614
|
+
}
|
|
1615
|
+
|
|
1616
|
+
// For multiple terms, we need to check intersection
|
|
1617
|
+
// But we want to do this as lightly as possible
|
|
1618
|
+
// Get line numbers only for intersection check (unavoidable for $all)
|
|
1619
|
+
const termLineNumberSets = [];
|
|
1620
|
+
for (const termData of termDataArray) {
|
|
1621
|
+
const lineNumbers = this._getAllLineNumbers(termData);
|
|
1622
|
+
if (lineNumbers.length === 0) {
|
|
1623
|
+
return false; // Early exit: no line numbers (shouldn't happen, but safety check)
|
|
1624
|
+
}
|
|
1625
|
+
termLineNumberSets.push(new Set(lineNumbers));
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
// Calculate intersection incrementally with early exit
|
|
1629
|
+
let intersection = termLineNumberSets[0];
|
|
1630
|
+
for (let i = 1; i < termLineNumberSets.length; i++) {
|
|
1631
|
+
// Filter intersection to only include items in current set
|
|
1632
|
+
intersection = new Set([...intersection].filter(x => termLineNumberSets[i].has(x)));
|
|
1633
|
+
if (intersection.size === 0) {
|
|
1634
|
+
return false; // Early exit: intersection is empty
|
|
1635
|
+
}
|
|
1636
|
+
}
|
|
1637
|
+
|
|
1638
|
+
// Apply excludes if any
|
|
1639
|
+
if (hasExcludes) {
|
|
1640
|
+
for (const excludeTerm of excludes) {
|
|
1641
|
+
const excludeKey = getTermKey(excludeTerm, caseInsensitive);
|
|
1642
|
+
if (excludeKey === null) continue;
|
|
1643
|
+
|
|
1644
|
+
const excludeData = fieldIndex[excludeKey];
|
|
1645
|
+
if (!excludeData) continue;
|
|
1646
|
+
|
|
1647
|
+
const excludeLines = this._getAllLineNumbers(excludeData);
|
|
1648
|
+
for (const line of excludeLines) {
|
|
1649
|
+
intersection.delete(line);
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1652
|
+
// Early exit if all candidates excluded
|
|
1653
|
+
if (intersection.size === 0) {
|
|
1654
|
+
return false;
|
|
1655
|
+
}
|
|
1656
|
+
}
|
|
1657
|
+
}
|
|
1658
|
+
|
|
1659
|
+
return intersection.size > 0;
|
|
1660
|
+
}
|
|
1661
|
+
|
|
1662
|
+
// Handle $in behavior (any term exists) - default - ULTRA LIGHT
|
|
1663
|
+
// If no excludes, use ultra-fast path (no expansion needed)
|
|
1664
|
+
if (!hasExcludes) {
|
|
1665
|
+
for (const term of termsArray) {
|
|
1666
|
+
// Handle case-insensitive FIRST (before normal conversion)
|
|
1667
|
+
if (caseInsensitive && typeof term === 'string') {
|
|
1668
|
+
if (isTermMappingField && this.database?.termManager?.termToId) {
|
|
1669
|
+
// For term mapping fields, we need to find the term in termManager first
|
|
1670
|
+
// (case-insensitive), then convert to ID
|
|
1671
|
+
const searchLower = String(term).toLowerCase();
|
|
1672
|
+
let foundTermId = null;
|
|
1673
|
+
|
|
1674
|
+
// Search termManager for case-insensitive match
|
|
1675
|
+
for (const [termStr, termId] of this.database.termManager.termToId.entries()) {
|
|
1676
|
+
if (termStr.toLowerCase() === searchLower) {
|
|
1677
|
+
foundTermId = termId;
|
|
1678
|
+
break;
|
|
1679
|
+
}
|
|
1680
|
+
}
|
|
1681
|
+
|
|
1682
|
+
if (foundTermId !== null) {
|
|
1683
|
+
const termData = fieldIndex[String(foundTermId)];
|
|
1684
|
+
if (hasData(termData)) {
|
|
1685
|
+
return true; // Early exit: found a match
|
|
1686
|
+
}
|
|
1687
|
+
}
|
|
1688
|
+
// If not found, continue to next term
|
|
1689
|
+
continue;
|
|
1690
|
+
} else {
|
|
1691
|
+
// For non-term-mapping fields, search index keys directly
|
|
1692
|
+
const searchLower = String(term).toLowerCase();
|
|
1693
|
+
for (const key in fieldIndex) {
|
|
1694
|
+
if (key.toLowerCase() === searchLower) {
|
|
1695
|
+
const termData = fieldIndex[key];
|
|
1696
|
+
if (hasData(termData)) {
|
|
1697
|
+
return true; // Early exit: found a match
|
|
1698
|
+
}
|
|
1699
|
+
}
|
|
1700
|
+
}
|
|
1701
|
+
// If not found, continue to next term
|
|
1702
|
+
continue;
|
|
1703
|
+
}
|
|
1704
|
+
}
|
|
1705
|
+
|
|
1706
|
+
// Normal (case-sensitive) lookup
|
|
1707
|
+
const termKey = getTermKey(term, false);
|
|
1708
|
+
if (termKey === null) {
|
|
1709
|
+
continue; // Term not in mapping, try next
|
|
1710
|
+
}
|
|
1711
|
+
|
|
1712
|
+
// Direct lookup (fastest path) - O(1) hash lookup
|
|
1713
|
+
const termData = fieldIndex[termKey];
|
|
1714
|
+
if (hasData(termData)) {
|
|
1715
|
+
return true; // Early exit: found a match
|
|
1716
|
+
}
|
|
1717
|
+
}
|
|
1718
|
+
|
|
1719
|
+
return false;
|
|
1720
|
+
}
|
|
1721
|
+
|
|
1722
|
+
// With excludes, we need to collect candidates and filter
|
|
1723
|
+
const candidateLines = new Set();
|
|
1724
|
+
|
|
1725
|
+
for (const term of termsArray) {
|
|
1726
|
+
// Handle case-insensitive FIRST (before normal conversion)
|
|
1727
|
+
if (caseInsensitive && typeof term === 'string') {
|
|
1728
|
+
if (isTermMappingField && this.database?.termManager?.termToId) {
|
|
1729
|
+
// For term mapping fields, we need to find the term in termManager first
|
|
1730
|
+
// (case-insensitive), then convert to ID
|
|
1731
|
+
const searchLower = String(term).toLowerCase();
|
|
1732
|
+
let foundTermId = null;
|
|
1733
|
+
|
|
1734
|
+
// Search termManager for case-insensitive match
|
|
1735
|
+
for (const [termStr, termId] of this.database.termManager.termToId.entries()) {
|
|
1736
|
+
if (termStr.toLowerCase() === searchLower) {
|
|
1737
|
+
foundTermId = termId;
|
|
1738
|
+
break;
|
|
1739
|
+
}
|
|
1740
|
+
}
|
|
1741
|
+
|
|
1742
|
+
if (foundTermId !== null) {
|
|
1743
|
+
const termData = fieldIndex[String(foundTermId)];
|
|
1744
|
+
if (hasData(termData)) {
|
|
1745
|
+
// Add line numbers to candidates (need to expand for excludes check)
|
|
1746
|
+
const lineNumbers = this._getAllLineNumbers(termData);
|
|
1747
|
+
for (const line of lineNumbers) {
|
|
1748
|
+
candidateLines.add(line);
|
|
1749
|
+
}
|
|
1750
|
+
}
|
|
1751
|
+
}
|
|
1752
|
+
continue;
|
|
1753
|
+
} else {
|
|
1754
|
+
// For non-term-mapping fields, search index keys directly
|
|
1755
|
+
const searchLower = String(term).toLowerCase();
|
|
1756
|
+
for (const key in fieldIndex) {
|
|
1757
|
+
if (key.toLowerCase() === searchLower) {
|
|
1758
|
+
const termData = fieldIndex[key];
|
|
1759
|
+
if (hasData(termData)) {
|
|
1760
|
+
// Add line numbers to candidates
|
|
1761
|
+
const lineNumbers = this._getAllLineNumbers(termData);
|
|
1762
|
+
for (const line of lineNumbers) {
|
|
1763
|
+
candidateLines.add(line);
|
|
1764
|
+
}
|
|
1765
|
+
}
|
|
1766
|
+
}
|
|
1767
|
+
}
|
|
1768
|
+
continue;
|
|
1769
|
+
}
|
|
1770
|
+
}
|
|
1771
|
+
|
|
1772
|
+
// Normal (case-sensitive) lookup
|
|
1773
|
+
const termKey = getTermKey(term, false);
|
|
1774
|
+
if (termKey === null) {
|
|
1775
|
+
continue; // Term not in mapping, try next
|
|
1776
|
+
}
|
|
1777
|
+
|
|
1778
|
+
// Direct lookup
|
|
1779
|
+
const termData = fieldIndex[termKey];
|
|
1780
|
+
if (hasData(termData)) {
|
|
1781
|
+
// Add line numbers to candidates (need to expand for excludes check)
|
|
1782
|
+
const lineNumbers = this._getAllLineNumbers(termData);
|
|
1783
|
+
for (const line of lineNumbers) {
|
|
1784
|
+
candidateLines.add(line);
|
|
1785
|
+
}
|
|
1786
|
+
}
|
|
1787
|
+
}
|
|
1788
|
+
|
|
1789
|
+
// If no candidates found, return false
|
|
1790
|
+
if (candidateLines.size === 0) {
|
|
1791
|
+
return false;
|
|
1792
|
+
}
|
|
1793
|
+
|
|
1794
|
+
// Apply excludes
|
|
1795
|
+
for (const excludeTerm of excludes) {
|
|
1796
|
+
const excludeKey = getTermKey(excludeTerm, caseInsensitive);
|
|
1797
|
+
if (excludeKey === null) continue;
|
|
1798
|
+
|
|
1799
|
+
const excludeData = fieldIndex[excludeKey];
|
|
1800
|
+
if (!excludeData) continue;
|
|
1801
|
+
|
|
1802
|
+
const excludeLines = this._getAllLineNumbers(excludeData);
|
|
1803
|
+
for (const line of excludeLines) {
|
|
1804
|
+
candidateLines.delete(line);
|
|
1805
|
+
}
|
|
1806
|
+
|
|
1807
|
+
// Early exit if all candidates excluded
|
|
1808
|
+
if (candidateLines.size === 0) {
|
|
1809
|
+
return false;
|
|
1810
|
+
}
|
|
1811
|
+
}
|
|
1812
|
+
|
|
1813
|
+
return candidateLines.size > 0;
|
|
1814
|
+
}
|
|
1173
1815
|
|
|
1174
1816
|
// Ultra-fast load with minimal conversions
|
|
1175
1817
|
load(index) {
|
|
@@ -1238,6 +1880,11 @@ export default class IndexManager {
|
|
|
1238
1880
|
|
|
1239
1881
|
processedIndex.data[field] = {}
|
|
1240
1882
|
|
|
1883
|
+
// CRITICAL FIX: Check if this is a term mapping field for conversion
|
|
1884
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1885
|
+
this.database.termManager.termMappingFields &&
|
|
1886
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
1887
|
+
|
|
1241
1888
|
const terms = Object.keys(fieldData)
|
|
1242
1889
|
for(const term of terms) {
|
|
1243
1890
|
if (!term || typeof term !== 'string') {
|
|
@@ -1246,6 +1893,24 @@ export default class IndexManager {
|
|
|
1246
1893
|
|
|
1247
1894
|
const termData = fieldData[term]
|
|
1248
1895
|
|
|
1896
|
+
// CRITICAL FIX: Convert term strings to term IDs for term mapping fields
|
|
1897
|
+
// If the key is a string term (not a numeric ID), convert it to term ID
|
|
1898
|
+
let termKey = term
|
|
1899
|
+
if (isTermMappingField && typeof term === 'string' && !/^\d+$/.test(term)) {
|
|
1900
|
+
// Key is a term string, convert to term ID
|
|
1901
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(term)
|
|
1902
|
+
if (termId !== undefined) {
|
|
1903
|
+
termKey = String(termId)
|
|
1904
|
+
} else {
|
|
1905
|
+
// Term not found in termManager - skip this key (orphaned term from old index)
|
|
1906
|
+
// This can happen if termMapping wasn't loaded yet or term was removed
|
|
1907
|
+
if (this.opts?.debugMode) {
|
|
1908
|
+
console.log(`⚠️ IndexManager.load: Term "${term}" not found in termManager for field "${field}" - skipping (orphaned from old index)`)
|
|
1909
|
+
}
|
|
1910
|
+
continue
|
|
1911
|
+
}
|
|
1912
|
+
}
|
|
1913
|
+
|
|
1249
1914
|
// Convert various formats to new hybrid format
|
|
1250
1915
|
if (Array.isArray(termData)) {
|
|
1251
1916
|
// Check if it's the new compact format [setArray, rangesArray]
|
|
@@ -1261,13 +1926,13 @@ export default class IndexManager {
|
|
|
1261
1926
|
return range
|
|
1262
1927
|
}
|
|
1263
1928
|
})
|
|
1264
|
-
processedIndex.data[field][
|
|
1929
|
+
processedIndex.data[field][termKey] = {
|
|
1265
1930
|
set: new Set(termData[0]),
|
|
1266
1931
|
ranges: ranges
|
|
1267
1932
|
}
|
|
1268
1933
|
} else {
|
|
1269
1934
|
// Legacy array format (just set data)
|
|
1270
|
-
processedIndex.data[field][
|
|
1935
|
+
processedIndex.data[field][termKey] = { set: new Set(termData), ranges: [] }
|
|
1271
1936
|
}
|
|
1272
1937
|
} else if (termData && typeof termData === 'object') {
|
|
1273
1938
|
if (termData.set || termData.ranges) {
|
|
@@ -1281,14 +1946,14 @@ export default class IndexManager {
|
|
|
1281
1946
|
// Fallback to empty Set
|
|
1282
1947
|
setObject = new Set()
|
|
1283
1948
|
}
|
|
1284
|
-
processedIndex.data[field][
|
|
1949
|
+
processedIndex.data[field][termKey] = {
|
|
1285
1950
|
set: setObject,
|
|
1286
1951
|
ranges: hybridData.ranges || []
|
|
1287
1952
|
}
|
|
1288
1953
|
} else {
|
|
1289
1954
|
// Convert from Set format to hybrid
|
|
1290
1955
|
const numbers = Array.from(termData || [])
|
|
1291
|
-
processedIndex.data[field][
|
|
1956
|
+
processedIndex.data[field][termKey] = { set: new Set(numbers), ranges: [] }
|
|
1292
1957
|
}
|
|
1293
1958
|
}
|
|
1294
1959
|
}
|
|
@@ -1345,12 +2010,38 @@ export default class IndexManager {
|
|
|
1345
2010
|
toJSON() {
|
|
1346
2011
|
const serializable = { data: {} }
|
|
1347
2012
|
|
|
2013
|
+
// Check if this is a term mapping field for conversion
|
|
2014
|
+
const isTermMappingField = (field) => {
|
|
2015
|
+
return this.database?.termManager &&
|
|
2016
|
+
this.database.termManager.termMappingFields &&
|
|
2017
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
2018
|
+
}
|
|
2019
|
+
|
|
1348
2020
|
for (const field in this.index.data) {
|
|
1349
2021
|
serializable.data[field] = {}
|
|
2022
|
+
const isTermField = isTermMappingField(field)
|
|
1350
2023
|
|
|
1351
2024
|
for (const term in this.index.data[field]) {
|
|
1352
2025
|
const hybridData = this.index.data[field][term]
|
|
1353
2026
|
|
|
2027
|
+
// CRITICAL FIX: Convert term strings to term IDs for term mapping fields
|
|
2028
|
+
// If the key is a string term (not a numeric ID), convert it to term ID
|
|
2029
|
+
let termKey = term
|
|
2030
|
+
if (isTermField && typeof term === 'string' && !/^\d+$/.test(term)) {
|
|
2031
|
+
// Key is a term string, convert to term ID
|
|
2032
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(term)
|
|
2033
|
+
if (termId !== undefined) {
|
|
2034
|
+
termKey = String(termId)
|
|
2035
|
+
} else {
|
|
2036
|
+
// Term not found in termManager, keep original key
|
|
2037
|
+
// This prevents data loss when term mapping is incomplete
|
|
2038
|
+
termKey = term
|
|
2039
|
+
if (this.opts?.debugMode) {
|
|
2040
|
+
console.log(`⚠️ IndexManager.toJSON: Term "${term}" not found in termManager for field "${field}" - using original key`)
|
|
2041
|
+
}
|
|
2042
|
+
}
|
|
2043
|
+
}
|
|
2044
|
+
|
|
1354
2045
|
// OPTIMIZATION: Create ranges before serialization if beneficial
|
|
1355
2046
|
if (hybridData.set && hybridData.set.size >= this.rangeThreshold) {
|
|
1356
2047
|
this._optimizeToRanges(hybridData)
|
|
@@ -1370,11 +2061,11 @@ export default class IndexManager {
|
|
|
1370
2061
|
if (ranges.length > 0) {
|
|
1371
2062
|
// Convert ranges to ultra-compact format: [start, count] instead of {start, count}
|
|
1372
2063
|
const compactRanges = ranges.map(range => [range.start, range.count])
|
|
1373
|
-
serializable.data[field][
|
|
2064
|
+
serializable.data[field][termKey] = [setArray, compactRanges]
|
|
1374
2065
|
} else {
|
|
1375
2066
|
// CRITICAL FIX: Always use the [setArray, []] format for consistency
|
|
1376
2067
|
// This ensures the load() method can properly deserialize the data
|
|
1377
|
-
serializable.data[field][
|
|
2068
|
+
serializable.data[field][termKey] = [setArray, []]
|
|
1378
2069
|
}
|
|
1379
2070
|
}
|
|
1380
2071
|
}
|