jexidb 2.0.3 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +13 -0
- package/.gitattributes +2 -0
- package/CHANGELOG.md +132 -101
- package/LICENSE +21 -21
- package/README.md +301 -639
- package/babel.config.json +5 -0
- package/dist/Database.cjs +5204 -0
- package/docs/API.md +908 -241
- package/docs/EXAMPLES.md +701 -177
- package/docs/README.md +194 -184
- package/examples/iterate-usage-example.js +157 -0
- package/examples/simple-iterate-example.js +115 -0
- package/jest.config.js +24 -0
- package/package.json +63 -54
- package/scripts/README.md +47 -0
- package/scripts/benchmark-array-serialization.js +108 -0
- package/scripts/clean-test-files.js +75 -0
- package/scripts/prepare.js +31 -0
- package/scripts/run-tests.js +80 -0
- package/scripts/score-mode-demo.js +45 -0
- package/src/Database.mjs +5325 -0
- package/src/FileHandler.mjs +1140 -0
- package/src/OperationQueue.mjs +279 -0
- package/src/SchemaManager.mjs +268 -0
- package/src/Serializer.mjs +702 -0
- package/src/managers/ConcurrencyManager.mjs +257 -0
- package/src/managers/IndexManager.mjs +2094 -0
- package/src/managers/QueryManager.mjs +1490 -0
- package/src/managers/StatisticsManager.mjs +262 -0
- package/src/managers/StreamingProcessor.mjs +429 -0
- package/src/managers/TermManager.mjs +278 -0
- package/src/utils/operatorNormalizer.mjs +116 -0
- package/test/$not-operator-with-and.test.js +282 -0
- package/test/README.md +8 -0
- package/test/close-init-cycle.test.js +256 -0
- package/test/coverage-method.test.js +93 -0
- package/test/critical-bugs-fixes.test.js +1069 -0
- package/test/deserialize-corruption-fixes.test.js +296 -0
- package/test/exists-method.test.js +318 -0
- package/test/explicit-indexes-comparison.test.js +219 -0
- package/test/filehandler-non-adjacent-ranges-bug.test.js +175 -0
- package/test/index-line-number-regression.test.js +100 -0
- package/test/index-missing-index-data.test.js +91 -0
- package/test/index-persistence.test.js +491 -0
- package/test/index-serialization.test.js +314 -0
- package/test/indexed-query-mode.test.js +360 -0
- package/test/insert-session-auto-flush.test.js +353 -0
- package/test/iterate-method.test.js +272 -0
- package/test/legacy-operator-compat.test.js +154 -0
- package/test/query-operators.test.js +238 -0
- package/test/regex-array-fields.test.js +129 -0
- package/test/score-method.test.js +298 -0
- package/test/setup.js +17 -0
- package/test/term-mapping-minimal.test.js +154 -0
- package/test/term-mapping-simple.test.js +257 -0
- package/test/term-mapping.test.js +514 -0
- package/test/writebuffer-flush-resilience.test.js +204 -0
- package/dist/FileHandler.js +0 -688
- package/dist/IndexManager.js +0 -353
- package/dist/IntegrityChecker.js +0 -364
- package/dist/JSONLDatabase.js +0 -1333
- package/dist/index.js +0 -617
- package/docs/MIGRATION.md +0 -295
- package/examples/auto-save-example.js +0 -158
- package/examples/cjs-usage.cjs +0 -82
- package/examples/close-vs-delete-example.js +0 -71
- package/examples/esm-usage.js +0 -113
- package/examples/example-columns.idx.jdb +0 -0
- package/examples/example-columns.jdb +0 -9
- package/examples/example-options.idx.jdb +0 -0
- package/examples/example-options.jdb +0 -0
- package/examples/example-users.idx.jdb +0 -0
- package/examples/example-users.jdb +0 -5
- package/examples/simple-test.js +0 -55
- package/src/FileHandler.js +0 -674
- package/src/IndexManager.js +0 -363
- package/src/IntegrityChecker.js +0 -379
- package/src/JSONLDatabase.js +0 -1391
- package/src/index.js +0 -608
|
@@ -0,0 +1,2094 @@
|
|
|
1
|
+
import { Mutex } from 'async-mutex'
|
|
2
|
+
import { normalizeCriteriaOperators } from '../utils/operatorNormalizer.mjs'
|
|
3
|
+
|
|
4
|
+
export default class IndexManager {
|
|
5
|
+
constructor(opts, databaseMutex = null, database = null) {
|
|
6
|
+
this.opts = Object.assign({}, opts)
|
|
7
|
+
this.index = Object.assign({data: {}}, this.opts.index)
|
|
8
|
+
this.totalLines = 0
|
|
9
|
+
this.rangeThreshold = 10 // Sensible threshold: 10+ consecutive numbers justify ranges
|
|
10
|
+
this.binarySearchThreshold = 32 // Much higher for better performance
|
|
11
|
+
this.database = database // Reference to database for term manager access
|
|
12
|
+
|
|
13
|
+
// CRITICAL: Use database mutex to prevent deadlocks
|
|
14
|
+
// If no database mutex provided, create a local one (for backward compatibility)
|
|
15
|
+
this.mutex = databaseMutex || new Mutex()
|
|
16
|
+
|
|
17
|
+
this.indexedFields = []
|
|
18
|
+
this.setIndexesConfig(this.opts.indexes)
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
setTotalLines(total) {
|
|
22
|
+
this.totalLines = total
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Update indexes configuration and ensure internal structures stay in sync
|
|
27
|
+
* @param {Object|Array<string>} indexes
|
|
28
|
+
*/
|
|
29
|
+
setIndexesConfig(indexes) {
|
|
30
|
+
if (!indexes) {
|
|
31
|
+
this.opts.indexes = undefined
|
|
32
|
+
this.indexedFields = []
|
|
33
|
+
return
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
if (Array.isArray(indexes)) {
|
|
37
|
+
const fields = indexes.map(field => String(field))
|
|
38
|
+
this.indexedFields = fields
|
|
39
|
+
|
|
40
|
+
const normalizedConfig = {}
|
|
41
|
+
for (const field of fields) {
|
|
42
|
+
const existingConfig = (!Array.isArray(this.opts.indexes) && typeof this.opts.indexes === 'object') ? this.opts.indexes[field] : undefined
|
|
43
|
+
normalizedConfig[field] = existingConfig ?? 'auto'
|
|
44
|
+
if (!this.index.data[field]) {
|
|
45
|
+
this.index.data[field] = {}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
this.opts.indexes = normalizedConfig
|
|
49
|
+
return
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if (typeof indexes === 'object') {
|
|
53
|
+
this.opts.indexes = Object.assign({}, indexes)
|
|
54
|
+
this.indexedFields = Object.keys(this.opts.indexes)
|
|
55
|
+
|
|
56
|
+
for (const field of this.indexedFields) {
|
|
57
|
+
if (!this.index.data[field]) {
|
|
58
|
+
this.index.data[field] = {}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Check if a field is configured as an index
|
|
66
|
+
* @param {string} field - Field name
|
|
67
|
+
* @returns {boolean}
|
|
68
|
+
*/
|
|
69
|
+
isFieldIndexed(field) {
|
|
70
|
+
if (!field) return false
|
|
71
|
+
if (!Array.isArray(this.indexedFields)) {
|
|
72
|
+
return false
|
|
73
|
+
}
|
|
74
|
+
return this.indexedFields.includes(field)
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Determine whether the index has usable data for a given field
|
|
79
|
+
* @param {string} field - Field name
|
|
80
|
+
* @returns {boolean}
|
|
81
|
+
*/
|
|
82
|
+
hasUsableIndexData(field) {
|
|
83
|
+
if (!field) return false
|
|
84
|
+
const fieldData = this.index?.data?.[field]
|
|
85
|
+
if (!fieldData || typeof fieldData !== 'object') {
|
|
86
|
+
return false
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
for (const key in fieldData) {
|
|
90
|
+
if (!Object.prototype.hasOwnProperty.call(fieldData, key)) continue
|
|
91
|
+
const entry = fieldData[key]
|
|
92
|
+
if (!entry) continue
|
|
93
|
+
|
|
94
|
+
if (entry.set && typeof entry.set.size === 'number' && entry.set.size > 0) {
|
|
95
|
+
return true
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
if (Array.isArray(entry.ranges) && entry.ranges.length > 0) {
|
|
99
|
+
const hasRangeData = entry.ranges.some(range => {
|
|
100
|
+
if (range === null || typeof range === 'undefined') {
|
|
101
|
+
return false
|
|
102
|
+
}
|
|
103
|
+
if (typeof range === 'object') {
|
|
104
|
+
const count = typeof range.count === 'number' ? range.count : 0
|
|
105
|
+
return count > 0
|
|
106
|
+
}
|
|
107
|
+
// When ranges are stored as individual numbers
|
|
108
|
+
return true
|
|
109
|
+
})
|
|
110
|
+
|
|
111
|
+
if (hasRangeData) {
|
|
112
|
+
return true
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
return false
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// Ultra-fast range conversion - only for very large datasets
|
|
121
|
+
_toRanges(numbers) {
|
|
122
|
+
if (numbers.length === 0) return []
|
|
123
|
+
if (numbers.length < this.rangeThreshold) return numbers // Keep as-is for small arrays
|
|
124
|
+
|
|
125
|
+
const sorted = numbers.sort((a, b) => a - b) // Sort in-place
|
|
126
|
+
const ranges = []
|
|
127
|
+
let start = sorted[0]
|
|
128
|
+
let count = 1
|
|
129
|
+
|
|
130
|
+
for (let i = 1; i < sorted.length; i++) {
|
|
131
|
+
if (sorted[i] === sorted[i-1] + 1) {
|
|
132
|
+
count++
|
|
133
|
+
} else {
|
|
134
|
+
// End of consecutive sequence
|
|
135
|
+
if (count >= this.rangeThreshold) {
|
|
136
|
+
ranges.push({start, count})
|
|
137
|
+
} else {
|
|
138
|
+
// Add individual numbers for small sequences
|
|
139
|
+
for (let j = start; j < start + count; j++) {
|
|
140
|
+
ranges.push(j)
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
start = sorted[i]
|
|
144
|
+
count = 1
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Handle last sequence
|
|
149
|
+
if (count >= this.rangeThreshold) {
|
|
150
|
+
ranges.push({start, count})
|
|
151
|
+
} else {
|
|
152
|
+
for (let j = start; j < start + count; j++) {
|
|
153
|
+
ranges.push(j)
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
return ranges
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
// Ultra-fast range expansion
|
|
161
|
+
_fromRanges(ranges) {
|
|
162
|
+
if (!ranges || ranges.length === 0) return []
|
|
163
|
+
|
|
164
|
+
const numbers = []
|
|
165
|
+
for (const item of ranges) {
|
|
166
|
+
if (typeof item === 'object' && item.start !== undefined) {
|
|
167
|
+
// It's a range - use direct loop for maximum speed
|
|
168
|
+
const end = item.start + item.count
|
|
169
|
+
for (let i = item.start; i < end; i++) {
|
|
170
|
+
numbers.push(i)
|
|
171
|
+
}
|
|
172
|
+
} else {
|
|
173
|
+
// It's an individual number
|
|
174
|
+
numbers.push(item)
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
return numbers
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// Ultra-fast lookup - optimized for Set operations
|
|
181
|
+
_hasLineNumber(hybridData, lineNumber) {
|
|
182
|
+
if (!hybridData) return false
|
|
183
|
+
|
|
184
|
+
// Check in Set first (O(1)) - most common case
|
|
185
|
+
if (hybridData.set && hybridData.set.has(lineNumber)) {
|
|
186
|
+
return true
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Check in ranges only if necessary
|
|
190
|
+
if (hybridData.ranges && hybridData.ranges.length > 0) {
|
|
191
|
+
return this._searchInRanges(hybridData.ranges, lineNumber)
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
return false
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
// Optimized search strategy
|
|
198
|
+
_searchInRanges(ranges, lineNumber) {
|
|
199
|
+
if (ranges.length < this.binarySearchThreshold) {
|
|
200
|
+
// Linear search for small ranges
|
|
201
|
+
return this._linearSearchRanges(ranges, lineNumber)
|
|
202
|
+
} else {
|
|
203
|
+
// Binary search for large ranges
|
|
204
|
+
return this._binarySearchRanges(ranges, lineNumber)
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
// Ultra-fast linear search
|
|
209
|
+
_linearSearchRanges(ranges, lineNumber) {
|
|
210
|
+
for (const item of ranges) {
|
|
211
|
+
if (typeof item === 'object' && item.start !== undefined) {
|
|
212
|
+
// It's a range
|
|
213
|
+
if (lineNumber >= item.start && lineNumber < item.start + item.count) {
|
|
214
|
+
return true
|
|
215
|
+
}
|
|
216
|
+
} else if (item === lineNumber) {
|
|
217
|
+
// It's an individual number
|
|
218
|
+
return true
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
return false
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Optimized binary search
|
|
225
|
+
_binarySearchRanges(ranges, lineNumber) {
|
|
226
|
+
let left = 0
|
|
227
|
+
let right = ranges.length - 1
|
|
228
|
+
|
|
229
|
+
while (left <= right) {
|
|
230
|
+
const mid = Math.floor((left + right) / 2)
|
|
231
|
+
const range = ranges[mid]
|
|
232
|
+
|
|
233
|
+
if (typeof range === 'object' && range.start !== undefined) {
|
|
234
|
+
// It's a range
|
|
235
|
+
if (lineNumber >= range.start && lineNumber < range.start + range.count) {
|
|
236
|
+
return true
|
|
237
|
+
} else if (lineNumber < range.start) {
|
|
238
|
+
right = mid - 1
|
|
239
|
+
} else {
|
|
240
|
+
left = mid + 1
|
|
241
|
+
}
|
|
242
|
+
} else {
|
|
243
|
+
// It's an individual number
|
|
244
|
+
if (range === lineNumber) {
|
|
245
|
+
return true
|
|
246
|
+
} else if (range < lineNumber) {
|
|
247
|
+
left = mid + 1
|
|
248
|
+
} else {
|
|
249
|
+
right = mid - 1
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
return false
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
// Ultra-fast add operation - minimal overhead
|
|
258
|
+
_addLineNumber(hybridData, lineNumber) {
|
|
259
|
+
// Initialize structure if needed
|
|
260
|
+
if (!hybridData) {
|
|
261
|
+
hybridData = { set: new Set(), ranges: [] }
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// Add to Set directly (fastest path)
|
|
265
|
+
if (!hybridData.set) {
|
|
266
|
+
hybridData.set = new Set()
|
|
267
|
+
}
|
|
268
|
+
hybridData.set.add(lineNumber)
|
|
269
|
+
|
|
270
|
+
// Optimize to ranges when Set gets reasonably large
|
|
271
|
+
if (hybridData.set.size >= this.rangeThreshold * 2) { // 20 elements
|
|
272
|
+
if (this.opts.debugMode) {
|
|
273
|
+
console.log(`🔧 Triggering range optimization: Set size ${hybridData.set.size} >= threshold ${this.rangeThreshold * 2}`)
|
|
274
|
+
}
|
|
275
|
+
this._optimizeToRanges(hybridData)
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
return hybridData
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Ultra-fast remove operation
|
|
282
|
+
_removeLineNumber(hybridData, lineNumber) {
|
|
283
|
+
if (!hybridData) {
|
|
284
|
+
return hybridData
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
// Remove from Set (fast path)
|
|
288
|
+
if (hybridData.set) {
|
|
289
|
+
hybridData.set.delete(lineNumber)
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
// Remove from ranges (less common)
|
|
293
|
+
if (hybridData.ranges) {
|
|
294
|
+
hybridData.ranges = this._removeFromRanges(hybridData.ranges, lineNumber)
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
return hybridData
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
// Optimized range removal
|
|
301
|
+
_removeFromRanges(ranges, lineNumber) {
|
|
302
|
+
if (!ranges || ranges.length === 0) return ranges
|
|
303
|
+
|
|
304
|
+
const newRanges = []
|
|
305
|
+
|
|
306
|
+
for (const item of ranges) {
|
|
307
|
+
if (typeof item === 'object' && item.start !== undefined) {
|
|
308
|
+
// It's a range
|
|
309
|
+
if (lineNumber >= item.start && lineNumber < item.start + item.count) {
|
|
310
|
+
// Split range if needed
|
|
311
|
+
if (lineNumber === item.start) {
|
|
312
|
+
// Remove first element
|
|
313
|
+
if (item.count > 1) {
|
|
314
|
+
newRanges.push({ start: item.start + 1, count: item.count - 1 })
|
|
315
|
+
}
|
|
316
|
+
} else if (lineNumber === item.start + item.count - 1) {
|
|
317
|
+
// Remove last element
|
|
318
|
+
if (item.count > 1) {
|
|
319
|
+
newRanges.push({ start: item.start, count: item.count - 1 })
|
|
320
|
+
}
|
|
321
|
+
} else {
|
|
322
|
+
// Remove from middle - split into two ranges
|
|
323
|
+
const beforeCount = lineNumber - item.start
|
|
324
|
+
const afterCount = item.count - beforeCount - 1
|
|
325
|
+
|
|
326
|
+
if (beforeCount >= this.rangeThreshold) {
|
|
327
|
+
newRanges.push({ start: item.start, count: beforeCount })
|
|
328
|
+
} else {
|
|
329
|
+
// Add individual numbers for small sequences
|
|
330
|
+
for (let i = item.start; i < lineNumber; i++) {
|
|
331
|
+
newRanges.push(i)
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
if (afterCount >= this.rangeThreshold) {
|
|
336
|
+
newRanges.push({ start: lineNumber + 1, count: afterCount })
|
|
337
|
+
} else {
|
|
338
|
+
// Add individual numbers for small sequences
|
|
339
|
+
for (let i = lineNumber + 1; i < item.start + item.count; i++) {
|
|
340
|
+
newRanges.push(i)
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
} else {
|
|
345
|
+
newRanges.push(item)
|
|
346
|
+
}
|
|
347
|
+
} else if (item !== lineNumber) {
|
|
348
|
+
// It's an individual number
|
|
349
|
+
newRanges.push(item)
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
return newRanges
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// Ultra-lazy range conversion - only when absolutely necessary
|
|
357
|
+
_optimizeToRanges(hybridData) {
|
|
358
|
+
if (!hybridData.set || hybridData.set.size === 0) {
|
|
359
|
+
return
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
if (this.opts.debugMode) {
|
|
363
|
+
console.log(`🔧 Starting range optimization for Set with ${hybridData.set.size} elements`)
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
// Only convert if we have enough data to make it worthwhile
|
|
367
|
+
if (hybridData.set.size < this.rangeThreshold) {
|
|
368
|
+
return
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
// Convert Set to array and find consecutive sequences
|
|
372
|
+
const numbers = Array.from(hybridData.set).sort((a, b) => a - b)
|
|
373
|
+
const ranges = []
|
|
374
|
+
|
|
375
|
+
let start = numbers[0]
|
|
376
|
+
let count = 1
|
|
377
|
+
|
|
378
|
+
for (let i = 1; i < numbers.length; i++) {
|
|
379
|
+
if (numbers[i] === numbers[i-1] + 1) {
|
|
380
|
+
count++
|
|
381
|
+
} else {
|
|
382
|
+
// End of consecutive sequence
|
|
383
|
+
if (count >= this.rangeThreshold) {
|
|
384
|
+
ranges.push({start, count})
|
|
385
|
+
// Remove these numbers from Set
|
|
386
|
+
for (let j = start; j < start + count; j++) {
|
|
387
|
+
hybridData.set.delete(j)
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
start = numbers[i]
|
|
391
|
+
count = 1
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
// Handle last sequence
|
|
396
|
+
if (count >= this.rangeThreshold) {
|
|
397
|
+
ranges.push({start, count})
|
|
398
|
+
for (let j = start; j < start + count; j++) {
|
|
399
|
+
hybridData.set.delete(j)
|
|
400
|
+
}
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
// Add new ranges to existing ranges
|
|
404
|
+
if (ranges.length > 0) {
|
|
405
|
+
if (!hybridData.ranges) {
|
|
406
|
+
hybridData.ranges = []
|
|
407
|
+
}
|
|
408
|
+
hybridData.ranges.push(...ranges)
|
|
409
|
+
// Keep ranges sorted for efficient binary search
|
|
410
|
+
hybridData.ranges.sort((a, b) => {
|
|
411
|
+
const aStart = typeof a === 'object' ? a.start : a
|
|
412
|
+
const bStart = typeof b === 'object' ? b.start : b
|
|
413
|
+
return aStart - bStart
|
|
414
|
+
})
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
// Ultra-fast get all line numbers
|
|
419
|
+
_getAllLineNumbers(hybridData) {
|
|
420
|
+
if (!hybridData) return []
|
|
421
|
+
|
|
422
|
+
// Use generator for lazy evaluation and better memory efficiency
|
|
423
|
+
return Array.from(this._getAllLineNumbersGenerator(hybridData))
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
// OPTIMIZATION: Generator-based approach for better memory efficiency
|
|
427
|
+
*_getAllLineNumbersGenerator(hybridData) {
|
|
428
|
+
const normalizeLineNumber = (value) => {
|
|
429
|
+
if (typeof value === 'number') {
|
|
430
|
+
return value
|
|
431
|
+
}
|
|
432
|
+
if (typeof value === 'string') {
|
|
433
|
+
const parsed = Number(value)
|
|
434
|
+
return Number.isNaN(parsed) ? value : parsed
|
|
435
|
+
}
|
|
436
|
+
if (typeof value === 'bigint') {
|
|
437
|
+
const maxSafe = BigInt(Number.MAX_SAFE_INTEGER)
|
|
438
|
+
return value <= maxSafe ? Number(value) : value
|
|
439
|
+
}
|
|
440
|
+
return value
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
// Yield from Set (fastest path)
|
|
444
|
+
if (hybridData.set) {
|
|
445
|
+
for (const num of hybridData.set) {
|
|
446
|
+
yield normalizeLineNumber(num)
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
// Yield from ranges (optimized)
|
|
451
|
+
if (hybridData.ranges) {
|
|
452
|
+
for (const item of hybridData.ranges) {
|
|
453
|
+
if (typeof item === 'object' && item.start !== undefined) {
|
|
454
|
+
// It's a range - use direct loop for better performance
|
|
455
|
+
const end = item.start + item.count
|
|
456
|
+
for (let i = item.start; i < end; i++) {
|
|
457
|
+
yield normalizeLineNumber(i)
|
|
458
|
+
}
|
|
459
|
+
} else {
|
|
460
|
+
// It's an individual number
|
|
461
|
+
yield normalizeLineNumber(item)
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
|
|
467
|
+
// OPTIMIZATION 6: Ultra-fast add operation with incremental index updates
|
|
468
|
+
async add(row, lineNumber) {
|
|
469
|
+
if (typeof row !== 'object' || !row) {
|
|
470
|
+
throw new Error('Invalid \'row\' parameter, it must be an object')
|
|
471
|
+
}
|
|
472
|
+
if (typeof lineNumber !== 'number') {
|
|
473
|
+
throw new Error('Invalid line number')
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
// OPTIMIZATION 6: Use direct field access with minimal operations
|
|
477
|
+
const data = this.index.data
|
|
478
|
+
|
|
479
|
+
// OPTIMIZATION 6: Pre-allocate field structures for better performance
|
|
480
|
+
const fields = Object.keys(this.opts.indexes || {})
|
|
481
|
+
for (const field of fields) {
|
|
482
|
+
// PERFORMANCE: Check if this is a term mapping field once
|
|
483
|
+
const isTermMappingField = this.database?.termManager &&
|
|
484
|
+
this.database.termManager.termMappingFields &&
|
|
485
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
486
|
+
|
|
487
|
+
// CRITICAL FIX: For term mapping fields, prefer ${field}Ids if available
|
|
488
|
+
// Records processed by processTermMapping have term IDs in ${field}Ids
|
|
489
|
+
// Records loaded from file have term IDs directly in ${field} (after restoreTermIdsAfterDeserialization)
|
|
490
|
+
let value
|
|
491
|
+
if (isTermMappingField) {
|
|
492
|
+
const termIdsField = `${field}Ids`
|
|
493
|
+
const termIds = row[termIdsField]
|
|
494
|
+
if (termIds && Array.isArray(termIds) && termIds.length > 0) {
|
|
495
|
+
// Use term IDs from ${field}Ids (preferred - from processTermMapping)
|
|
496
|
+
value = termIds
|
|
497
|
+
} else {
|
|
498
|
+
// Fallback: use field directly (for records loaded from file that have term IDs in field)
|
|
499
|
+
value = row[field]
|
|
500
|
+
}
|
|
501
|
+
} else {
|
|
502
|
+
value = row[field]
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
if (value !== undefined && value !== null) {
|
|
506
|
+
// OPTIMIZATION 6: Initialize field structure if it doesn't exist
|
|
507
|
+
if (!data[field]) {
|
|
508
|
+
data[field] = {}
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
const values = Array.isArray(value) ? value : [value]
|
|
512
|
+
for (const val of values) {
|
|
513
|
+
let key
|
|
514
|
+
|
|
515
|
+
if (isTermMappingField && typeof val === 'number') {
|
|
516
|
+
// For term mapping fields, values are already term IDs
|
|
517
|
+
key = String(val)
|
|
518
|
+
} else if (isTermMappingField && typeof val === 'string') {
|
|
519
|
+
// Fallback: convert string to term ID
|
|
520
|
+
// CRITICAL: During indexing (add), we should use getTermId() to create IDs if needed
|
|
521
|
+
// This is different from queries where we use getTermIdWithoutIncrement() to avoid creating new IDs
|
|
522
|
+
const termId = this.database.termManager.getTermId(val)
|
|
523
|
+
key = String(termId)
|
|
524
|
+
} else {
|
|
525
|
+
// For non-term-mapping fields (including array:number), use values directly
|
|
526
|
+
key = String(val)
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
// OPTIMIZATION 6: Use direct assignment for better performance
|
|
530
|
+
if (!data[field][key]) {
|
|
531
|
+
data[field][key] = { set: new Set(), ranges: [] }
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
// OPTIMIZATION 6: Direct Set operation - fastest possible
|
|
535
|
+
data[field][key].set.add(lineNumber)
|
|
536
|
+
|
|
537
|
+
// OPTIMIZATION 6: Lazy range optimization - only when beneficial
|
|
538
|
+
if (data[field][key].set.size >= this.rangeThreshold * 3) {
|
|
539
|
+
this._optimizeToRanges(data[field][key])
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
/**
|
|
547
|
+
* OPTIMIZATION 6: Add multiple records to the index in batch with optimized operations
|
|
548
|
+
* @param {Array} records - Records to add
|
|
549
|
+
* @param {number} startLineNumber - Starting line number
|
|
550
|
+
*/
|
|
551
|
+
async addBatch(records, startLineNumber) {
|
|
552
|
+
if (!records || !records.length) return
|
|
553
|
+
|
|
554
|
+
// OPTIMIZATION 6: Pre-allocate index structures for better performance
|
|
555
|
+
const data = this.index.data
|
|
556
|
+
const fields = Object.keys(this.opts.indexes || {})
|
|
557
|
+
|
|
558
|
+
for (const field of fields) {
|
|
559
|
+
if (!data[field]) {
|
|
560
|
+
data[field] = {}
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
// OPTIMIZATION 6: Use Map for batch processing to reduce lookups
|
|
565
|
+
const fieldUpdates = new Map()
|
|
566
|
+
|
|
567
|
+
// OPTIMIZATION 6: Process all records in batch with optimized data structures
|
|
568
|
+
for (let i = 0; i < records.length; i++) {
|
|
569
|
+
const row = records[i]
|
|
570
|
+
const lineNumber = startLineNumber + i
|
|
571
|
+
|
|
572
|
+
for (const field of fields) {
|
|
573
|
+
// PERFORMANCE: Check if this is a term mapping field once
|
|
574
|
+
const isTermMappingField = this.database?.termManager &&
|
|
575
|
+
this.database.termManager.termMappingFields &&
|
|
576
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
577
|
+
|
|
578
|
+
// CRITICAL FIX: For term mapping fields, prefer ${field}Ids if available
|
|
579
|
+
// Records processed by processTermMapping have term IDs in ${field}Ids
|
|
580
|
+
// Records loaded from file have term IDs directly in ${field} (after restoreTermIdsAfterDeserialization)
|
|
581
|
+
let value
|
|
582
|
+
if (isTermMappingField) {
|
|
583
|
+
const termIdsField = `${field}Ids`
|
|
584
|
+
const termIds = row[termIdsField]
|
|
585
|
+
if (termIds && Array.isArray(termIds) && termIds.length > 0) {
|
|
586
|
+
// Use term IDs from ${field}Ids (preferred - from processTermMapping)
|
|
587
|
+
value = termIds
|
|
588
|
+
} else {
|
|
589
|
+
// Fallback: use field directly (for records loaded from file that have term IDs in field)
|
|
590
|
+
value = row[field]
|
|
591
|
+
}
|
|
592
|
+
} else {
|
|
593
|
+
value = row[field]
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
if (value !== undefined && value !== null) {
|
|
597
|
+
const values = Array.isArray(value) ? value : [value]
|
|
598
|
+
for (const val of values) {
|
|
599
|
+
let key
|
|
600
|
+
|
|
601
|
+
if (isTermMappingField && typeof val === 'number') {
|
|
602
|
+
// For term mapping fields, values are already term IDs
|
|
603
|
+
key = String(val)
|
|
604
|
+
} else if (isTermMappingField && typeof val === 'string') {
|
|
605
|
+
// Fallback: convert string to term ID
|
|
606
|
+
// CRITICAL: During indexing (addBatch), we should use getTermId() to create IDs if needed
|
|
607
|
+
// This is different from queries where we use getTermIdWithoutIncrement() to avoid creating new IDs
|
|
608
|
+
const termId = this.database.termManager.getTermId(val)
|
|
609
|
+
key = String(termId)
|
|
610
|
+
} else {
|
|
611
|
+
// For non-term-mapping fields (including array:number), use values directly
|
|
612
|
+
key = String(val)
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
// OPTIMIZATION 6: Use Map for efficient batch updates
|
|
616
|
+
if (!fieldUpdates.has(field)) {
|
|
617
|
+
fieldUpdates.set(field, new Map())
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
const fieldMap = fieldUpdates.get(field)
|
|
621
|
+
if (!fieldMap.has(key)) {
|
|
622
|
+
fieldMap.set(key, new Set())
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
fieldMap.get(key).add(lineNumber)
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
// OPTIMIZATION 6: Apply all updates in batch for better performance
|
|
632
|
+
for (const [field, fieldMap] of fieldUpdates) {
|
|
633
|
+
for (const [key, lineNumbers] of fieldMap) {
|
|
634
|
+
if (!data[field][key]) {
|
|
635
|
+
data[field][key] = { set: new Set(), ranges: [] }
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
// OPTIMIZATION 6: Add all line numbers at once
|
|
639
|
+
for (const lineNumber of lineNumbers) {
|
|
640
|
+
data[field][key].set.add(lineNumber)
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
// OPTIMIZATION 6: Lazy range optimization - only when beneficial
|
|
644
|
+
if (data[field][key].set.size >= this.rangeThreshold * 3) {
|
|
645
|
+
this._optimizeToRanges(data[field][key])
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
// Ultra-fast dry remove
|
|
652
|
+
dryRemove(ln) {
|
|
653
|
+
const data = this.index.data
|
|
654
|
+
for (const field in data) {
|
|
655
|
+
for (const value in data[field]) {
|
|
656
|
+
// Direct Set operation - fastest possible
|
|
657
|
+
if (data[field][value].set) {
|
|
658
|
+
data[field][value].set.delete(ln)
|
|
659
|
+
}
|
|
660
|
+
if (data[field][value].ranges) {
|
|
661
|
+
data[field][value].ranges = this._removeFromRanges(data[field][value].ranges, ln)
|
|
662
|
+
}
|
|
663
|
+
// Remove empty entries
|
|
664
|
+
if ((!data[field][value].set || data[field][value].set.size === 0) &&
|
|
665
|
+
(!data[field][value].ranges || data[field][value].ranges.length === 0)) {
|
|
666
|
+
delete data[field][value]
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
// Cleanup method to free memory
|
|
674
|
+
cleanup() {
|
|
675
|
+
const data = this.index.data
|
|
676
|
+
for (const field in data) {
|
|
677
|
+
for (const value in data[field]) {
|
|
678
|
+
if (data[field][value].set) {
|
|
679
|
+
if (typeof data[field][value].set.clearAll === 'function') {
|
|
680
|
+
data[field][value].set.clearAll()
|
|
681
|
+
} else if (typeof data[field][value].set.clear === 'function') {
|
|
682
|
+
data[field][value].set.clear()
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
if (data[field][value].ranges) {
|
|
686
|
+
data[field][value].ranges.length = 0
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
// Clear the entire field
|
|
690
|
+
data[field] = {}
|
|
691
|
+
}
|
|
692
|
+
// Clear all data
|
|
693
|
+
this.index.data = {}
|
|
694
|
+
this.totalLines = 0
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// Clear all indexes
|
|
698
|
+
clear() {
|
|
699
|
+
this.index.data = {}
|
|
700
|
+
this.totalLines = 0
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
// Update a record in the index
|
|
706
|
+
async update(oldRecord, newRecord, lineNumber = null) {
|
|
707
|
+
if (!oldRecord || !newRecord) return
|
|
708
|
+
|
|
709
|
+
// Remove old record by ID
|
|
710
|
+
await this.remove(oldRecord)
|
|
711
|
+
|
|
712
|
+
// Add new record with provided line number or use hash of the ID
|
|
713
|
+
const actualLineNumber = lineNumber !== null ? lineNumber : this._getIdAsNumber(newRecord.id)
|
|
714
|
+
await this.add(newRecord, actualLineNumber)
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
// Convert string ID to number for line number
|
|
718
|
+
_getIdAsNumber(id) {
|
|
719
|
+
if (typeof id === 'number') return id
|
|
720
|
+
if (typeof id === 'string') {
|
|
721
|
+
// Simple hash function to convert string to number
|
|
722
|
+
let hash = 0
|
|
723
|
+
for (let i = 0; i < id.length; i++) {
|
|
724
|
+
const char = id.charCodeAt(i)
|
|
725
|
+
hash = ((hash << 5) - hash) + char
|
|
726
|
+
hash = hash & hash // Convert to 32-bit integer
|
|
727
|
+
}
|
|
728
|
+
return Math.abs(hash)
|
|
729
|
+
}
|
|
730
|
+
return 0
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
// Remove a record from the index
|
|
734
|
+
async remove(record) {
|
|
735
|
+
if (!record) return
|
|
736
|
+
|
|
737
|
+
// If record is an array of line numbers, use the original method
|
|
738
|
+
if (Array.isArray(record)) {
|
|
739
|
+
return this._removeLineNumbers(record)
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
// If record is an object, remove by record data
|
|
743
|
+
if (typeof record === 'object' && record.id) {
|
|
744
|
+
return await this._removeRecord(record)
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
// Remove a specific record from the index
|
|
749
|
+
async _removeRecord(record) {
|
|
750
|
+
if (!record) return
|
|
751
|
+
|
|
752
|
+
const data = this.index.data
|
|
753
|
+
const database = this.database
|
|
754
|
+
const persistedCount = Array.isArray(database?.offsets) ? database.offsets.length : 0
|
|
755
|
+
const lineMatchCache = new Map()
|
|
756
|
+
|
|
757
|
+
const doesLineNumberBelongToRecord = async (lineNumber) => {
|
|
758
|
+
if (lineMatchCache.has(lineNumber)) {
|
|
759
|
+
return lineMatchCache.get(lineNumber)
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
let belongs = false
|
|
763
|
+
|
|
764
|
+
try {
|
|
765
|
+
if (lineNumber >= persistedCount) {
|
|
766
|
+
const writeBufferIndex = lineNumber - persistedCount
|
|
767
|
+
const candidate = database?.writeBuffer?.[writeBufferIndex]
|
|
768
|
+
belongs = !!candidate && candidate.id === record.id
|
|
769
|
+
} else if (lineNumber >= 0) {
|
|
770
|
+
const range = database?.locate?.(lineNumber)
|
|
771
|
+
if (range && database.fileHandler && database.serializer) {
|
|
772
|
+
const [start, end] = range
|
|
773
|
+
const buffer = await database.fileHandler.readRange(start, end)
|
|
774
|
+
if (buffer && buffer.length > 0) {
|
|
775
|
+
let line = buffer.toString('utf8')
|
|
776
|
+
if (line) {
|
|
777
|
+
line = line.trim()
|
|
778
|
+
if (line.length > 0) {
|
|
779
|
+
const storedRecord = database.serializer.deserialize(line)
|
|
780
|
+
belongs = storedRecord && storedRecord.id === record.id
|
|
781
|
+
}
|
|
782
|
+
}
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
} catch (error) {
|
|
787
|
+
belongs = false
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
lineMatchCache.set(lineNumber, belongs)
|
|
791
|
+
return belongs
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
for (const field in data) {
|
|
795
|
+
if (record[field] !== undefined && record[field] !== null) {
|
|
796
|
+
const values = Array.isArray(record[field]) ? record[field] : [record[field]]
|
|
797
|
+
for (const val of values) {
|
|
798
|
+
let key
|
|
799
|
+
|
|
800
|
+
// Check if this is a term mapping field (array:string fields only)
|
|
801
|
+
const isTermMappingField = this.database?.termManager &&
|
|
802
|
+
this.database.termManager.termMappingFields &&
|
|
803
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
804
|
+
|
|
805
|
+
if (isTermMappingField && typeof val === 'number') {
|
|
806
|
+
// For term mapping fields (array:string), the values are already term IDs
|
|
807
|
+
key = String(val)
|
|
808
|
+
if (this.database.opts.debugMode) {
|
|
809
|
+
console.log(`🔍 IndexManager._removeRecord: Using term ID ${val} directly for field "${field}"`)
|
|
810
|
+
}
|
|
811
|
+
} else if (isTermMappingField && typeof val === 'string') {
|
|
812
|
+
// For term mapping fields (array:string), convert string to term ID
|
|
813
|
+
const termId = this.database.termManager.getTermIdWithoutIncrement(val)
|
|
814
|
+
key = String(termId)
|
|
815
|
+
if (this.database.opts.debugMode) {
|
|
816
|
+
console.log(`🔍 IndexManager._removeRecord: Using term ID ${termId} for term "${val}"`)
|
|
817
|
+
}
|
|
818
|
+
} else {
|
|
819
|
+
// For non-term-mapping fields (including array:number), use values directly
|
|
820
|
+
key = String(val)
|
|
821
|
+
if (this.database?.opts?.debugMode) {
|
|
822
|
+
console.log(`🔍 IndexManager._removeRecord: Using value "${val}" directly for field "${field}"`)
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
// Note: TermManager notification is handled by Database.mjs
|
|
827
|
+
// to avoid double decrementation during updates
|
|
828
|
+
|
|
829
|
+
const indexEntry = data[field][key]
|
|
830
|
+
if (indexEntry) {
|
|
831
|
+
const lineNumbers = this._getAllLineNumbers(indexEntry)
|
|
832
|
+
const filteredLineNumbers = []
|
|
833
|
+
|
|
834
|
+
for (const lineNumber of lineNumbers) {
|
|
835
|
+
if (!(await doesLineNumberBelongToRecord(lineNumber))) {
|
|
836
|
+
filteredLineNumbers.push(lineNumber)
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
if (filteredLineNumbers.length === 0) {
|
|
841
|
+
delete data[field][key]
|
|
842
|
+
} else {
|
|
843
|
+
// Rebuild the index value with filtered line numbers
|
|
844
|
+
data[field][key].set = new Set(filteredLineNumbers)
|
|
845
|
+
data[field][key].ranges = []
|
|
846
|
+
}
|
|
847
|
+
}
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
// Ultra-fast remove with batch processing (renamed from remove)
|
|
854
|
+
_removeLineNumbers(lineNumbers) {
|
|
855
|
+
if (!lineNumbers || lineNumbers.length === 0) return
|
|
856
|
+
|
|
857
|
+
lineNumbers.sort((a, b) => a - b) // Sort ascending for efficient processing
|
|
858
|
+
|
|
859
|
+
const data = this.index.data
|
|
860
|
+
for (const field in data) {
|
|
861
|
+
for (const value in data[field]) {
|
|
862
|
+
const numbers = this._getAllLineNumbers(data[field][value])
|
|
863
|
+
const newNumbers = []
|
|
864
|
+
|
|
865
|
+
for (const ln of numbers) {
|
|
866
|
+
let offset = 0
|
|
867
|
+
for (const lineNumber of lineNumbers) {
|
|
868
|
+
if (lineNumber < ln) {
|
|
869
|
+
offset++
|
|
870
|
+
} else if (lineNumber === ln) {
|
|
871
|
+
offset = -1 // Mark for removal
|
|
872
|
+
break
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
if (offset >= 0) {
|
|
876
|
+
newNumbers.push(ln - offset) // Update the value
|
|
877
|
+
}
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
if (newNumbers.length > 0) {
|
|
881
|
+
// Rebuild hybrid structure with new numbers
|
|
882
|
+
data[field][value] = { set: new Set(), ranges: [] }
|
|
883
|
+
for (const num of newNumbers) {
|
|
884
|
+
data[field][value] = this._addLineNumber(data[field][value], num)
|
|
885
|
+
}
|
|
886
|
+
} else {
|
|
887
|
+
delete data[field][value]
|
|
888
|
+
}
|
|
889
|
+
}
|
|
890
|
+
}
|
|
891
|
+
}
|
|
892
|
+
|
|
893
|
+
// Ultra-fast replace with batch processing
|
|
894
|
+
replace(map) {
|
|
895
|
+
if (!map || map.size === 0) return
|
|
896
|
+
|
|
897
|
+
const data = this.index.data
|
|
898
|
+
for (const field in data) {
|
|
899
|
+
for (const value in data[field]) {
|
|
900
|
+
const numbers = this._getAllLineNumbers(data[field][value])
|
|
901
|
+
const newNumbers = []
|
|
902
|
+
|
|
903
|
+
for (const lineNumber of numbers) {
|
|
904
|
+
if (map.has(lineNumber)) {
|
|
905
|
+
newNumbers.push(map.get(lineNumber))
|
|
906
|
+
} else {
|
|
907
|
+
newNumbers.push(lineNumber)
|
|
908
|
+
}
|
|
909
|
+
}
|
|
910
|
+
|
|
911
|
+
// Rebuild hybrid structure with new numbers
|
|
912
|
+
data[field][value] = { set: new Set(), ranges: [] }
|
|
913
|
+
for (const num of newNumbers) {
|
|
914
|
+
data[field][value] = this._addLineNumber(data[field][value], num)
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
}
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
// Ultra-fast query with early exit and smart processing
|
|
921
|
+
query(criteria, options = {}) {
|
|
922
|
+
if (typeof options === 'boolean') {
|
|
923
|
+
options = { matchAny: options };
|
|
924
|
+
}
|
|
925
|
+
const { matchAny = false, caseInsensitive = false } = options;
|
|
926
|
+
|
|
927
|
+
if (!criteria) {
|
|
928
|
+
// Return all line numbers when no criteria provided
|
|
929
|
+
return new Set(Array.from({ length: this.totalLines || 0 }, (_, i) => i));
|
|
930
|
+
}
|
|
931
|
+
|
|
932
|
+
// Handle $not operator
|
|
933
|
+
if (criteria.$not && typeof criteria.$not === 'object') {
|
|
934
|
+
// Get all possible line numbers from database offsets or totalLines
|
|
935
|
+
const totalRecords = this.database?.offsets?.length || this.totalLines || 0;
|
|
936
|
+
const allLines = new Set(Array.from({ length: totalRecords }, (_, i) => i));
|
|
937
|
+
|
|
938
|
+
// Get line numbers matching the $not condition
|
|
939
|
+
const notLines = this.query(criteria.$not, options);
|
|
940
|
+
|
|
941
|
+
// Return complement (all lines except those matching $not condition)
|
|
942
|
+
const result = new Set([...allLines].filter(x => !notLines.has(x)));
|
|
943
|
+
|
|
944
|
+
// If there are other conditions besides $not, we need to intersect with them
|
|
945
|
+
const otherCriteria = { ...criteria };
|
|
946
|
+
delete otherCriteria.$not;
|
|
947
|
+
|
|
948
|
+
if (Object.keys(otherCriteria).length > 0) {
|
|
949
|
+
const otherResults = this.query(otherCriteria, options);
|
|
950
|
+
return new Set([...result].filter(x => otherResults.has(x)));
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
return result;
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
// Handle $and queries with parallel processing optimization
|
|
957
|
+
if (criteria.$and && Array.isArray(criteria.$and)) {
|
|
958
|
+
// OPTIMIZATION: Process conditions in parallel for better performance
|
|
959
|
+
if (criteria.$and.length > 1) {
|
|
960
|
+
// Process all conditions in parallel (synchronous since query is not async)
|
|
961
|
+
const conditionResults = criteria.$and.map(andCondition =>
|
|
962
|
+
this.query(andCondition, options)
|
|
963
|
+
);
|
|
964
|
+
|
|
965
|
+
// Intersect all results for AND logic
|
|
966
|
+
let result = conditionResults[0];
|
|
967
|
+
for (let i = 1; i < conditionResults.length; i++) {
|
|
968
|
+
result = new Set([...result].filter(x => conditionResults[i].has(x)));
|
|
969
|
+
}
|
|
970
|
+
|
|
971
|
+
// IMPORTANT: Check if there are other fields besides $and at the root level
|
|
972
|
+
// If so, we need to intersect with them too
|
|
973
|
+
const otherCriteria = { ...criteria };
|
|
974
|
+
delete otherCriteria.$and;
|
|
975
|
+
|
|
976
|
+
if (Object.keys(otherCriteria).length > 0) {
|
|
977
|
+
const otherResults = this.query(otherCriteria, options);
|
|
978
|
+
result = new Set([...result].filter(x => otherResults.has(x)));
|
|
979
|
+
}
|
|
980
|
+
|
|
981
|
+
return result || new Set();
|
|
982
|
+
} else {
|
|
983
|
+
// Single condition - check for other criteria at root level
|
|
984
|
+
const andResult = this.query(criteria.$and[0], options);
|
|
985
|
+
|
|
986
|
+
const otherCriteria = { ...criteria };
|
|
987
|
+
delete otherCriteria.$and;
|
|
988
|
+
|
|
989
|
+
if (Object.keys(otherCriteria).length > 0) {
|
|
990
|
+
const otherResults = this.query(otherCriteria, options);
|
|
991
|
+
return new Set([...andResult].filter(x => otherResults.has(x)));
|
|
992
|
+
}
|
|
993
|
+
|
|
994
|
+
return andResult;
|
|
995
|
+
}
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
const fields = Object.keys(criteria);
|
|
999
|
+
if (!fields.length) {
|
|
1000
|
+
// Return all line numbers when criteria is empty object
|
|
1001
|
+
return new Set(Array.from({ length: this.totalLines || 0 }, (_, i) => i));
|
|
1002
|
+
}
|
|
1003
|
+
|
|
1004
|
+
let matchingLines = matchAny ? new Set() : null;
|
|
1005
|
+
const data = this.index.data
|
|
1006
|
+
|
|
1007
|
+
for (const field of fields) {
|
|
1008
|
+
// Skip logical operators - they are handled separately
|
|
1009
|
+
if (field.startsWith('$')) continue;
|
|
1010
|
+
|
|
1011
|
+
if (typeof data[field] === 'undefined') continue;
|
|
1012
|
+
|
|
1013
|
+
const originalCriteriaValue = criteria[field];
|
|
1014
|
+
const criteriaValue = normalizeCriteriaOperators(originalCriteriaValue, { target: 'legacy', preserveOriginal: true });
|
|
1015
|
+
let lineNumbersForField = new Set();
|
|
1016
|
+
const isNumericField = this.opts.indexes[field] === 'number';
|
|
1017
|
+
|
|
1018
|
+
// Handle RegExp values directly (MUST check before object check since RegExp is an object)
|
|
1019
|
+
if (criteriaValue instanceof RegExp) {
|
|
1020
|
+
// RegExp cannot be efficiently queried using indices - fall back to streaming
|
|
1021
|
+
// This will be handled by the QueryManager's streaming strategy
|
|
1022
|
+
continue;
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
if (typeof criteriaValue === 'object' && !Array.isArray(criteriaValue)) {
|
|
1026
|
+
const fieldIndex = data[field];
|
|
1027
|
+
|
|
1028
|
+
// Handle $in operator for array queries
|
|
1029
|
+
if (criteriaValue.$in !== undefined) {
|
|
1030
|
+
const inValues = Array.isArray(criteriaValue.$in) ? criteriaValue.$in : [criteriaValue.$in];
|
|
1031
|
+
|
|
1032
|
+
// PERFORMANCE: Cache term mapping field check once
|
|
1033
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1034
|
+
this.database.termManager.termMappingFields &&
|
|
1035
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
1036
|
+
|
|
1037
|
+
// PERFORMANCE: Track if any term was found and matched
|
|
1038
|
+
let foundAnyMatch = false
|
|
1039
|
+
|
|
1040
|
+
for (const inValue of inValues) {
|
|
1041
|
+
// SPACE OPTIMIZATION: Convert search term to term ID for lookup
|
|
1042
|
+
let searchTermId
|
|
1043
|
+
|
|
1044
|
+
if (isTermMappingField && typeof inValue === 'number') {
|
|
1045
|
+
// For term mapping fields (array:string), the search value is already a term ID
|
|
1046
|
+
searchTermId = String(inValue)
|
|
1047
|
+
} else if (isTermMappingField && typeof inValue === 'string') {
|
|
1048
|
+
// For term mapping fields (array:string), convert string to term ID
|
|
1049
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(String(inValue))
|
|
1050
|
+
if (termId === undefined) {
|
|
1051
|
+
// Term not found in termManager - skip this search value
|
|
1052
|
+
// This means the term was never saved to the database
|
|
1053
|
+
if (this.opts?.debugMode) {
|
|
1054
|
+
console.log(`⚠️ Term "${inValue}" not found in termManager for field "${field}" - skipping`)
|
|
1055
|
+
}
|
|
1056
|
+
continue // Skip this value, no matches possible
|
|
1057
|
+
}
|
|
1058
|
+
searchTermId = String(termId)
|
|
1059
|
+
} else {
|
|
1060
|
+
// For non-term-mapping fields (including array:number), use values directly
|
|
1061
|
+
searchTermId = String(inValue)
|
|
1062
|
+
}
|
|
1063
|
+
|
|
1064
|
+
// PERFORMANCE: Direct lookup instead of iteration
|
|
1065
|
+
let matched = false
|
|
1066
|
+
if (caseInsensitive && typeof inValue === 'string') {
|
|
1067
|
+
const searchLower = searchTermId.toLowerCase()
|
|
1068
|
+
for (const value in fieldIndex) {
|
|
1069
|
+
if (value.toLowerCase() === searchLower) {
|
|
1070
|
+
const numbers = this._getAllLineNumbers(fieldIndex[value]);
|
|
1071
|
+
for (const lineNumber of numbers) {
|
|
1072
|
+
lineNumbersForField.add(lineNumber);
|
|
1073
|
+
}
|
|
1074
|
+
matched = true
|
|
1075
|
+
foundAnyMatch = true
|
|
1076
|
+
}
|
|
1077
|
+
}
|
|
1078
|
+
} else {
|
|
1079
|
+
const indexData = fieldIndex[searchTermId]
|
|
1080
|
+
if (indexData) {
|
|
1081
|
+
const numbers = this._getAllLineNumbers(indexData);
|
|
1082
|
+
for (const lineNumber of numbers) {
|
|
1083
|
+
lineNumbersForField.add(lineNumber);
|
|
1084
|
+
}
|
|
1085
|
+
matched = true
|
|
1086
|
+
foundAnyMatch = true
|
|
1087
|
+
}
|
|
1088
|
+
}
|
|
1089
|
+
}
|
|
1090
|
+
|
|
1091
|
+
// CRITICAL FIX: If no matches found at all (all terms were unknown or not in index),
|
|
1092
|
+
// lineNumbersForField remains empty which is correct (no results for $in)
|
|
1093
|
+
// This is handled correctly by the caller - empty Set means no matches
|
|
1094
|
+
}
|
|
1095
|
+
// Handle $nin operator (not in) - returns complement of $in
|
|
1096
|
+
else if (criteriaValue.$nin !== undefined) {
|
|
1097
|
+
const ninValues = Array.isArray(criteriaValue.$nin) ? criteriaValue.$nin : [criteriaValue.$nin];
|
|
1098
|
+
|
|
1099
|
+
// Get all possible line numbers
|
|
1100
|
+
const totalRecords = this.database?.offsets?.length || this.totalLines || 0;
|
|
1101
|
+
const allLines = new Set(Array.from({ length: totalRecords }, (_, i) => i));
|
|
1102
|
+
|
|
1103
|
+
// Get line numbers that match any of the $nin values
|
|
1104
|
+
const matchingLines = new Set();
|
|
1105
|
+
|
|
1106
|
+
// PERFORMANCE: Cache term mapping field check once
|
|
1107
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1108
|
+
this.database.termManager.termMappingFields &&
|
|
1109
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
1110
|
+
|
|
1111
|
+
for (const ninValue of ninValues) {
|
|
1112
|
+
// SPACE OPTIMIZATION: Convert search term to term ID for lookup
|
|
1113
|
+
let searchTermId
|
|
1114
|
+
|
|
1115
|
+
if (isTermMappingField && typeof ninValue === 'number') {
|
|
1116
|
+
// For term mapping fields (array:string), the search value is already a term ID
|
|
1117
|
+
searchTermId = String(ninValue)
|
|
1118
|
+
} else if (isTermMappingField && typeof ninValue === 'string') {
|
|
1119
|
+
// For term mapping fields (array:string), convert string to term ID
|
|
1120
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(String(ninValue))
|
|
1121
|
+
if (termId === undefined) {
|
|
1122
|
+
// Term not found - skip this value (can't exclude what doesn't exist)
|
|
1123
|
+
if (this.opts?.debugMode) {
|
|
1124
|
+
console.log(`⚠️ Term "${ninValue}" not found in termManager for field "${field}" - skipping`)
|
|
1125
|
+
}
|
|
1126
|
+
continue
|
|
1127
|
+
}
|
|
1128
|
+
searchTermId = String(termId)
|
|
1129
|
+
} else {
|
|
1130
|
+
// For non-term-mapping fields (including array:number), use values directly
|
|
1131
|
+
searchTermId = String(ninValue)
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1134
|
+
// PERFORMANCE: Direct lookup instead of iteration
|
|
1135
|
+
if (caseInsensitive && typeof ninValue === 'string') {
|
|
1136
|
+
const searchLower = searchTermId.toLowerCase()
|
|
1137
|
+
for (const value in fieldIndex) {
|
|
1138
|
+
if (value.toLowerCase() === searchLower) {
|
|
1139
|
+
const numbers = this._getAllLineNumbers(fieldIndex[value]);
|
|
1140
|
+
for (const lineNumber of numbers) {
|
|
1141
|
+
matchingLines.add(lineNumber);
|
|
1142
|
+
}
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1145
|
+
} else {
|
|
1146
|
+
const indexData = fieldIndex[searchTermId]
|
|
1147
|
+
if (indexData) {
|
|
1148
|
+
const numbers = this._getAllLineNumbers(indexData);
|
|
1149
|
+
for (const lineNumber of numbers) {
|
|
1150
|
+
matchingLines.add(lineNumber);
|
|
1151
|
+
}
|
|
1152
|
+
}
|
|
1153
|
+
}
|
|
1154
|
+
}
|
|
1155
|
+
|
|
1156
|
+
// Return complement: all lines EXCEPT those matching $nin values
|
|
1157
|
+
lineNumbersForField = new Set([...allLines].filter(x => !matchingLines.has(x)));
|
|
1158
|
+
}
|
|
1159
|
+
// Handle $contains operator for array queries
|
|
1160
|
+
else if (criteriaValue.$contains !== undefined) {
|
|
1161
|
+
const containsValue = criteriaValue.$contains;
|
|
1162
|
+
// Handle case-insensitive for $contains
|
|
1163
|
+
if (caseInsensitive && typeof containsValue === 'string') {
|
|
1164
|
+
for (const value in fieldIndex) {
|
|
1165
|
+
if (value.toLowerCase() === containsValue.toLowerCase()) {
|
|
1166
|
+
const numbers = this._getAllLineNumbers(fieldIndex[value]);
|
|
1167
|
+
for (const lineNumber of numbers) {
|
|
1168
|
+
lineNumbersForField.add(lineNumber);
|
|
1169
|
+
}
|
|
1170
|
+
}
|
|
1171
|
+
}
|
|
1172
|
+
} else {
|
|
1173
|
+
if (fieldIndex[containsValue]) {
|
|
1174
|
+
const numbers = this._getAllLineNumbers(fieldIndex[containsValue]);
|
|
1175
|
+
for (const lineNumber of numbers) {
|
|
1176
|
+
lineNumbersForField.add(lineNumber);
|
|
1177
|
+
}
|
|
1178
|
+
}
|
|
1179
|
+
}
|
|
1180
|
+
}
|
|
1181
|
+
// Handle $all operator for array queries - FIXED FOR TERM MAPPING
|
|
1182
|
+
else if (criteriaValue.$all !== undefined) {
|
|
1183
|
+
const allValues = Array.isArray(criteriaValue.$all) ? criteriaValue.$all : [criteriaValue.$all];
|
|
1184
|
+
|
|
1185
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1186
|
+
this.database.termManager.termMappingFields &&
|
|
1187
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
1188
|
+
|
|
1189
|
+
const normalizeValue = (value) => {
|
|
1190
|
+
if (isTermMappingField) {
|
|
1191
|
+
if (typeof value === 'number') {
|
|
1192
|
+
return String(value)
|
|
1193
|
+
}
|
|
1194
|
+
if (typeof value === 'string') {
|
|
1195
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(value)
|
|
1196
|
+
if (termId !== undefined) {
|
|
1197
|
+
return String(termId)
|
|
1198
|
+
}
|
|
1199
|
+
return null
|
|
1200
|
+
}
|
|
1201
|
+
return null
|
|
1202
|
+
}
|
|
1203
|
+
return String(value)
|
|
1204
|
+
}
|
|
1205
|
+
|
|
1206
|
+
const normalizedValues = []
|
|
1207
|
+
for (const value of allValues) {
|
|
1208
|
+
const normalized = normalizeValue(value)
|
|
1209
|
+
if (normalized === null) {
|
|
1210
|
+
// Term not found in term manager, no matches possible
|
|
1211
|
+
return lineNumbersForField
|
|
1212
|
+
}
|
|
1213
|
+
normalizedValues.push(normalized)
|
|
1214
|
+
}
|
|
1215
|
+
|
|
1216
|
+
// Early exit optimization
|
|
1217
|
+
if (normalizedValues.length === 0) {
|
|
1218
|
+
// Empty $all matches everything
|
|
1219
|
+
for (const value in fieldIndex) {
|
|
1220
|
+
const numbers = this._getAllLineNumbers(fieldIndex[value]);
|
|
1221
|
+
for (const lineNumber of numbers) {
|
|
1222
|
+
lineNumbersForField.add(lineNumber);
|
|
1223
|
+
}
|
|
1224
|
+
}
|
|
1225
|
+
} else {
|
|
1226
|
+
// For term mapping, we need to find records that contain ALL specified terms
|
|
1227
|
+
// This requires a different approach than simple field matching
|
|
1228
|
+
|
|
1229
|
+
// First, get all line numbers that contain each individual term
|
|
1230
|
+
const termLineNumbers = new Map();
|
|
1231
|
+
for (const term of normalizedValues) {
|
|
1232
|
+
if (fieldIndex[term]) {
|
|
1233
|
+
termLineNumbers.set(term, new Set(this._getAllLineNumbers(fieldIndex[term])));
|
|
1234
|
+
} else {
|
|
1235
|
+
// If any term doesn't exist, no records can match $all
|
|
1236
|
+
termLineNumbers.set(term, new Set());
|
|
1237
|
+
}
|
|
1238
|
+
}
|
|
1239
|
+
|
|
1240
|
+
// Find intersection of all term line numbers
|
|
1241
|
+
if (termLineNumbers.size > 0) {
|
|
1242
|
+
const allTermSets = Array.from(termLineNumbers.values());
|
|
1243
|
+
let intersection = allTermSets[0];
|
|
1244
|
+
|
|
1245
|
+
for (let i = 1; i < allTermSets.length; i++) {
|
|
1246
|
+
intersection = new Set([...intersection].filter(x => allTermSets[i].has(x)));
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1249
|
+
// Add all line numbers from intersection
|
|
1250
|
+
for (const lineNumber of intersection) {
|
|
1251
|
+
lineNumbersForField.add(lineNumber);
|
|
1252
|
+
}
|
|
1253
|
+
}
|
|
1254
|
+
}
|
|
1255
|
+
}
|
|
1256
|
+
// Handle other operators
|
|
1257
|
+
else {
|
|
1258
|
+
for (const value in fieldIndex) {
|
|
1259
|
+
let includeValue = true;
|
|
1260
|
+
if (isNumericField) {
|
|
1261
|
+
const numericValue = parseFloat(value);
|
|
1262
|
+
if (!isNaN(numericValue)) {
|
|
1263
|
+
if (criteriaValue['>'] !== undefined && numericValue <= criteriaValue['>']) {
|
|
1264
|
+
includeValue = false;
|
|
1265
|
+
}
|
|
1266
|
+
if (criteriaValue['>='] !== undefined && numericValue < criteriaValue['>=']) {
|
|
1267
|
+
includeValue = false;
|
|
1268
|
+
}
|
|
1269
|
+
if (criteriaValue['<'] !== undefined && numericValue >= criteriaValue['<']) {
|
|
1270
|
+
includeValue = false;
|
|
1271
|
+
}
|
|
1272
|
+
if (criteriaValue['<='] !== undefined && numericValue > criteriaValue['<=']) {
|
|
1273
|
+
includeValue = false;
|
|
1274
|
+
}
|
|
1275
|
+
if (criteriaValue['!='] !== undefined) {
|
|
1276
|
+
const excludeValues = Array.isArray(criteriaValue['!='])
|
|
1277
|
+
? criteriaValue['!=']
|
|
1278
|
+
: [criteriaValue['!=']];
|
|
1279
|
+
if (excludeValues.includes(numericValue)) {
|
|
1280
|
+
includeValue = false;
|
|
1281
|
+
}
|
|
1282
|
+
}
|
|
1283
|
+
}
|
|
1284
|
+
} else {
|
|
1285
|
+
if (criteriaValue['contains'] !== undefined && typeof value === 'string') {
|
|
1286
|
+
const term = String(criteriaValue['contains']);
|
|
1287
|
+
if (caseInsensitive) {
|
|
1288
|
+
if (!value.toLowerCase().includes(term.toLowerCase())) {
|
|
1289
|
+
includeValue = false;
|
|
1290
|
+
}
|
|
1291
|
+
} else {
|
|
1292
|
+
if (!value.includes(term)) {
|
|
1293
|
+
includeValue = false;
|
|
1294
|
+
}
|
|
1295
|
+
}
|
|
1296
|
+
}
|
|
1297
|
+
if (criteriaValue['regex'] !== undefined) {
|
|
1298
|
+
let regex;
|
|
1299
|
+
if (typeof criteriaValue['regex'] === 'string') {
|
|
1300
|
+
regex = new RegExp(criteriaValue['regex'], caseInsensitive ? 'i' : '');
|
|
1301
|
+
} else if (criteriaValue['regex'] instanceof RegExp) {
|
|
1302
|
+
if (caseInsensitive && !criteriaValue['regex'].ignoreCase) {
|
|
1303
|
+
const flags = criteriaValue['regex'].flags.includes('i')
|
|
1304
|
+
? criteriaValue['regex'].flags
|
|
1305
|
+
: criteriaValue['regex'].flags + 'i';
|
|
1306
|
+
regex = new RegExp(criteriaValue['regex'].source, flags);
|
|
1307
|
+
} else {
|
|
1308
|
+
regex = criteriaValue['regex'];
|
|
1309
|
+
}
|
|
1310
|
+
}
|
|
1311
|
+
if (regex) {
|
|
1312
|
+
// For array fields, test regex against each element
|
|
1313
|
+
if (Array.isArray(value)) {
|
|
1314
|
+
if (!value.some(element => regex.test(String(element)))) {
|
|
1315
|
+
includeValue = false;
|
|
1316
|
+
}
|
|
1317
|
+
} else {
|
|
1318
|
+
// For non-array fields, test regex against the value directly
|
|
1319
|
+
if (!regex.test(String(value))) {
|
|
1320
|
+
includeValue = false;
|
|
1321
|
+
}
|
|
1322
|
+
}
|
|
1323
|
+
}
|
|
1324
|
+
}
|
|
1325
|
+
if (criteriaValue['!='] !== undefined) {
|
|
1326
|
+
const excludeValues = Array.isArray(criteriaValue['!='])
|
|
1327
|
+
? criteriaValue['!=']
|
|
1328
|
+
: [criteriaValue['!=']];
|
|
1329
|
+
if (excludeValues.includes(value)) {
|
|
1330
|
+
includeValue = false;
|
|
1331
|
+
}
|
|
1332
|
+
}
|
|
1333
|
+
}
|
|
1334
|
+
|
|
1335
|
+
if (includeValue) {
|
|
1336
|
+
const numbers = this._getAllLineNumbers(fieldIndex[value]);
|
|
1337
|
+
for (const lineNumber of numbers) {
|
|
1338
|
+
lineNumbersForField.add(lineNumber);
|
|
1339
|
+
}
|
|
1340
|
+
}
|
|
1341
|
+
}
|
|
1342
|
+
}
|
|
1343
|
+
} else {
|
|
1344
|
+
// Simple equality comparison - handle array queries
|
|
1345
|
+
const values = Array.isArray(criteriaValue) ? criteriaValue : [criteriaValue];
|
|
1346
|
+
const fieldData = data[field];
|
|
1347
|
+
for (const searchValue of values) {
|
|
1348
|
+
// SPACE OPTIMIZATION: Convert search term to term ID for lookup
|
|
1349
|
+
let searchTermId
|
|
1350
|
+
|
|
1351
|
+
// PERFORMANCE: Cache term mapping field check once per field
|
|
1352
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1353
|
+
this.database.termManager.termMappingFields &&
|
|
1354
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
1355
|
+
|
|
1356
|
+
if (isTermMappingField && typeof searchValue === 'number') {
|
|
1357
|
+
// For term mapping fields (array:string), the search value is already a term ID
|
|
1358
|
+
searchTermId = String(searchValue)
|
|
1359
|
+
} else if (isTermMappingField && typeof searchValue === 'string') {
|
|
1360
|
+
// For term mapping fields (array:string), convert string to term ID
|
|
1361
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(String(searchValue))
|
|
1362
|
+
if (termId === undefined) {
|
|
1363
|
+
// Term not found - skip this value
|
|
1364
|
+
if (this.opts?.debugMode) {
|
|
1365
|
+
console.log(`⚠️ Term "${searchValue}" not found in termManager for field "${field}" - skipping`)
|
|
1366
|
+
}
|
|
1367
|
+
continue // Skip this value, no matches possible
|
|
1368
|
+
}
|
|
1369
|
+
searchTermId = String(termId)
|
|
1370
|
+
} else {
|
|
1371
|
+
// For non-term-mapping fields (including array:number), use values directly
|
|
1372
|
+
searchTermId = String(searchValue)
|
|
1373
|
+
}
|
|
1374
|
+
|
|
1375
|
+
for (const key in fieldData) {
|
|
1376
|
+
let match = false;
|
|
1377
|
+
if (isNumericField) {
|
|
1378
|
+
// Convert both parts to number
|
|
1379
|
+
match = Number(key) === Number(searchValue);
|
|
1380
|
+
} else {
|
|
1381
|
+
// SPACE OPTIMIZATION: Compare term IDs instead of full terms
|
|
1382
|
+
if (caseInsensitive) {
|
|
1383
|
+
// For case-insensitive, we need to check if the search term ID matches any key
|
|
1384
|
+
match = key === String(searchTermId);
|
|
1385
|
+
} else {
|
|
1386
|
+
match = key === String(searchTermId);
|
|
1387
|
+
}
|
|
1388
|
+
}
|
|
1389
|
+
if (match) {
|
|
1390
|
+
const numbers = this._getAllLineNumbers(fieldData[key]);
|
|
1391
|
+
for (const lineNumber of numbers) {
|
|
1392
|
+
lineNumbersForField.add(lineNumber);
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
}
|
|
1396
|
+
}
|
|
1397
|
+
}
|
|
1398
|
+
|
|
1399
|
+
// Consolidate results from each field
|
|
1400
|
+
if (matchAny) {
|
|
1401
|
+
matchingLines = new Set([...matchingLines, ...lineNumbersForField]);
|
|
1402
|
+
} else {
|
|
1403
|
+
if (matchingLines === null) {
|
|
1404
|
+
matchingLines = lineNumbersForField;
|
|
1405
|
+
} else {
|
|
1406
|
+
matchingLines = new Set([...matchingLines].filter(n => lineNumbersForField.has(n)));
|
|
1407
|
+
}
|
|
1408
|
+
if (!matchingLines.size) {
|
|
1409
|
+
return new Set();
|
|
1410
|
+
}
|
|
1411
|
+
}
|
|
1412
|
+
}
|
|
1413
|
+
return matchingLines || new Set();
|
|
1414
|
+
}
|
|
1415
|
+
|
|
1416
|
+
/**
|
|
1417
|
+
* Check if any records exist for given field and terms (index-only, ultra-fast)
|
|
1418
|
+
* Stops at first match for maximum performance - no disk I/O required
|
|
1419
|
+
*
|
|
1420
|
+
* @param {string} fieldName - Indexed field name (e.g., 'nameTerms', 'groupTerms')
|
|
1421
|
+
* @param {string|Array<string>} terms - Single term or array of terms to check
|
|
1422
|
+
* @param {Object} options - Options: { $all: true/false, caseInsensitive: true/false, excludes: Array<string> }
|
|
1423
|
+
* @returns {boolean} - True if at least one match exists
|
|
1424
|
+
*
|
|
1425
|
+
* @example
|
|
1426
|
+
* // Check if any record has 'channel' in nameTerms
|
|
1427
|
+
* indexManager.exists('nameTerms', 'channel')
|
|
1428
|
+
*
|
|
1429
|
+
* @example
|
|
1430
|
+
* // Check if any record has ALL terms ['a', 'e'] in nameTerms ($all)
|
|
1431
|
+
* indexManager.exists('nameTerms', ['a', 'e'], { $all: true })
|
|
1432
|
+
*
|
|
1433
|
+
* @example
|
|
1434
|
+
* // Check if any record has ANY of the terms ['channel', 'tv'] in nameTerms
|
|
1435
|
+
* indexManager.exists('nameTerms', ['channel', 'tv'], { $all: false })
|
|
1436
|
+
*
|
|
1437
|
+
* @example
|
|
1438
|
+
* // Check if any record has 'tv' but NOT 'globo' in nameTerms
|
|
1439
|
+
* indexManager.exists('nameTerms', 'tv', { excludes: ['globo'] })
|
|
1440
|
+
*
|
|
1441
|
+
* @example
|
|
1442
|
+
* // Check if any record has ['tv', 'news'] but NOT 'sports' in nameTerms
|
|
1443
|
+
* indexManager.exists('nameTerms', ['tv', 'news'], { $all: true, excludes: ['sports'] })
|
|
1444
|
+
*/
|
|
1445
|
+
exists(fieldName, terms, options = {}) {
|
|
1446
|
+
// Early exit: validate fieldName
|
|
1447
|
+
if (!fieldName || typeof fieldName !== 'string') {
|
|
1448
|
+
return false;
|
|
1449
|
+
}
|
|
1450
|
+
|
|
1451
|
+
// Early exit: check if field is indexed
|
|
1452
|
+
if (!this.isFieldIndexed(fieldName)) {
|
|
1453
|
+
return false;
|
|
1454
|
+
}
|
|
1455
|
+
|
|
1456
|
+
const fieldIndex = this.index.data[fieldName];
|
|
1457
|
+
if (!fieldIndex || typeof fieldIndex !== 'object') {
|
|
1458
|
+
return false;
|
|
1459
|
+
}
|
|
1460
|
+
|
|
1461
|
+
// Normalize terms to array
|
|
1462
|
+
const termsArray = Array.isArray(terms) ? terms : [terms];
|
|
1463
|
+
if (termsArray.length === 0) {
|
|
1464
|
+
return false;
|
|
1465
|
+
}
|
|
1466
|
+
|
|
1467
|
+
const { $all = false, caseInsensitive = false, excludes = [] } = options;
|
|
1468
|
+
const hasExcludes = Array.isArray(excludes) && excludes.length > 0;
|
|
1469
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1470
|
+
this.database.termManager.termMappingFields &&
|
|
1471
|
+
this.database.termManager.termMappingFields.includes(fieldName);
|
|
1472
|
+
|
|
1473
|
+
// Helper: check if termData has any line numbers (ULTRA LIGHT - no expansion)
|
|
1474
|
+
const hasData = (termData) => {
|
|
1475
|
+
if (!termData) return false;
|
|
1476
|
+
// Check Set size (O(1))
|
|
1477
|
+
if (termData.set && termData.set.size > 0) {
|
|
1478
|
+
return true;
|
|
1479
|
+
}
|
|
1480
|
+
// Check ranges length (O(1))
|
|
1481
|
+
if (termData.ranges && termData.ranges.length > 0) {
|
|
1482
|
+
return true;
|
|
1483
|
+
}
|
|
1484
|
+
return false;
|
|
1485
|
+
};
|
|
1486
|
+
|
|
1487
|
+
// Helper: get term key with term mapping and case-insensitive support
|
|
1488
|
+
const getTermKey = (term, useCaseInsensitive = false) => {
|
|
1489
|
+
if (isTermMappingField && typeof term === 'string') {
|
|
1490
|
+
let termId;
|
|
1491
|
+
if (useCaseInsensitive) {
|
|
1492
|
+
// For case-insensitive, search termManager for case-insensitive match
|
|
1493
|
+
const searchLower = String(term).toLowerCase();
|
|
1494
|
+
termId = null;
|
|
1495
|
+
if (this.database?.termManager?.termToId) {
|
|
1496
|
+
for (const [termStr, id] of this.database.termManager.termToId.entries()) {
|
|
1497
|
+
if (termStr.toLowerCase() === searchLower) {
|
|
1498
|
+
termId = id;
|
|
1499
|
+
break;
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
} else {
|
|
1504
|
+
termId = this.database?.termManager?.getTermIdWithoutIncrement(String(term));
|
|
1505
|
+
}
|
|
1506
|
+
|
|
1507
|
+
if (termId === undefined || termId === null) {
|
|
1508
|
+
return null;
|
|
1509
|
+
}
|
|
1510
|
+
return String(termId);
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
// For non-term-mapping fields
|
|
1514
|
+
if (useCaseInsensitive && typeof term === 'string') {
|
|
1515
|
+
const searchLower = String(term).toLowerCase();
|
|
1516
|
+
for (const key in fieldIndex) {
|
|
1517
|
+
if (key.toLowerCase() === searchLower) {
|
|
1518
|
+
return key;
|
|
1519
|
+
}
|
|
1520
|
+
}
|
|
1521
|
+
return null;
|
|
1522
|
+
}
|
|
1523
|
+
|
|
1524
|
+
return String(term);
|
|
1525
|
+
};
|
|
1526
|
+
|
|
1527
|
+
// Handle $all (all terms must exist and have intersection)
|
|
1528
|
+
if ($all) {
|
|
1529
|
+
// Collect term data for all terms first (with early exit)
|
|
1530
|
+
const termDataArray = [];
|
|
1531
|
+
|
|
1532
|
+
for (const term of termsArray) {
|
|
1533
|
+
// Get term key (with term mapping if applicable)
|
|
1534
|
+
let termKey;
|
|
1535
|
+
if (isTermMappingField && typeof term === 'string') {
|
|
1536
|
+
let termId;
|
|
1537
|
+
if (caseInsensitive) {
|
|
1538
|
+
// For case-insensitive, search termManager for case-insensitive match
|
|
1539
|
+
const searchLower = String(term).toLowerCase();
|
|
1540
|
+
termId = null;
|
|
1541
|
+
for (const [termStr, id] of this.database.termManager.termToId.entries()) {
|
|
1542
|
+
if (termStr.toLowerCase() === searchLower) {
|
|
1543
|
+
termId = id;
|
|
1544
|
+
break;
|
|
1545
|
+
}
|
|
1546
|
+
}
|
|
1547
|
+
} else {
|
|
1548
|
+
termId = this.database?.termManager?.getTermIdWithoutIncrement(String(term));
|
|
1549
|
+
}
|
|
1550
|
+
|
|
1551
|
+
if (termId === undefined || termId === null) {
|
|
1552
|
+
return false; // Early exit: term doesn't exist in mapping
|
|
1553
|
+
}
|
|
1554
|
+
termKey = String(termId);
|
|
1555
|
+
} else {
|
|
1556
|
+
termKey = String(term);
|
|
1557
|
+
// For non-term-mapping fields with case-insensitive, search index keys
|
|
1558
|
+
if (caseInsensitive && typeof term === 'string') {
|
|
1559
|
+
const searchLower = termKey.toLowerCase();
|
|
1560
|
+
let foundKey = null;
|
|
1561
|
+
for (const key in fieldIndex) {
|
|
1562
|
+
if (key.toLowerCase() === searchLower) {
|
|
1563
|
+
foundKey = key;
|
|
1564
|
+
break;
|
|
1565
|
+
}
|
|
1566
|
+
}
|
|
1567
|
+
if (foundKey === null) {
|
|
1568
|
+
return false; // Early exit: term doesn't exist
|
|
1569
|
+
}
|
|
1570
|
+
termKey = foundKey;
|
|
1571
|
+
}
|
|
1572
|
+
}
|
|
1573
|
+
|
|
1574
|
+
// Check if term exists in index
|
|
1575
|
+
const termData = fieldIndex[termKey];
|
|
1576
|
+
if (!termData || !hasData(termData)) {
|
|
1577
|
+
return false; // Early exit: term doesn't exist or has no data
|
|
1578
|
+
}
|
|
1579
|
+
|
|
1580
|
+
termDataArray.push(termData);
|
|
1581
|
+
}
|
|
1582
|
+
|
|
1583
|
+
// If we got here, all terms exist and have data
|
|
1584
|
+
// Now check if there's intersection (only if more than one term)
|
|
1585
|
+
if (termDataArray.length === 1) {
|
|
1586
|
+
// Single term - check excludes if any
|
|
1587
|
+
if (!hasExcludes) {
|
|
1588
|
+
return true; // Single term, already verified it has data, no excludes
|
|
1589
|
+
}
|
|
1590
|
+
// Need to check excludes - expand line numbers
|
|
1591
|
+
const lineNumbers = this._getAllLineNumbers(termDataArray[0]);
|
|
1592
|
+
const candidateLines = new Set(lineNumbers);
|
|
1593
|
+
|
|
1594
|
+
// Remove lines that have exclude terms
|
|
1595
|
+
for (const excludeTerm of excludes) {
|
|
1596
|
+
const excludeKey = getTermKey(excludeTerm, caseInsensitive);
|
|
1597
|
+
if (excludeKey === null) continue;
|
|
1598
|
+
|
|
1599
|
+
const excludeData = fieldIndex[excludeKey];
|
|
1600
|
+
if (!excludeData) continue;
|
|
1601
|
+
|
|
1602
|
+
const excludeLines = this._getAllLineNumbers(excludeData);
|
|
1603
|
+
for (const line of excludeLines) {
|
|
1604
|
+
candidateLines.delete(line);
|
|
1605
|
+
}
|
|
1606
|
+
|
|
1607
|
+
// Early exit if all candidates excluded
|
|
1608
|
+
if (candidateLines.size === 0) {
|
|
1609
|
+
return false;
|
|
1610
|
+
}
|
|
1611
|
+
}
|
|
1612
|
+
|
|
1613
|
+
return candidateLines.size > 0;
|
|
1614
|
+
}
|
|
1615
|
+
|
|
1616
|
+
// For multiple terms, we need to check intersection
|
|
1617
|
+
// But we want to do this as lightly as possible
|
|
1618
|
+
// Get line numbers only for intersection check (unavoidable for $all)
|
|
1619
|
+
const termLineNumberSets = [];
|
|
1620
|
+
for (const termData of termDataArray) {
|
|
1621
|
+
const lineNumbers = this._getAllLineNumbers(termData);
|
|
1622
|
+
if (lineNumbers.length === 0) {
|
|
1623
|
+
return false; // Early exit: no line numbers (shouldn't happen, but safety check)
|
|
1624
|
+
}
|
|
1625
|
+
termLineNumberSets.push(new Set(lineNumbers));
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
// Calculate intersection incrementally with early exit
|
|
1629
|
+
let intersection = termLineNumberSets[0];
|
|
1630
|
+
for (let i = 1; i < termLineNumberSets.length; i++) {
|
|
1631
|
+
// Filter intersection to only include items in current set
|
|
1632
|
+
intersection = new Set([...intersection].filter(x => termLineNumberSets[i].has(x)));
|
|
1633
|
+
if (intersection.size === 0) {
|
|
1634
|
+
return false; // Early exit: intersection is empty
|
|
1635
|
+
}
|
|
1636
|
+
}
|
|
1637
|
+
|
|
1638
|
+
// Apply excludes if any
|
|
1639
|
+
if (hasExcludes) {
|
|
1640
|
+
for (const excludeTerm of excludes) {
|
|
1641
|
+
const excludeKey = getTermKey(excludeTerm, caseInsensitive);
|
|
1642
|
+
if (excludeKey === null) continue;
|
|
1643
|
+
|
|
1644
|
+
const excludeData = fieldIndex[excludeKey];
|
|
1645
|
+
if (!excludeData) continue;
|
|
1646
|
+
|
|
1647
|
+
const excludeLines = this._getAllLineNumbers(excludeData);
|
|
1648
|
+
for (const line of excludeLines) {
|
|
1649
|
+
intersection.delete(line);
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1652
|
+
// Early exit if all candidates excluded
|
|
1653
|
+
if (intersection.size === 0) {
|
|
1654
|
+
return false;
|
|
1655
|
+
}
|
|
1656
|
+
}
|
|
1657
|
+
}
|
|
1658
|
+
|
|
1659
|
+
return intersection.size > 0;
|
|
1660
|
+
}
|
|
1661
|
+
|
|
1662
|
+
// Handle $in behavior (any term exists) - default - ULTRA LIGHT
|
|
1663
|
+
// If no excludes, use ultra-fast path (no expansion needed)
|
|
1664
|
+
if (!hasExcludes) {
|
|
1665
|
+
for (const term of termsArray) {
|
|
1666
|
+
// Handle case-insensitive FIRST (before normal conversion)
|
|
1667
|
+
if (caseInsensitive && typeof term === 'string') {
|
|
1668
|
+
if (isTermMappingField && this.database?.termManager?.termToId) {
|
|
1669
|
+
// For term mapping fields, we need to find the term in termManager first
|
|
1670
|
+
// (case-insensitive), then convert to ID
|
|
1671
|
+
const searchLower = String(term).toLowerCase();
|
|
1672
|
+
let foundTermId = null;
|
|
1673
|
+
|
|
1674
|
+
// Search termManager for case-insensitive match
|
|
1675
|
+
for (const [termStr, termId] of this.database.termManager.termToId.entries()) {
|
|
1676
|
+
if (termStr.toLowerCase() === searchLower) {
|
|
1677
|
+
foundTermId = termId;
|
|
1678
|
+
break;
|
|
1679
|
+
}
|
|
1680
|
+
}
|
|
1681
|
+
|
|
1682
|
+
if (foundTermId !== null) {
|
|
1683
|
+
const termData = fieldIndex[String(foundTermId)];
|
|
1684
|
+
if (hasData(termData)) {
|
|
1685
|
+
return true; // Early exit: found a match
|
|
1686
|
+
}
|
|
1687
|
+
}
|
|
1688
|
+
// If not found, continue to next term
|
|
1689
|
+
continue;
|
|
1690
|
+
} else {
|
|
1691
|
+
// For non-term-mapping fields, search index keys directly
|
|
1692
|
+
const searchLower = String(term).toLowerCase();
|
|
1693
|
+
for (const key in fieldIndex) {
|
|
1694
|
+
if (key.toLowerCase() === searchLower) {
|
|
1695
|
+
const termData = fieldIndex[key];
|
|
1696
|
+
if (hasData(termData)) {
|
|
1697
|
+
return true; // Early exit: found a match
|
|
1698
|
+
}
|
|
1699
|
+
}
|
|
1700
|
+
}
|
|
1701
|
+
// If not found, continue to next term
|
|
1702
|
+
continue;
|
|
1703
|
+
}
|
|
1704
|
+
}
|
|
1705
|
+
|
|
1706
|
+
// Normal (case-sensitive) lookup
|
|
1707
|
+
const termKey = getTermKey(term, false);
|
|
1708
|
+
if (termKey === null) {
|
|
1709
|
+
continue; // Term not in mapping, try next
|
|
1710
|
+
}
|
|
1711
|
+
|
|
1712
|
+
// Direct lookup (fastest path) - O(1) hash lookup
|
|
1713
|
+
const termData = fieldIndex[termKey];
|
|
1714
|
+
if (hasData(termData)) {
|
|
1715
|
+
return true; // Early exit: found a match
|
|
1716
|
+
}
|
|
1717
|
+
}
|
|
1718
|
+
|
|
1719
|
+
return false;
|
|
1720
|
+
}
|
|
1721
|
+
|
|
1722
|
+
// With excludes, we need to collect candidates and filter
|
|
1723
|
+
const candidateLines = new Set();
|
|
1724
|
+
|
|
1725
|
+
for (const term of termsArray) {
|
|
1726
|
+
// Handle case-insensitive FIRST (before normal conversion)
|
|
1727
|
+
if (caseInsensitive && typeof term === 'string') {
|
|
1728
|
+
if (isTermMappingField && this.database?.termManager?.termToId) {
|
|
1729
|
+
// For term mapping fields, we need to find the term in termManager first
|
|
1730
|
+
// (case-insensitive), then convert to ID
|
|
1731
|
+
const searchLower = String(term).toLowerCase();
|
|
1732
|
+
let foundTermId = null;
|
|
1733
|
+
|
|
1734
|
+
// Search termManager for case-insensitive match
|
|
1735
|
+
for (const [termStr, termId] of this.database.termManager.termToId.entries()) {
|
|
1736
|
+
if (termStr.toLowerCase() === searchLower) {
|
|
1737
|
+
foundTermId = termId;
|
|
1738
|
+
break;
|
|
1739
|
+
}
|
|
1740
|
+
}
|
|
1741
|
+
|
|
1742
|
+
if (foundTermId !== null) {
|
|
1743
|
+
const termData = fieldIndex[String(foundTermId)];
|
|
1744
|
+
if (hasData(termData)) {
|
|
1745
|
+
// Add line numbers to candidates (need to expand for excludes check)
|
|
1746
|
+
const lineNumbers = this._getAllLineNumbers(termData);
|
|
1747
|
+
for (const line of lineNumbers) {
|
|
1748
|
+
candidateLines.add(line);
|
|
1749
|
+
}
|
|
1750
|
+
}
|
|
1751
|
+
}
|
|
1752
|
+
continue;
|
|
1753
|
+
} else {
|
|
1754
|
+
// For non-term-mapping fields, search index keys directly
|
|
1755
|
+
const searchLower = String(term).toLowerCase();
|
|
1756
|
+
for (const key in fieldIndex) {
|
|
1757
|
+
if (key.toLowerCase() === searchLower) {
|
|
1758
|
+
const termData = fieldIndex[key];
|
|
1759
|
+
if (hasData(termData)) {
|
|
1760
|
+
// Add line numbers to candidates
|
|
1761
|
+
const lineNumbers = this._getAllLineNumbers(termData);
|
|
1762
|
+
for (const line of lineNumbers) {
|
|
1763
|
+
candidateLines.add(line);
|
|
1764
|
+
}
|
|
1765
|
+
}
|
|
1766
|
+
}
|
|
1767
|
+
}
|
|
1768
|
+
continue;
|
|
1769
|
+
}
|
|
1770
|
+
}
|
|
1771
|
+
|
|
1772
|
+
// Normal (case-sensitive) lookup
|
|
1773
|
+
const termKey = getTermKey(term, false);
|
|
1774
|
+
if (termKey === null) {
|
|
1775
|
+
continue; // Term not in mapping, try next
|
|
1776
|
+
}
|
|
1777
|
+
|
|
1778
|
+
// Direct lookup
|
|
1779
|
+
const termData = fieldIndex[termKey];
|
|
1780
|
+
if (hasData(termData)) {
|
|
1781
|
+
// Add line numbers to candidates (need to expand for excludes check)
|
|
1782
|
+
const lineNumbers = this._getAllLineNumbers(termData);
|
|
1783
|
+
for (const line of lineNumbers) {
|
|
1784
|
+
candidateLines.add(line);
|
|
1785
|
+
}
|
|
1786
|
+
}
|
|
1787
|
+
}
|
|
1788
|
+
|
|
1789
|
+
// If no candidates found, return false
|
|
1790
|
+
if (candidateLines.size === 0) {
|
|
1791
|
+
return false;
|
|
1792
|
+
}
|
|
1793
|
+
|
|
1794
|
+
// Apply excludes
|
|
1795
|
+
for (const excludeTerm of excludes) {
|
|
1796
|
+
const excludeKey = getTermKey(excludeTerm, caseInsensitive);
|
|
1797
|
+
if (excludeKey === null) continue;
|
|
1798
|
+
|
|
1799
|
+
const excludeData = fieldIndex[excludeKey];
|
|
1800
|
+
if (!excludeData) continue;
|
|
1801
|
+
|
|
1802
|
+
const excludeLines = this._getAllLineNumbers(excludeData);
|
|
1803
|
+
for (const line of excludeLines) {
|
|
1804
|
+
candidateLines.delete(line);
|
|
1805
|
+
}
|
|
1806
|
+
|
|
1807
|
+
// Early exit if all candidates excluded
|
|
1808
|
+
if (candidateLines.size === 0) {
|
|
1809
|
+
return false;
|
|
1810
|
+
}
|
|
1811
|
+
}
|
|
1812
|
+
|
|
1813
|
+
return candidateLines.size > 0;
|
|
1814
|
+
}
|
|
1815
|
+
|
|
1816
|
+
// Ultra-fast load with minimal conversions
|
|
1817
|
+
load(index) {
|
|
1818
|
+
// CRITICAL FIX: Check if index is already loaded by looking for actual data, not just empty field structures
|
|
1819
|
+
if (this.index && this.index.data) {
|
|
1820
|
+
let hasActualData = false
|
|
1821
|
+
for (const field in this.index.data) {
|
|
1822
|
+
const fieldData = this.index.data[field]
|
|
1823
|
+
if (fieldData && Object.keys(fieldData).length > 0) {
|
|
1824
|
+
// Check if any field has actual index entries with data
|
|
1825
|
+
for (const key in fieldData) {
|
|
1826
|
+
const entry = fieldData[key]
|
|
1827
|
+
if (entry && ((entry.set && entry.set.size > 0) || (entry.ranges && entry.ranges.length > 0))) {
|
|
1828
|
+
hasActualData = true
|
|
1829
|
+
break
|
|
1830
|
+
}
|
|
1831
|
+
}
|
|
1832
|
+
if (hasActualData) break
|
|
1833
|
+
}
|
|
1834
|
+
}
|
|
1835
|
+
|
|
1836
|
+
if (hasActualData) {
|
|
1837
|
+
if (this.opts.debugMode) {
|
|
1838
|
+
console.log('🔍 IndexManager.load: Index already loaded with actual data, skipping')
|
|
1839
|
+
}
|
|
1840
|
+
return
|
|
1841
|
+
}
|
|
1842
|
+
}
|
|
1843
|
+
|
|
1844
|
+
// CRITICAL FIX: Add comprehensive null/undefined validation
|
|
1845
|
+
if (!index || typeof index !== 'object') {
|
|
1846
|
+
if (this.opts.debugMode) {
|
|
1847
|
+
console.log(`🔍 IndexManager.load: Invalid index data provided (${typeof index}), using defaults`)
|
|
1848
|
+
}
|
|
1849
|
+
return this._initializeDefaults()
|
|
1850
|
+
}
|
|
1851
|
+
|
|
1852
|
+
if (!index.data || typeof index.data !== 'object') {
|
|
1853
|
+
if (this.opts.debugMode) {
|
|
1854
|
+
console.log(`🔍 IndexManager.load: Invalid index.data provided (${typeof index.data}), using defaults`)
|
|
1855
|
+
}
|
|
1856
|
+
return this._initializeDefaults()
|
|
1857
|
+
}
|
|
1858
|
+
|
|
1859
|
+
// CRITICAL FIX: Only log if there are actual fields to load
|
|
1860
|
+
if (this.opts.debugMode && Object.keys(index.data).length > 0) {
|
|
1861
|
+
console.log(`🔍 IndexManager.load: Loading index with fields: ${Object.keys(index.data).join(', ')}`)
|
|
1862
|
+
}
|
|
1863
|
+
|
|
1864
|
+
// Create a deep copy to avoid reference issues
|
|
1865
|
+
const processedIndex = {
|
|
1866
|
+
data: {}
|
|
1867
|
+
}
|
|
1868
|
+
|
|
1869
|
+
// CRITICAL FIX: Add null/undefined checks for field iteration
|
|
1870
|
+
const fields = Object.keys(index.data)
|
|
1871
|
+
for(const field of fields) {
|
|
1872
|
+
if (!field || typeof field !== 'string') {
|
|
1873
|
+
continue // Skip invalid field names
|
|
1874
|
+
}
|
|
1875
|
+
|
|
1876
|
+
const fieldData = index.data[field]
|
|
1877
|
+
if (!fieldData || typeof fieldData !== 'object') {
|
|
1878
|
+
continue // Skip invalid field data
|
|
1879
|
+
}
|
|
1880
|
+
|
|
1881
|
+
processedIndex.data[field] = {}
|
|
1882
|
+
|
|
1883
|
+
// CRITICAL FIX: Check if this is a term mapping field for conversion
|
|
1884
|
+
const isTermMappingField = this.database?.termManager &&
|
|
1885
|
+
this.database.termManager.termMappingFields &&
|
|
1886
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
1887
|
+
|
|
1888
|
+
const terms = Object.keys(fieldData)
|
|
1889
|
+
for(const term of terms) {
|
|
1890
|
+
if (!term || typeof term !== 'string') {
|
|
1891
|
+
continue // Skip invalid term names
|
|
1892
|
+
}
|
|
1893
|
+
|
|
1894
|
+
const termData = fieldData[term]
|
|
1895
|
+
|
|
1896
|
+
// CRITICAL FIX: Convert term strings to term IDs for term mapping fields
|
|
1897
|
+
// If the key is a string term (not a numeric ID), convert it to term ID
|
|
1898
|
+
let termKey = term
|
|
1899
|
+
if (isTermMappingField && typeof term === 'string' && !/^\d+$/.test(term)) {
|
|
1900
|
+
// Key is a term string, convert to term ID
|
|
1901
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(term)
|
|
1902
|
+
if (termId !== undefined) {
|
|
1903
|
+
termKey = String(termId)
|
|
1904
|
+
} else {
|
|
1905
|
+
// Term not found in termManager - skip this key (orphaned term from old index)
|
|
1906
|
+
// This can happen if termMapping wasn't loaded yet or term was removed
|
|
1907
|
+
if (this.opts?.debugMode) {
|
|
1908
|
+
console.log(`⚠️ IndexManager.load: Term "${term}" not found in termManager for field "${field}" - skipping (orphaned from old index)`)
|
|
1909
|
+
}
|
|
1910
|
+
continue
|
|
1911
|
+
}
|
|
1912
|
+
}
|
|
1913
|
+
|
|
1914
|
+
// Convert various formats to new hybrid format
|
|
1915
|
+
if (Array.isArray(termData)) {
|
|
1916
|
+
// Check if it's the new compact format [setArray, rangesArray]
|
|
1917
|
+
if (termData.length === 2 && Array.isArray(termData[0]) && Array.isArray(termData[1])) {
|
|
1918
|
+
// New compact format: [setArray, rangesArray]
|
|
1919
|
+
// Convert ultra-compact ranges [start, count] back to {start, count}
|
|
1920
|
+
const ranges = termData[1].map(range => {
|
|
1921
|
+
if (Array.isArray(range) && range.length === 2) {
|
|
1922
|
+
// Ultra-compact format: [start, count]
|
|
1923
|
+
return { start: range[0], count: range[1] }
|
|
1924
|
+
} else {
|
|
1925
|
+
// Legacy format: {start, count}
|
|
1926
|
+
return range
|
|
1927
|
+
}
|
|
1928
|
+
})
|
|
1929
|
+
processedIndex.data[field][termKey] = {
|
|
1930
|
+
set: new Set(termData[0]),
|
|
1931
|
+
ranges: ranges
|
|
1932
|
+
}
|
|
1933
|
+
} else {
|
|
1934
|
+
// Legacy array format (just set data)
|
|
1935
|
+
processedIndex.data[field][termKey] = { set: new Set(termData), ranges: [] }
|
|
1936
|
+
}
|
|
1937
|
+
} else if (termData && typeof termData === 'object') {
|
|
1938
|
+
if (termData.set || termData.ranges) {
|
|
1939
|
+
// Legacy hybrid format - convert set array back to Set
|
|
1940
|
+
const hybridData = termData
|
|
1941
|
+
let setObject
|
|
1942
|
+
if (Array.isArray(hybridData.set)) {
|
|
1943
|
+
// Convert array back to Set
|
|
1944
|
+
setObject = new Set(hybridData.set)
|
|
1945
|
+
} else {
|
|
1946
|
+
// Fallback to empty Set
|
|
1947
|
+
setObject = new Set()
|
|
1948
|
+
}
|
|
1949
|
+
processedIndex.data[field][termKey] = {
|
|
1950
|
+
set: setObject,
|
|
1951
|
+
ranges: hybridData.ranges || []
|
|
1952
|
+
}
|
|
1953
|
+
} else {
|
|
1954
|
+
// Convert from Set format to hybrid
|
|
1955
|
+
const numbers = Array.from(termData || [])
|
|
1956
|
+
processedIndex.data[field][termKey] = { set: new Set(numbers), ranges: [] }
|
|
1957
|
+
}
|
|
1958
|
+
}
|
|
1959
|
+
}
|
|
1960
|
+
}
|
|
1961
|
+
|
|
1962
|
+
// Preserve initialized fields if no data was loaded
|
|
1963
|
+
if (!processedIndex.data || Object.keys(processedIndex.data).length === 0) {
|
|
1964
|
+
// CRITICAL FIX: Only log if debug mode is enabled and there are actual fields
|
|
1965
|
+
if (this.opts.debugMode && this.index.data && Object.keys(this.index.data).length > 0) {
|
|
1966
|
+
console.log(`🔍 IndexManager.load: No data loaded, preserving initialized fields: ${Object.keys(this.index.data).join(', ')}`)
|
|
1967
|
+
}
|
|
1968
|
+
// Keep the current index with initialized fields
|
|
1969
|
+
return
|
|
1970
|
+
}
|
|
1971
|
+
|
|
1972
|
+
this.index = processedIndex
|
|
1973
|
+
}
|
|
1974
|
+
|
|
1975
|
+
/**
|
|
1976
|
+
* CRITICAL FIX: Initialize default index structure when invalid data is provided
|
|
1977
|
+
* This prevents TypeError when Object.keys() is called on null/undefined
|
|
1978
|
+
*/
|
|
1979
|
+
_initializeDefaults() {
|
|
1980
|
+
if (this.opts.debugMode) {
|
|
1981
|
+
console.log(`🔍 IndexManager._initializeDefaults: Initializing default index structure`)
|
|
1982
|
+
}
|
|
1983
|
+
|
|
1984
|
+
// Initialize empty index structure
|
|
1985
|
+
this.index = { data: {} }
|
|
1986
|
+
|
|
1987
|
+
// Initialize fields from options if available
|
|
1988
|
+
if (this.opts.indexes && typeof this.opts.indexes === 'object') {
|
|
1989
|
+
const fields = Object.keys(this.opts.indexes)
|
|
1990
|
+
for (const field of fields) {
|
|
1991
|
+
if (field && typeof field === 'string') {
|
|
1992
|
+
this.index.data[field] = {}
|
|
1993
|
+
}
|
|
1994
|
+
}
|
|
1995
|
+
}
|
|
1996
|
+
|
|
1997
|
+
if (this.opts.debugMode) {
|
|
1998
|
+
console.log(`🔍 IndexManager._initializeDefaults: Initialized with fields: ${Object.keys(this.index.data).join(', ')}`)
|
|
1999
|
+
}
|
|
2000
|
+
}
|
|
2001
|
+
|
|
2002
|
+
readColumnIndex(column) {
|
|
2003
|
+
return new Set((this.index.data && this.index.data[column]) ? Object.keys(this.index.data[column]) : [])
|
|
2004
|
+
}
|
|
2005
|
+
|
|
2006
|
+
/**
|
|
2007
|
+
* Convert index to JSON-serializable format for debugging and export
|
|
2008
|
+
* This resolves the issue where Sets appear as empty objects in JSON.stringify
|
|
2009
|
+
*/
|
|
2010
|
+
toJSON() {
|
|
2011
|
+
const serializable = { data: {} }
|
|
2012
|
+
|
|
2013
|
+
// Check if this is a term mapping field for conversion
|
|
2014
|
+
const isTermMappingField = (field) => {
|
|
2015
|
+
return this.database?.termManager &&
|
|
2016
|
+
this.database.termManager.termMappingFields &&
|
|
2017
|
+
this.database.termManager.termMappingFields.includes(field)
|
|
2018
|
+
}
|
|
2019
|
+
|
|
2020
|
+
for (const field in this.index.data) {
|
|
2021
|
+
serializable.data[field] = {}
|
|
2022
|
+
const isTermField = isTermMappingField(field)
|
|
2023
|
+
|
|
2024
|
+
for (const term in this.index.data[field]) {
|
|
2025
|
+
const hybridData = this.index.data[field][term]
|
|
2026
|
+
|
|
2027
|
+
// CRITICAL FIX: Convert term strings to term IDs for term mapping fields
|
|
2028
|
+
// If the key is a string term (not a numeric ID), convert it to term ID
|
|
2029
|
+
let termKey = term
|
|
2030
|
+
if (isTermField && typeof term === 'string' && !/^\d+$/.test(term)) {
|
|
2031
|
+
// Key is a term string, convert to term ID
|
|
2032
|
+
const termId = this.database?.termManager?.getTermIdWithoutIncrement(term)
|
|
2033
|
+
if (termId !== undefined) {
|
|
2034
|
+
termKey = String(termId)
|
|
2035
|
+
} else {
|
|
2036
|
+
// Term not found in termManager, keep original key
|
|
2037
|
+
// This prevents data loss when term mapping is incomplete
|
|
2038
|
+
termKey = term
|
|
2039
|
+
if (this.opts?.debugMode) {
|
|
2040
|
+
console.log(`⚠️ IndexManager.toJSON: Term "${term}" not found in termManager for field "${field}" - using original key`)
|
|
2041
|
+
}
|
|
2042
|
+
}
|
|
2043
|
+
}
|
|
2044
|
+
|
|
2045
|
+
// OPTIMIZATION: Create ranges before serialization if beneficial
|
|
2046
|
+
if (hybridData.set && hybridData.set.size >= this.rangeThreshold) {
|
|
2047
|
+
this._optimizeToRanges(hybridData)
|
|
2048
|
+
}
|
|
2049
|
+
|
|
2050
|
+
// Convert hybrid structure to serializable format
|
|
2051
|
+
let setArray = []
|
|
2052
|
+
if (hybridData.set) {
|
|
2053
|
+
if (typeof hybridData.set.size !== 'undefined') {
|
|
2054
|
+
// Regular Set
|
|
2055
|
+
setArray = Array.from(hybridData.set)
|
|
2056
|
+
}
|
|
2057
|
+
}
|
|
2058
|
+
|
|
2059
|
+
// Use ultra-compact format: [setArray, rangesArray] to save space
|
|
2060
|
+
const ranges = hybridData.ranges || []
|
|
2061
|
+
if (ranges.length > 0) {
|
|
2062
|
+
// Convert ranges to ultra-compact format: [start, count] instead of {start, count}
|
|
2063
|
+
const compactRanges = ranges.map(range => [range.start, range.count])
|
|
2064
|
+
serializable.data[field][termKey] = [setArray, compactRanges]
|
|
2065
|
+
} else {
|
|
2066
|
+
// CRITICAL FIX: Always use the [setArray, []] format for consistency
|
|
2067
|
+
// This ensures the load() method can properly deserialize the data
|
|
2068
|
+
serializable.data[field][termKey] = [setArray, []]
|
|
2069
|
+
}
|
|
2070
|
+
}
|
|
2071
|
+
}
|
|
2072
|
+
|
|
2073
|
+
return serializable
|
|
2074
|
+
}
|
|
2075
|
+
|
|
2076
|
+
/**
|
|
2077
|
+
* Get a JSON string representation of the index
|
|
2078
|
+
* This properly handles Sets unlike the default JSON.stringify
|
|
2079
|
+
*/
|
|
2080
|
+
toString() {
|
|
2081
|
+
return JSON.stringify(this.toJSON(), null, 2)
|
|
2082
|
+
}
|
|
2083
|
+
|
|
2084
|
+
// Simplified term mapping methods - just basic functionality
|
|
2085
|
+
|
|
2086
|
+
/**
|
|
2087
|
+
* Rebuild index (stub for compatibility)
|
|
2088
|
+
*/
|
|
2089
|
+
async rebuild() {
|
|
2090
|
+
// Stub implementation for compatibility
|
|
2091
|
+
return Promise.resolve()
|
|
2092
|
+
}
|
|
2093
|
+
}
|
|
2094
|
+
|