jexidb 2.1.0 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Database.cjs +9253 -437
- package/package.json +9 -2
- package/src/Database.mjs +1572 -212
- package/src/FileHandler.mjs +83 -44
- package/src/OperationQueue.mjs +23 -23
- package/src/SchemaManager.mjs +325 -268
- package/src/Serializer.mjs +234 -24
- package/src/managers/IndexManager.mjs +778 -87
- package/src/managers/QueryManager.mjs +340 -67
- package/src/managers/TermManager.mjs +7 -7
- package/src/utils/operatorNormalizer.mjs +116 -0
- package/.babelrc +0 -13
- package/.gitattributes +0 -2
- package/CHANGELOG.md +0 -140
- package/babel.config.json +0 -5
- package/docs/API.md +0 -1051
- package/docs/EXAMPLES.md +0 -701
- package/docs/README.md +0 -194
- package/examples/iterate-usage-example.js +0 -157
- package/examples/simple-iterate-example.js +0 -115
- package/jest.config.js +0 -24
- package/scripts/README.md +0 -47
- package/scripts/clean-test-files.js +0 -75
- package/scripts/prepare.js +0 -31
- package/scripts/run-tests.js +0 -80
- package/test/$not-operator-with-and.test.js +0 -282
- package/test/README.md +0 -8
- package/test/close-init-cycle.test.js +0 -256
- package/test/critical-bugs-fixes.test.js +0 -1069
- package/test/index-persistence.test.js +0 -306
- package/test/index-serialization.test.js +0 -314
- package/test/indexed-query-mode.test.js +0 -360
- package/test/iterate-method.test.js +0 -272
- package/test/query-operators.test.js +0 -238
- package/test/regex-array-fields.test.js +0 -129
- package/test/score-method.test.js +0 -238
- package/test/setup.js +0 -17
- package/test/term-mapping-minimal.test.js +0 -154
- package/test/term-mapping-simple.test.js +0 -257
- package/test/term-mapping.test.js +0 -514
- package/test/writebuffer-flush-resilience.test.js +0 -204
package/src/Database.mjs
CHANGED
@@ -76,12 +76,16 @@ class InsertSession {
   constructor(database, sessionOptions = {}) {
     this.database = database
     this.batchSize = sessionOptions.batchSize || 100
+    this.enableAutoSave = sessionOptions.enableAutoSave !== undefined ? sessionOptions.enableAutoSave : true
     this.totalInserted = 0
     this.flushing = false
     this.batches = [] // Array of batches to avoid slice() in flush()
     this.currentBatch = [] // Current batch being filled
    this.sessionId = Math.random().toString(36).substr(2, 9)
 
+    // Track pending auto-flush operations
+    this.pendingAutoFlushes = new Set()
+
     // Register this session as active
     this.database.activeInsertSessions.add(this)
   }
@@ -103,46 +107,153 @@ class InsertSession {
     this.currentBatch.push(finalRecord)
     this.totalInserted++
 
-    // If batch is full, move it to batches array
+    // If batch is full, move it to batches array and trigger auto-flush
     if (this.currentBatch.length >= this.batchSize) {
       this.batches.push(this.currentBatch)
       this.currentBatch = []
+
+      // Auto-flush in background (non-blocking)
+      // This ensures batches are flushed automatically without blocking add()
+      this.autoFlush().catch(err => {
+        // Log error but don't throw - we don't want to break the add() flow
+        console.error('Auto-flush error in InsertSession:', err)
+      })
     }
 
     return finalRecord
   }
 
-  async
-  //
-
-
-  // Prevent concurrent flushes
+  async autoFlush() {
+    // Only flush if not already flushing
+    // This method will process all pending batches
     if (this.flushing) return
+
+    // Create a promise for this auto-flush operation
+    const flushPromise = this._doFlush()
+    this.pendingAutoFlushes.add(flushPromise)
+
+    // Remove from pending set when complete (success or error)
+    flushPromise
+      .then(() => {
+        this.pendingAutoFlushes.delete(flushPromise)
+      })
+      .catch((err) => {
+        this.pendingAutoFlushes.delete(flushPromise)
+        throw err
+      })
+
+    return flushPromise
+  }
+
+  async _doFlush() {
+    // Check if database is destroyed or closed before starting
+    if (this.database.destroyed || this.database.closed) {
+      // Clear batches if database is closed/destroyed
+      this.batches = []
+      this.currentBatch = []
+      return
+    }
+
+    // Prevent concurrent flushes - if already flushing, wait for it
+    if (this.flushing) {
+      // Wait for the current flush to complete
+      while (this.flushing) {
+        await new Promise(resolve => setTimeout(resolve, 1))
+      }
+      // After waiting, check if there's anything left to flush
+      // If another flush completed everything, we're done
+      if (this.batches.length === 0 && this.currentBatch.length === 0) return
+
+      // Check again if database was closed during wait
+      if (this.database.destroyed || this.database.closed) {
+        this.batches = []
+        this.currentBatch = []
+        return
+      }
+    }
+
     this.flushing = true
 
     try {
-      // Process
-
-
-
+      // Process continuously until queue is completely empty
+      // This handles the case where new data is added during the flush
+      while (this.batches.length > 0 || this.currentBatch.length > 0) {
+        // Check if database was closed during processing
+        if (this.database.destroyed || this.database.closed) {
+          // Clear remaining batches
+          this.batches = []
+          this.currentBatch = []
+          return
+        }
 
-
-
-
-
+        // Process all complete batches that exist at this moment
+        // Note: new batches may be added to this.batches during this loop
+        const batchesToProcess = this.batches.length
+        for (let i = 0; i < batchesToProcess; i++) {
+          // Check again before each batch
+          if (this.database.destroyed || this.database.closed) {
+            this.batches = []
+            this.currentBatch = []
+            return
+          }
+
+          const batch = this.batches.shift() // Remove from front
+          await this.database.insertBatch(batch)
+        }
 
-
-
-
+        // Process current batch if it has data
+        // Note: new records may be added to currentBatch during processing
+        if (this.currentBatch.length > 0) {
+          // Check if database was closed
+          if (this.database.destroyed || this.database.closed) {
+            this.batches = []
+            this.currentBatch = []
+            return
+          }
+
+          // Check if currentBatch reached batchSize during processing
+          if (this.currentBatch.length >= this.batchSize) {
+            // Move it to batches array and process in next iteration
+            this.batches.push(this.currentBatch)
+            this.currentBatch = []
+            continue
+          }
+
+          // Process the current batch
+          const batchToProcess = this.currentBatch
+          this.currentBatch = [] // Clear before processing to allow new adds
+          await this.database.insertBatch(batchToProcess)
+        }
+      }
     } finally {
       this.flushing = false
     }
   }
 
+  async flush() {
+    // Wait for any pending auto-flushes to complete first
+    await this.waitForAutoFlushes()
+
+    // Then do a final flush to ensure everything is processed
+    await this._doFlush()
+  }
+
+  async waitForAutoFlushes() {
+    // Wait for all pending auto-flush operations to complete
+    if (this.pendingAutoFlushes.size > 0) {
+      await Promise.all(Array.from(this.pendingAutoFlushes))
+    }
+  }
+
   async commit() {
     // CRITICAL FIX: Make session auto-reusable by removing committed state
     // Allow multiple commits on the same session
 
+    // First, wait for all pending auto-flushes to complete
+    await this.waitForAutoFlushes()
+
+    // Then flush any remaining data (including currentBatch)
+    // This ensures everything is inserted before commit returns
     await this.flush()
 
     // Reset session state for next commit cycle
@@ -158,6 +269,9 @@ class InsertSession {
     const startTime = Date.now()
     const hasTimeout = maxWaitTime !== null && maxWaitTime !== undefined
 
+    // Wait for auto-flushes first
+    await this.waitForAutoFlushes()
+
     while (this.flushing || this.batches.length > 0 || this.currentBatch.length > 0) {
       // Check timeout only if we have one
       if (hasTimeout && (Date.now() - startTime) >= maxWaitTime) {
@@ -174,7 +288,10 @@ class InsertSession {
    * Check if this session has pending operations
    */
   hasPendingOperations() {
-    return this.
+    return this.pendingAutoFlushes.size > 0 ||
+           this.flushing ||
+           this.batches.length > 0 ||
+           this.currentBatch.length > 0
   }
 
   /**
@@ -189,6 +306,7 @@ class InsertSession {
     this.currentBatch = []
    this.totalInserted = 0
     this.flushing = false
+    this.pendingAutoFlushes.clear()
   }
 }
 
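Net effect of the InsertSession changes: add() stays non-blocking, full batches are drained in the background via autoFlush(), and flush()/commit() first wait on pendingAutoFlushes so nothing is dropped. A minimal usage sketch follows; constructing the session directly is an assumption here, since the Database-side factory (if any) is not shown in this diff:

// Sketch only - how a session is normally obtained is not part of this diff.
const session = new InsertSession(db, { batchSize: 100, enableAutoSave: true })
for (const record of records) {
  session.add(record) // non-blocking; a full batch kicks off a background autoFlush()
}
await session.commit() // drains pending auto-flushes, then flushes the remainder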
@@ -238,6 +356,8 @@ class Database extends EventEmitter {
       streamingThreshold: opts.streamingThreshold || 0.8, // Use streaming when limit > 80% of total records
       // Serialization options
       enableArraySerialization: opts.enableArraySerialization !== false, // Enable array serialization by default
+      // Index rebuild options
+      allowIndexRebuild: opts.allowIndexRebuild === true, // Allow automatic index rebuild when corrupted (default false - throws error)
     }, opts)
 
     // CRITICAL FIX: Initialize AbortController for lifecycle management
@@ -264,6 +384,8 @@ class Database extends EventEmitter {
     this.isSaving = false
     this.lastSaveTime = null
     this.initialized = false
+    this._offsetRecoveryInProgress = false
+    this.writeBufferTotalSize = 0
 
 
     // Initialize managers
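allowIndexRebuild is opt-in: by default a corrupted or missing index now throws during initialization instead of being rebuilt silently. A hedged options sketch (the constructor signature, init() call, and field names are assumptions inferred from the surrounding code, not a documented API):

// Sketch: opting in to automatic index rebuilds.
const db = new Database('catalog.jdb', {
  fields: { id: 'number', name: 'string', tags: 'array:string' }, // mandatory as of this release (see the schema hunk below)
  indexes: ['id', 'tags'],
  allowIndexRebuild: true // default false: a corrupted/missing index throws instead
})
await db.init()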
@@ -310,10 +432,11 @@ class Database extends EventEmitter {
 
     // Validate indexes array (new format) - but only if we have fields
     if (this.opts.originalIndexes && Array.isArray(this.opts.originalIndexes)) {
-      if (
-
+      if (this.opts.fields) {
+        this.validateIndexFields(this.opts.originalIndexes)
+      } else if (this.opts.debugMode) {
+        console.log('⚠️ Skipping index field validation because no fields configuration was provided')
       }
-      this.validateIndexFields(this.opts.originalIndexes)
     }
 
     if (this.opts.debugMode) {
@@ -330,10 +453,14 @@ class Database extends EventEmitter {
    * Validate field types
    */
   validateFieldTypes(fields, configType) {
-    const supportedTypes = ['string', 'number', 'boolean', 'array:string', 'array:number', 'array:boolean', 'array', 'object']
+    const supportedTypes = ['string', 'number', 'boolean', 'array:string', 'array:number', 'array:boolean', 'array', 'object', 'auto']
     const errors = []
 
     for (const [fieldName, fieldType] of Object.entries(fields)) {
+      if (fieldType === 'auto') {
+        continue
+      }
+
       // Check if type is supported
       if (!supportedTypes.includes(fieldType)) {
         errors.push(`Unsupported ${configType} type '${fieldType}' for field '${fieldName}'. Supported types: ${supportedTypes.join(', ')}`)
@@ -383,26 +510,24 @@ class Database extends EventEmitter {
    * Prepare index configuration for IndexManager
    */
   prepareIndexConfiguration() {
-
-    if (this.opts.fields && Array.isArray(this.opts.indexes)) {
-      // New format: { fields: {...}, indexes: [...] }
+    if (Array.isArray(this.opts.indexes)) {
       const indexedFields = {}
-      const originalIndexes = [...this.opts.indexes]
-
+      const originalIndexes = [...this.opts.indexes]
+      const hasFieldConfig = this.opts.fields && typeof this.opts.fields === 'object'
+
       for (const fieldName of this.opts.indexes) {
-        if (this.opts.fields[fieldName]) {
+        if (hasFieldConfig && this.opts.fields[fieldName]) {
           indexedFields[fieldName] = this.opts.fields[fieldName]
+        } else {
+          indexedFields[fieldName] = 'auto'
         }
       }
-
-      // Store original indexes for validation
+
       this.opts.originalIndexes = originalIndexes
-
-      // Replace indexes array with object for IndexManager
       this.opts.indexes = indexedFields
-
+
       if (this.opts.debugMode) {
-        console.log(`🔍
+        console.log(`🔍 Normalized indexes array to object: ${Object.keys(indexedFields).join(', ')} [${this.instanceId}]`)
       }
     }
     // Legacy format (indexes as object) is already compatible
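With the reworked prepareIndexConfiguration(), an indexes array no longer requires a matching fields entry; unresolved names fall back to the new 'auto' type. Illustrative before/after of the normalization (the option names are from the code above; the field names are examples):

// Input options:  { fields: { id: 'number' }, indexes: ['id', 'tags'] }
// After prepareIndexConfiguration():
//   opts.indexes         -> { id: 'number', tags: 'auto' }
//   opts.originalIndexes -> ['id', 'tags']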
@@ -420,6 +545,32 @@ class Database extends EventEmitter {
       return
     }
 
+    // Handle legacy 'schema' option migration
+    if (this.opts.schema) {
+      // If fields is already provided and valid, ignore schema
+      if (this.opts.fields && typeof this.opts.fields === 'object' && Object.keys(this.opts.fields).length > 0) {
+        if (this.opts.debugMode) {
+          console.log(`⚠️ Both 'schema' and 'fields' options provided. Ignoring 'schema' and using 'fields'. [${this.instanceId}]`)
+        }
+      } else if (Array.isArray(this.opts.schema)) {
+        // Schema as array is no longer supported
+        throw new Error('The "schema" option as an array is no longer supported. Please use "fields" as an object instead. Example: { fields: { id: "number", name: "string" } }')
+      } else if (typeof this.opts.schema === 'object' && this.opts.schema !== null) {
+        // Schema as object - migrate to fields
+        this.opts.fields = { ...this.opts.schema }
+        if (this.opts.debugMode) {
+          console.log(`⚠️ Migrated 'schema' option to 'fields'. Please update your code to use 'fields' instead of 'schema'. [${this.instanceId}]`)
+        }
+      } else {
+        throw new Error('The "schema" option must be an object. Example: { schema: { id: "number", name: "string" } }')
+      }
+    }
+
+    // Validate that fields is provided (mandatory)
+    if (!this.opts.fields || typeof this.opts.fields !== 'object' || Object.keys(this.opts.fields).length === 0) {
+      throw new Error('The "fields" option is mandatory and must be an object with at least one field definition. Example: { fields: { id: "number", name: "string" } }')
+    }
+
     // CRITICAL FIX: Initialize serializer first - this was missing and causing crashes
     this.serializer = new Serializer(this.opts)
 
@@ -436,6 +587,19 @@ class Database extends EventEmitter {
     this.termManager.termMappingFields = termMappingFields
     this.opts.termMapping = true // Always enable term mapping for optimal performance
 
+    // Validation: Ensure all array:string indexed fields are in term mapping fields
+    if (this.opts.indexes) {
+      const arrayStringFields = []
+      for (const [field, type] of Object.entries(this.opts.indexes)) {
+        if (type === 'array:string' && !termMappingFields.includes(field)) {
+          arrayStringFields.push(field)
+        }
+      }
+      if (arrayStringFields.length > 0) {
+        console.warn(`⚠️ Warning: The following array:string indexed fields were not added to term mapping: ${arrayStringFields.join(', ')}. This may impact performance.`)
+      }
+    }
+
     if (this.opts.debugMode) {
       if (termMappingFields.length > 0) {
         console.log(`🔍 TermManager initialized for fields: ${termMappingFields.join(', ')} [${this.instanceId}]`)
@@ -462,6 +626,7 @@ class Database extends EventEmitter {
     this.writeBuffer = []
     this.writeBufferOffsets = [] // Track offsets for writeBuffer records
     this.writeBufferSizes = [] // Track sizes for writeBuffer records
+    this.writeBufferTotalSize = 0
     this.isInsideOperationQueue = false // Flag to prevent deadlock in save() calls
 
     // Initialize other managers
@@ -483,8 +648,8 @@ class Database extends EventEmitter {
     const termMappingFields = []
 
     for (const [field, type] of Object.entries(this.opts.indexes)) {
-      // Fields that should use term mapping
-      if (type === 'array:string'
+      // Fields that should use term mapping (only array fields)
+      if (type === 'array:string') {
         termMappingFields.push(field)
       }
     }
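The legacy 'schema' option is now migrated or rejected rather than quietly accepted. A sketch of the three paths through the migration block above (option shapes taken from the error messages in the diff):

// 1) Object schema is copied into fields:
//    { schema: { id: 'number', name: 'string' } } behaves as { fields: { id: 'number', name: 'string' } }
// 2) Array schema throws:
//    { schema: ['id', 'name'] } -> Error: The "schema" option as an array is no longer supported...
// 3) No usable fields at all also throws, since "fields" is mandatory as of this release.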
@@ -704,6 +869,9 @@ class Database extends EventEmitter {
     // Don't load the entire file - just initialize empty state
     // The actual record count will come from loaded offsets
     this.writeBuffer = [] // writeBuffer is only for new unsaved records
+    this.writeBufferOffsets = []
+    this.writeBufferSizes = []
+    this.writeBufferTotalSize = 0
 
     // recordCount will be determined from loaded offsets
     // If no offsets were loaded, we'll count records only if needed
@@ -713,13 +881,55 @@ class Database extends EventEmitter {
     const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
     try {
       const idxFileHandler = new FileHandler(idxPath, this.fileMutex, this.opts)
+
+      // Check if file exists BEFORE trying to read it
+      const fileExists = await idxFileHandler.exists()
+      if (!fileExists) {
+        // File doesn't exist - this will be handled by catch block
+        throw new Error('Index file does not exist')
+      }
+
       const idxData = await idxFileHandler.readAll()
-
+
+      // If file exists but is empty or has no content, treat as corrupted
+      if (!idxData || !idxData.trim()) {
+        // File exists but is empty - treat as corrupted
+        const fileExists = await this.fileHandler.exists()
+        if (fileExists) {
+          const stats = await this.fileHandler.getFileStats()
+          if (stats && stats.size > 0) {
+            // Data file has content but index is empty - corrupted
+            if (!this.opts.allowIndexRebuild) {
+              throw new Error(
+                `Index file is corrupted: ${idxPath} exists but contains no index data, ` +
+                `while the data file has ${stats.size} bytes. ` +
+                `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+                `or manually fix/delete the corrupted index file.`
+              )
+            }
+            // Schedule rebuild if allowed
+            if (this.opts.debugMode) {
+              console.log(`⚠️ Index file exists but is empty while data file has ${stats.size} bytes - scheduling rebuild`)
+            }
+            this._scheduleIndexRebuild()
+            // Continue execution - rebuild will happen on first query
+            // Don't return - let the code continue to load other things if needed
+          }
+        }
+        // If data file is also empty, just continue (no error needed)
+        // Don't return - let the code continue to load other things if needed
+      } else {
+        // File has content - parse and load it
         const parsedIdxData = JSON.parse(idxData)
 
         // Always load offsets if available (even without indexed fields)
         if (parsedIdxData.offsets && Array.isArray(parsedIdxData.offsets)) {
           this.offsets = parsedIdxData.offsets
+          // CRITICAL FIX: Update IndexManager totalLines to match offsets length
+          // This ensures queries and length property work correctly even if offsets are reset later
+          if (this.indexManager && this.offsets.length > 0) {
+            this.indexManager.setTotalLines(this.offsets.length)
+          }
           if (this.opts.debugMode) {
             console.log(`📂 Loaded ${this.offsets.length} offsets from ${idxPath}`)
           }
@@ -733,24 +943,8 @@ class Database extends EventEmitter {
           }
         }
 
-        // Load index data only if available and we have indexed fields
-        if (parsedIdxData && parsedIdxData.index && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
-          this.indexManager.load(parsedIdxData.index)
-
-          // Load term mapping data from .idx file if it exists
-          if (parsedIdxData.termMapping && this.termManager) {
-            await this.termManager.loadTerms(parsedIdxData.termMapping)
-            if (this.opts.debugMode) {
-              console.log(`📂 Loaded term mapping from ${idxPath}`)
-            }
-          }
-
-          if (this.opts.debugMode) {
-            console.log(`📂 Loaded index data from ${idxPath}`)
-          }
-        }
-
         // Load configuration from .idx file if database exists
+        // CRITICAL: Load config FIRST so indexes are available for term mapping detection
         if (parsedIdxData.config) {
           const config = parsedIdxData.config
 
@@ -764,11 +958,94 @@ class Database extends EventEmitter {
 
           if (config.indexes) {
             this.opts.indexes = config.indexes
+            if (this.indexManager) {
+              this.indexManager.setIndexesConfig(config.indexes)
+            }
             if (this.opts.debugMode) {
               console.log(`📂 Loaded indexes config from ${idxPath}:`, Object.keys(config.indexes))
             }
           }
 
+          // CRITICAL FIX: Update term mapping fields AFTER loading indexes from config
+          // This ensures termManager knows which fields use term mapping
+          // (getTermMappingFields() was called during init() before indexes were loaded)
+          if (this.termManager && config.indexes) {
+            const termMappingFields = this.getTermMappingFields()
+            this.termManager.termMappingFields = termMappingFields
+            if (this.opts.debugMode && termMappingFields.length > 0) {
+              console.log(`🔍 Updated term mapping fields after loading indexes: ${termMappingFields.join(', ')}`)
+            }
+          }
+        }
+
+        // Load term mapping data from .idx file if it exists
+        // CRITICAL: Load termMapping even if index is empty (terms are needed for queries)
+        // NOTE: termMappingFields should already be set above from config.indexes
+        if (parsedIdxData.termMapping && this.termManager && this.termManager.termMappingFields && this.termManager.termMappingFields.length > 0) {
+          await this.termManager.loadTerms(parsedIdxData.termMapping)
+          if (this.opts.debugMode) {
+            console.log(`📂 Loaded term mapping from ${idxPath} (${Object.keys(parsedIdxData.termMapping).length} terms)`)
+          }
+        }
+
+        // Load index data only if available and we have indexed fields
+        if (parsedIdxData && parsedIdxData.index && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
+          this.indexManager.load(parsedIdxData.index)
+
+          if (this.opts.debugMode) {
+            console.log(`📂 Loaded index data from ${idxPath}`)
+          }
+
+          // Check if loaded index is actually empty (corrupted)
+          let hasAnyIndexData = false
+          for (const field of this.indexManager.indexedFields) {
+            if (this.indexManager.hasUsableIndexData(field)) {
+              hasAnyIndexData = true
+              break
+            }
+          }
+
+          if (this.opts.debugMode) {
+            console.log(`📊 Index check: hasAnyIndexData=${hasAnyIndexData}, indexedFields=${this.indexManager.indexedFields.join(',')}`)
+          }
+
+          // Schedule rebuild if index is empty AND file exists with data
+          if (!hasAnyIndexData) {
+            // Check if the actual .jdb file has data
+            const fileExists = await this.fileHandler.exists()
+            if (this.opts.debugMode) {
+              console.log(`📊 File check: exists=${fileExists}`)
+            }
+            if (fileExists) {
+              const stats = await this.fileHandler.getFileStats()
+              if (this.opts.debugMode) {
+                console.log(`📊 File stats: size=${stats?.size}`)
+              }
+              if (stats && stats.size > 0) {
+                // File has data but index is empty - corrupted index detected
+                if (!this.opts.allowIndexRebuild) {
+                  const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+                  throw new Error(
+                    `Index file is corrupted: ${idxPath} exists but contains no index data, ` +
+                    `while the data file has ${stats.size} bytes. ` +
+                    `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+                    `or manually fix/delete the corrupted index file.`
+                  )
+                }
+                // Schedule rebuild if allowed
+                if (this.opts.debugMode) {
+                  console.log(`⚠️ Index loaded but empty while file has ${stats.size} bytes - scheduling rebuild`)
+                }
+                this._scheduleIndexRebuild()
+              }
+            }
+          }
+        }
+
+        // Continue with remaining config loading
+        if (parsedIdxData.config) {
+          const config = parsedIdxData.config
+
           if (config.originalIndexes) {
             this.opts.originalIndexes = config.originalIndexes
             if (this.opts.debugMode) {
@@ -776,22 +1053,107 @@ class Database extends EventEmitter {
             }
           }
 
-          // Reinitialize schema from saved configuration
-
+          // Reinitialize schema from saved configuration (only if fields not provided)
+          // Note: fields option takes precedence over saved schema
+          if (!this.opts.fields && config.schema && this.serializer) {
             this.serializer.initializeSchema(config.schema)
             if (this.opts.debugMode) {
               console.log(`📂 Loaded schema from ${idxPath}:`, config.schema.join(', '))
             }
+          } else if (this.opts.fields && this.serializer) {
+            // Use fields option instead of saved schema
+            const fieldNames = Object.keys(this.opts.fields)
+            if (fieldNames.length > 0) {
+              this.serializer.initializeSchema(fieldNames)
+              if (this.opts.debugMode) {
+                console.log(`📂 Schema initialized from fields option:`, fieldNames.join(', '))
+              }
+            }
           }
         }
       }
     } catch (idxError) {
       // Index file doesn't exist or is corrupted, rebuild from data
+      // BUT: if error is about rebuild being disabled, re-throw it immediately
+      if (idxError.message && (idxError.message.includes('allowIndexRebuild') || idxError.message.includes('corrupted'))) {
+        throw idxError
+      }
+
+      // If error is "Index file does not exist", check if we should throw or rebuild
+      if (idxError.message && idxError.message.includes('does not exist')) {
+        // Check if the actual .jdb file has data that needs indexing
+        try {
+          const fileExists = await this.fileHandler.exists()
+          if (fileExists) {
+            const stats = await this.fileHandler.getFileStats()
+            if (stats && stats.size > 0) {
+              // File has data but index is missing
+              if (!this.opts.allowIndexRebuild) {
+                const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+                throw new Error(
+                  `Index file is missing or corrupted: ${idxPath} does not exist or is invalid, ` +
+                  `while the data file has ${stats.size} bytes. ` +
+                  `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+                  `or manually create/fix the index file.`
+                )
+              }
+              // Schedule rebuild if allowed
+              if (this.opts.debugMode) {
+                console.log(`⚠️ .jdb file has ${stats.size} bytes but index missing - scheduling rebuild`)
+              }
+              this._scheduleIndexRebuild()
+              return // Exit early
+            }
+          }
+        } catch (statsError) {
+          if (this.opts.debugMode) {
+            console.log('⚠️ Could not check file stats:', statsError.message)
+          }
+          // Re-throw if it's our error about rebuild being disabled
+          if (statsError.message && statsError.message.includes('allowIndexRebuild')) {
+            throw statsError
+          }
+        }
+        // If no data file or empty, just continue (no error needed)
+        return
+      }
+
       if (this.opts.debugMode) {
-        console.log('📂 No index file found,
+        console.log('📂 No index file found or corrupted, checking if rebuild is needed...')
+      }
+
+      // Check if the actual .jdb file has data that needs indexing
+      try {
+        const fileExists = await this.fileHandler.exists()
+        if (fileExists) {
+          const stats = await this.fileHandler.getFileStats()
+          if (stats && stats.size > 0) {
+            // File has data but index is missing or corrupted
+            if (!this.opts.allowIndexRebuild) {
+              const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+              throw new Error(
+                `Index file is missing or corrupted: ${idxPath} does not exist or is invalid, ` +
+                `while the data file has ${stats.size} bytes. ` +
+                `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+                `or manually create/fix the index file.`
+              )
+            }
+            // Schedule rebuild if allowed
+            if (this.opts.debugMode) {
+              console.log(`⚠️ .jdb file has ${stats.size} bytes but index missing - scheduling rebuild`)
+            }
+            this._scheduleIndexRebuild()
+          }
+        }
+      } catch (statsError) {
+        if (this.opts.debugMode) {
+          console.log('⚠️ Could not check file stats:', statsError.message)
+        }
+        // Re-throw if it's our error about rebuild being disabled
+        if (statsError.message && statsError.message.includes('allowIndexRebuild')) {
+          throw statsError
+        }
       }
-      // We can't rebuild index without violating no-memory-storage rule
-      // Index will be rebuilt as needed during queries
     }
   } else {
     // No indexed fields, no need to rebuild indexes
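With the default allowIndexRebuild: false, all three corruption paths above (index file missing, empty, or parseable but without usable index data) surface as errors during initialization. A hedged recovery sketch an application might use (the init() call is assumed from comments in this diff):

try {
  await db.init()
} catch (err) {
  if (err.message.includes('allowIndexRebuild')) {
    // Index is missing/corrupted while data exists: either repair or delete the
    // .idx.jdb file manually, or reopen with allowIndexRebuild: true so a rebuild
    // is scheduled and performed on first query.
  } else {
    throw err
  }
}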
@@ -820,6 +1182,28 @@ class Database extends EventEmitter {
       console.log(`💾 save() called: writeBuffer.length=${this.writeBuffer.length}, offsets.length=${this.offsets.length}`)
     }
 
+    // CRITICAL FIX: Wait for all active insert sessions to complete their auto-flushes
+    // This prevents race conditions where save() writes data while auto-flushes are still adding to writeBuffer
+    if (this.activeInsertSessions && this.activeInsertSessions.size > 0) {
+      if (this.opts.debugMode) {
+        console.log(`⏳ save(): Waiting for ${this.activeInsertSessions.size} active insert sessions to complete auto-flushes`)
+      }
+
+      const sessionPromises = Array.from(this.activeInsertSessions).map(session =>
+        session.waitForAutoFlushes().catch(err => {
+          if (this.opts.debugMode) {
+            console.warn(`⚠️ save(): Error waiting for insert session: ${err.message}`)
+          }
+        })
+      )
+
+      await Promise.all(sessionPromises)
+
+      if (this.opts.debugMode) {
+        console.log(`✅ save(): All insert sessions completed auto-flushes`)
+      }
+    }
+
     // Auto-save removed - no need to pause anything
 
     try {
@@ -915,7 +1299,8 @@ class Database extends EventEmitter {
 
     // CRITICAL FIX: Capture writeBuffer and deletedIds at the start to prevent race conditions
     const writeBufferSnapshot = [...this.writeBuffer]
-
+    // CRITICAL FIX: Normalize deleted IDs to strings for consistent comparison
+    const deletedIdsSnapshot = new Set(Array.from(this.deletedIds).map(id => String(id)))
 
     // OPTIMIZATION: Process pending index updates in batch before save
     if (this.pendingIndexUpdates && this.pendingIndexUpdates.length > 0) {
@@ -964,10 +1349,12 @@ class Database extends EventEmitter {
     let orphanedCount = 0
 
     // Check if there are new records to save (after flush, writeBuffer should be empty)
+    // CRITICAL FIX: Also check writeBufferSnapshot.length > 0 to handle updates/deletes
+    // that were in writeBuffer before flush but are now in snapshot
     if (this.opts.debugMode) {
       console.log(`💾 Save: writeBuffer.length=${this.writeBuffer.length}, writeBufferSnapshot.length=${writeBufferSnapshot.length}`)
     }
-    if (this.writeBuffer.length > 0) {
+    if (this.writeBuffer.length > 0 || writeBufferSnapshot.length > 0) {
       if (this.opts.debugMode) {
         console.log(`💾 Save: WriteBuffer has ${writeBufferSnapshot.length} records, using streaming approach`)
       }
@@ -1001,21 +1388,20 @@ class Database extends EventEmitter {
       // Add streaming operation
       parallelOperations.push(
         this._streamExistingRecords(deletedIdsSnapshot, writeBufferSnapshot).then(existingRecords => {
+          // CRITICAL FIX: _streamExistingRecords already handles updates via updatedRecordsMap
+          // So existingRecords already contains updated records from writeBufferSnapshot
+          // We only need to add records from writeBufferSnapshot that are NEW (not updates)
          allData = [...existingRecords]
 
-          // OPTIMIZATION: Use
-
+          // OPTIMIZATION: Use Set for faster lookups of existing record IDs
+          // CRITICAL FIX: Normalize IDs to strings for consistent comparison
+          const existingRecordIds = new Set(existingRecords.filter(r => r && r.id).map(r => String(r.id)))
 
+          // Add only NEW records from writeBufferSnapshot (not updates, as those are already in existingRecords)
           for (const record of writeBufferSnapshot) {
-            if (!deletedIdsSnapshot.has(record.id)) {
-
-
-              const existingIndex = allData.findIndex(r => r.id === record.id)
-              allData[existingIndex] = record
-            } else {
-              // Add new record
-              allData.push(record)
-            }
+            if (record && record.id && !deletedIdsSnapshot.has(String(record.id)) && !existingRecordIds.has(String(record.id))) {
+              // This is a new record, not an update
+              allData.push(record)
+            }
           }
         })
@@ -1060,15 +1446,43 @@ class Database extends EventEmitter {
           console.log(`💾 Save: _streamExistingRecords returned ${existingRecords.length} records`)
           console.log(`💾 Save: existingRecords:`, existingRecords)
         }
-        //
-
+        // CRITICAL FIX: _streamExistingRecords already handles updates via updatedRecordsMap
+        // So existingRecords already contains updated records from writeBufferSnapshot
+        // We only need to add records from writeBufferSnapshot that are NEW (not updates)
+        allData = [...existingRecords]
+
+        // OPTIMIZATION: Use Set for faster lookups of existing record IDs
+        const existingRecordIds = new Set(existingRecords.filter(r => r && r.id).map(r => r.id))
+
+        // Add only NEW records from writeBufferSnapshot (not updates, as those are already in existingRecords)
+        for (const record of writeBufferSnapshot) {
+          if (record && record.id && !deletedIdsSnapshot.has(String(record.id)) && !existingRecordIds.has(record.id)) {
+            // This is a new record, not an update
+            allData.push(record)
+          }
+        }
+
+        if (this.opts.debugMode) {
+          const updatedCount = writeBufferSnapshot.filter(r => r && r.id && existingRecordIds.has(String(r.id))).length
+          const newCount = writeBufferSnapshot.filter(r => r && r.id && !existingRecordIds.has(String(r.id))).length
+          console.log(`💾 Save: Combined data - existingRecords: ${existingRecords.length}, updatedFromBuffer: ${updatedCount}, newFromBuffer: ${newCount}, total: ${allData.length}`)
+          console.log(`💾 Save: WriteBuffer record IDs:`, writeBufferSnapshot.map(r => r && r.id ? r.id : 'no-id'))
+          console.log(`💾 Save: Existing record IDs:`, Array.from(existingRecordIds))
+          console.log(`💾 Save: Sample existing record:`, existingRecords[0] ? { id: existingRecords[0].id, name: existingRecords[0].name, tags: existingRecords[0].tags } : 'null')
+          console.log(`💾 Save: Sample writeBuffer record:`, writeBufferSnapshot[0] ? { id: writeBufferSnapshot[0].id, name: writeBufferSnapshot[0].name, tags: writeBufferSnapshot[0].tags } : 'null')
+        }
       }).catch(error => {
         if (this.opts.debugMode) {
          console.log(`💾 Save: _streamExistingRecords failed:`, error.message)
         }
         // CRITICAL FIX: Use safe fallback to preserve existing data instead of losing it
         return this._loadExistingRecordsFallback(deletedIdsSnapshot, writeBufferSnapshot).then(fallbackRecords => {
-
+          // CRITICAL FIX: Avoid duplicating updated records
+          const fallbackRecordIds = new Set(fallbackRecords.map(r => r.id))
+          const newRecordsFromBuffer = writeBufferSnapshot.filter(record =>
+            !deletedIdsSnapshot.has(String(record.id)) && !fallbackRecordIds.has(record.id)
+          )
+          allData = [...fallbackRecords, ...newRecordsFromBuffer]
           if (this.opts.debugMode) {
             console.log(`💾 Save: Fallback preserved ${fallbackRecords.length} existing records, total: ${allData.length}`)
           }
@@ -1078,7 +1492,7 @@ class Database extends EventEmitter {
             console.log(`💾 Save: CRITICAL - Data loss may occur, only writeBuffer will be saved`)
           }
           // Last resort: at least save what we have in writeBuffer
-          allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))
+          allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(String(record.id)))
         })
       })
     )
@@ -1092,7 +1506,12 @@ class Database extends EventEmitter {
       // CRITICAL FIX: Use safe fallback to preserve existing data instead of losing it
       try {
        const fallbackRecords = await this._loadExistingRecordsFallback(deletedIdsSnapshot, writeBufferSnapshot)
-
+        // CRITICAL FIX: Avoid duplicating updated records
+        const fallbackRecordIds = new Set(fallbackRecords.map(r => r.id))
+        const newRecordsFromBuffer = writeBufferSnapshot.filter(record =>
+          !deletedIdsSnapshot.has(String(record.id)) && !fallbackRecordIds.has(record.id)
+        )
+        allData = [...fallbackRecords, ...newRecordsFromBuffer]
         if (this.opts.debugMode) {
           console.log(`💾 Save: Fallback preserved ${fallbackRecords.length} existing records, total: ${allData.length}`)
         }
@@ -1102,23 +1521,46 @@ class Database extends EventEmitter {
          console.log(`💾 Save: CRITICAL - Data loss may occur, only writeBuffer will be saved`)
         }
         // Last resort: at least save what we have in writeBuffer
-        allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))
+        allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(String(record.id)))
       }
     }
    } else {
      // No existing data, use only writeBuffer
-      allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))
+      allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(String(record.id)))
    }
   }
 
   // CRITICAL FIX: Calculate offsets based on actual serialized data that will be written
   // This ensures consistency between offset calculation and file writing
-
-
+  // CRITICAL FIX: Remove term IDs before serialization to ensure proper serialization
+  const cleanedData = allData.map(record => {
+    if (!record || typeof record !== 'object') {
+      if (this.opts.debugMode) {
+        console.log(`💾 Save: WARNING - Invalid record in allData:`, record)
+      }
+      return record
+    }
+    return this.removeTermIdsForSerialization(record)
+  })
+
+  if (this.opts.debugMode) {
+    console.log(`💾 Save: allData.length=${allData.length}, cleanedData.length=${cleanedData.length}`)
+    console.log(`💾 Save: Sample cleaned record:`, cleanedData[0] ? Object.keys(cleanedData[0]) : 'null')
+  }
+
+  const jsonlData = cleanedData.length > 0
+    ? this.serializer.serializeBatch(cleanedData)
     : ''
   const jsonlString = jsonlData.toString('utf8')
   const lines = jsonlString.split('\n').filter(line => line.trim())
 
+  if (this.opts.debugMode) {
+    console.log(`💾 Save: Serialized ${lines.length} lines`)
+    if (lines.length > 0) {
+      console.log(`💾 Save: First line (first 200 chars):`, lines[0].substring(0, 200))
+    }
+  }
+
   this.offsets = []
   let currentOffset = 0
   for (let i = 0; i < lines.length; i++) {
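The recurring String(record.id) normalization in this save() path fixes a genuine JavaScript pitfall: Set membership uses SameValueZero with no type coercion, so a numeric id never matches its string form. A minimal illustration:

const deleted = new Set(['1'])              // ids captured as strings
deleted.has(1)                              // false: 1 !== '1', no coercion
const normalized = new Set(['1'].map(String))
normalized.has(String(1))                   // true: both sides normalized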
@@ -1199,19 +1641,53 @@ class Database extends EventEmitter {
 
     // Clear writeBuffer and deletedIds after successful save only if we had data to save
     if (allData.length > 0) {
-      // Rebuild index when records were deleted to maintain consistency
+      // Rebuild index when records were deleted or updated to maintain consistency
       const hadDeletedRecords = deletedIdsSnapshot.size > 0
+      const hadUpdatedRecords = writeBufferSnapshot.length > 0
       if (this.indexManager && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
-        if (hadDeletedRecords) {
-          // Clear the index and rebuild it from the
+        if (hadDeletedRecords || hadUpdatedRecords) {
+          // Clear the index and rebuild it from the saved records
+          // This ensures that lineNumbers point to the correct positions in the file
           this.indexManager.clear()
           if (this.opts.debugMode) {
-
+            if (hadDeletedRecords && hadUpdatedRecords) {
+              console.log(`🧹 Rebuilding index after removing ${deletedIdsSnapshot.size} deleted records and updating ${writeBufferSnapshot.length} records`)
+            } else if (hadDeletedRecords) {
+              console.log(`🧹 Rebuilding index after removing ${deletedIdsSnapshot.size} deleted records`)
+            } else {
+              console.log(`🧹 Rebuilding index after updating ${writeBufferSnapshot.length} records`)
+            }
           }
 
           // Rebuild index from the saved records
+          // CRITICAL: Process term mapping for records loaded from file to ensure ${field}Ids are available
           for (let i = 0; i < allData.length; i++) {
-
+            let record = allData[i]
+
+            // CRITICAL FIX: Ensure records have ${field}Ids for term mapping fields
+            // Records from writeBuffer already have ${field}Ids from processTermMapping
+            // Records from file need to be processed to restore ${field}Ids
+            const termMappingFields = this.getTermMappingFields()
+            if (termMappingFields.length > 0 && this.termManager) {
+              for (const field of termMappingFields) {
+                if (record[field] && Array.isArray(record[field])) {
+                  // Check if field contains term IDs (numbers) or terms (strings)
+                  const firstValue = record[field][0]
+                  if (typeof firstValue === 'number') {
+                    // Already term IDs, create ${field}Ids
+                    record[`${field}Ids`] = record[field]
+                  } else if (typeof firstValue === 'string') {
+                    // Terms, need to convert to term IDs
+                    const termIds = record[field].map(term => {
+                      const termId = this.termManager.getTermIdWithoutIncrement(term)
+                      return termId !== undefined ? termId : this.termManager.getTermId(term)
+                    })
+                    record[`${field}Ids`] = termIds
+                  }
+                }
+              }
+            }
+
             await this.indexManager.add(record, i)
           }
         }
@@ -1247,6 +1723,8 @@ class Database extends EventEmitter {
       this.writeBuffer = []
       this.writeBufferOffsets = []
       this.writeBufferSizes = []
+      this.writeBufferTotalSize = 0
+      this.writeBufferTotalSize = 0
     }
 
     // indexOffset already set correctly to currentOffset (total file size) above
@@ -1299,12 +1777,21 @@ class Database extends EventEmitter {
           this.termManager.decrementTermCount(termId)
         }
       } else if (oldRecord[field] && Array.isArray(oldRecord[field])) {
-        //
-
-
-
+        // Check if field contains term IDs (numbers) or terms (strings)
+        const firstValue = oldRecord[field][0]
+        if (typeof firstValue === 'number') {
+          // Field contains term IDs (from find with restoreTerms: false)
+          for (const termId of oldRecord[field]) {
            this.termManager.decrementTermCount(termId)
           }
+        } else if (typeof firstValue === 'string') {
+          // Field contains terms (strings) - convert to term IDs
+          for (const term of oldRecord[field]) {
+            const termId = this.termManager.termToId.get(term)
+            if (termId) {
+              this.termManager.decrementTermCount(termId)
+            }
+          }
         }
       }
     }
@@ -1465,18 +1952,16 @@ class Database extends EventEmitter {
     }
 
     // OPTIMIZATION: Process records using pre-computed term IDs
-
-    const processedRecord = { ...record }
-
+    for (const record of records) {
       for (const field of termMappingFields) {
         if (record[field] && Array.isArray(record[field])) {
           const termIds = record[field].map(term => termIdMap.get(term))
-
+          record[`${field}Ids`] = termIds
         }
       }
-
-
-
+    }
+
+    return records
   }
 
@@ -1559,6 +2044,7 @@ class Database extends EventEmitter {
     }
 
     // Apply schema enforcement - convert to array format and back to enforce schema
+    // This will discard any fields not in the schema
     const schemaEnforcedRecord = this.applySchemaEnforcement(record)
 
     // Don't store in this.data - only use writeBuffer and index
@@ -1570,18 +2056,19 @@ class Database extends EventEmitter {
     // OPTIMIZATION: Calculate and store offset and size for writeBuffer record
     // SPACE OPTIMIZATION: Remove term IDs before serialization
     const cleanRecord = this.removeTermIdsForSerialization(record)
-    const
-    const recordSize =
+    const recordBuffer = this.serializer.serialize(cleanRecord)
+    const recordSize = recordBuffer.length
 
     // Calculate offset based on end of file + previous writeBuffer sizes
-    const previousWriteBufferSize = this.
+    const previousWriteBufferSize = this.writeBufferTotalSize
     const recordOffset = this.indexOffset + previousWriteBufferSize
 
     this.writeBufferOffsets.push(recordOffset)
     this.writeBufferSizes.push(recordSize)
+    this.writeBufferTotalSize += recordSize
 
-    // OPTIMIZATION: Use the
-    const lineNumber = this.writeBuffer.length - 1
+    // OPTIMIZATION: Use the absolute line number (persisted records + writeBuffer index)
+    const lineNumber = this._getAbsoluteLineNumber(this.writeBuffer.length - 1)
 
     // OPTIMIZATION: Defer index updates to batch processing
     // Store the record for batch index processing
@@ -1652,7 +2139,7 @@ class Database extends EventEmitter {
       console.log(`💾 _insertBatchInternal: processing size=${dataArray.length}, startWriteBuffer=${this.writeBuffer.length}`)
     }
     const records = []
-    const
+    const existingWriteBufferLength = this.writeBuffer.length
 
     // Initialize schema if not already done (auto-detect from first record)
     if (this.serializer && !this.serializer.schemaManager.isInitialized && dataArray.length > 0) {
@@ -1684,13 +2171,13 @@ class Database extends EventEmitter {
     this.writeBuffer.push(...schemaEnforcedRecords)
 
     // OPTIMIZATION: Calculate offsets and sizes in batch (O(n))
-    let runningTotalSize = this.
+    let runningTotalSize = this.writeBufferTotalSize
     for (let i = 0; i < processedRecords.length; i++) {
       const record = processedRecords[i]
       // SPACE OPTIMIZATION: Remove term IDs before serialization
       const cleanRecord = this.removeTermIdsForSerialization(record)
-      const
-      const recordSize =
+      const recordBuffer = this.serializer.serialize(cleanRecord)
+      const recordSize = recordBuffer.length
 
       const recordOffset = this.indexOffset + runningTotalSize
       runningTotalSize += recordSize
@@ -1698,6 +2185,7 @@ class Database extends EventEmitter {
       this.writeBufferOffsets.push(recordOffset)
       this.writeBufferSizes.push(recordSize)
     }
+    this.writeBufferTotalSize = runningTotalSize
 
     // OPTIMIZATION: Batch process index updates
     if (!this.pendingIndexUpdates) {
@@ -1705,7 +2193,7 @@ class Database extends EventEmitter {
     }
 
     for (let i = 0; i < processedRecords.length; i++) {
-      const lineNumber =
+      const lineNumber = this._getAbsoluteLineNumber(existingWriteBufferLength + i)
       this.pendingIndexUpdates.push({ record: processedRecords[i], lineNumber })
     }
 
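The new writeBufferTotalSize counter is maintained incrementally on every single insert and batch insert, so computing a record's file offset no longer requires re-scanning the buffer. The removed lines are truncated in this diff, so the old computation is not visible; the sketch below only isolates the new O(1) pattern used in the hunks above:

// O(1) offset calculation per inserted record (mirrors the hunks above):
const recordOffset = this.indexOffset + this.writeBufferTotalSize
this.writeBufferOffsets.push(recordOffset)
this.writeBufferSizes.push(recordSize)
this.writeBufferTotalSize += recordSize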
@@ -1745,7 +2233,7 @@ class Database extends EventEmitter {
     try {
       // Validate indexed query mode if enabled
       if (this.opts.indexedQueryMode === 'strict') {
-        this._validateIndexedQuery(criteria)
+        this._validateIndexedQuery(criteria, options)
       }
 
       // Get results from file (QueryManager already handles term ID restoration)
@@ -1820,8 +2308,15 @@ class Database extends EventEmitter {
   /**
    * Validate indexed query mode for strict mode
    * @private
+   * @param {Object} criteria - Query criteria
+   * @param {Object} options - Query options
    */
-  _validateIndexedQuery(criteria) {
+  _validateIndexedQuery(criteria, options = {}) {
+    // Allow bypassing strict mode validation with allowNonIndexed option
+    if (options.allowNonIndexed === true) {
+      return; // Skip validation for this query
+    }
+
     if (!criteria || typeof criteria !== 'object') {
       return // Allow null/undefined criteria
     }
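In strict indexedQueryMode, querying a non-indexed field is a validation error; the new allowNonIndexed flag bypasses that check for a single query. Sketch below; the find(criteria, options) shape is inferred from the _validateIndexedQuery(criteria, options) call above, and 'bio' is an illustrative non-indexed field:

// Throws in strict mode if 'bio' is not an indexed field:
await db.find({ bio: 'engineer' })

// Explicit per-query opt-out added in this release:
await db.find({ bio: 'engineer' }, { allowNonIndexed: true })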
@@ -2094,23 +2589,26 @@ class Database extends EventEmitter {
       }
     }
 
-    // Update record in writeBuffer or add to writeBuffer if not present
+    // CRITICAL FIX: Update record in writeBuffer or add to writeBuffer if not present
+    // For records in the file, we need to ensure they are properly marked for replacement
     const index = this.writeBuffer.findIndex(r => r.id === record.id)
     let lineNumber = null
+
     if (index !== -1) {
       // Record is already in writeBuffer, update it
       this.writeBuffer[index] = updated
-      lineNumber = index
+      lineNumber = this._getAbsoluteLineNumber(index)
       if (this.opts.debugMode) {
         console.log(`🔄 UPDATE: Updated existing writeBuffer record at index ${index}`)
       }
     } else {
       // Record is in file, add updated version to writeBuffer
-      //
+      // CRITICAL FIX: Ensure the old record in file will be replaced by checking if it exists in offsets
+      // The save() method will handle replacement via _streamExistingRecords which checks updatedRecordsMap
       this.writeBuffer.push(updated)
-      lineNumber = this.writeBuffer.length - 1
+      lineNumber = this._getAbsoluteLineNumber(this.writeBuffer.length - 1)
       if (this.opts.debugMode) {
-        console.log(`🔄 UPDATE: Added
+        console.log(`🔄 UPDATE: Added updated record to writeBuffer (will replace file record ${record.id})`)
       }
     }
 
@@ -2245,16 +2743,7 @@ class Database extends EventEmitter {
       return
     }
 
-    //
-    if (this.opts.schema && Array.isArray(this.opts.schema)) {
-      this.serializer.initializeSchema(this.opts.schema)
-      if (this.opts.debugMode) {
-        console.log(`🔍 Schema initialized from options: ${this.opts.schema.join(', ')} [${this.instanceId}]`)
-      }
-      return
-    }
-
-    // Try to initialize from fields configuration (new format)
+    // Initialize from fields configuration (mandatory)
     if (this.opts.fields && typeof this.opts.fields === 'object') {
       const fieldNames = Object.keys(this.opts.fields)
       if (fieldNames.length > 0) {
@@ -2266,7 +2755,7 @@ class Database extends EventEmitter {
       }
     }
 
-    // Try to auto-detect schema from existing data
+    // Try to auto-detect schema from existing data (fallback for migration scenarios)
     if (this.data && this.data.length > 0) {
       this.serializer.initializeSchema(this.data, true) // autoDetect = true
       if (this.opts.debugMode) {
@@ -2275,10 +2764,6 @@ class Database extends EventEmitter {
       return
     }
 
-    // CRITICAL FIX: Don't initialize schema from indexes
-    // This was causing data loss because only indexed fields were preserved
-    // Let schema be auto-detected from actual data instead
-
     if (this.opts.debugMode) {
       console.log(`🔍 No schema initialization possible - will auto-detect on first insert [${this.instanceId}]`)
     }
@@ -2294,6 +2779,21 @@ class Database extends EventEmitter {
     const savedRecords = this.offsets.length
     const writeBufferRecords = this.writeBuffer.length
 
+    // CRITICAL FIX: If offsets are empty but indexOffset exists, use fallback calculation
+    // This handles cases where offsets weren't loaded or were reset
+    if (savedRecords === 0 && this.indexOffset > 0 && this.initialized) {
+      // Try to use IndexManager totalLines if available
+      if (this.indexManager && this.indexManager.totalLines > 0) {
+        return this.indexManager.totalLines + writeBufferRecords
+      }
+
+      // Fallback: estimate from indexOffset (less accurate but better than 0)
+      // This is a defensive fix for cases where offsets are missing but file has data
+      if (this.opts.debugMode) {
+        console.log(`⚠️ LENGTH: offsets array is empty but indexOffset=${this.indexOffset}, using IndexManager.totalLines or estimation`)
+      }
+    }
+
     // CRITICAL FIX: Validate that offsets array is consistent with actual data
     // This prevents the bug where database reassignment causes desynchronization
     if (this.initialized && savedRecords > 0) {
@@ -2339,22 +2839,7 @@ class Database extends EventEmitter {
    * Calculate current writeBuffer size in bytes (similar to published v1.1.0)
    */
   currentWriteBufferSize() {
-
-    return 0
-    }
-
-    // Calculate total size of all records in writeBuffer
-    let totalSize = 0
-    for (const record of this.writeBuffer) {
-      if (record) {
-        // SPACE OPTIMIZATION: Remove term IDs before size calculation
-        const cleanRecord = this.removeTermIdsForSerialization(record)
-        const recordJson = JSON.stringify(cleanRecord) + '\n'
-        totalSize += Buffer.byteLength(recordJson, 'utf8')
-      }
-    }
-
-    return totalSize
+    return this.writeBufferTotalSize || 0
   }
 
   /**
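`currentWriteBufferSize()` now reads the running `writeBufferTotalSize` counter maintained by the insert and flush paths above, turning an O(n) re-serialization of the whole buffer into an O(1) lookup. The invariant being relied on, as a standalone sketch rather than the library's code:

  // Invariant: total === sum of sizes[i] for every record still buffered
  const sizes = []
  let total = 0
  const push = (size) => { sizes.push(size); total += size }
  const removeAt = (i) => { total -= sizes[i]; sizes.splice(i, 1) }
  // the size getter can then simply return total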
@@ -2387,21 +2872,195 @@ class Database extends EventEmitter {
   }
 
   /**
-   *
-   *
-   * If anything is still active, it indicates a bug - log error and force cleanup
+   * Schedule index rebuild when index data is missing or corrupted
+   * @private
    */
-
-
+  _scheduleIndexRebuild() {
+    // Mark that rebuild is needed
+    this._indexRebuildNeeded = true
 
-    //
-
-
-
-
-
-
-
+    // Rebuild will happen lazily on first query if index is empty
+    // This avoids blocking init() but ensures index is available when needed
+  }
+
+  /**
+   * Rebuild indexes from data file if needed
+   * @private
+   */
+  async _rebuildIndexesIfNeeded() {
+    if (this.opts.debugMode) {
+      console.log(`🔍 _rebuildIndexesIfNeeded called: _indexRebuildNeeded=${this._indexRebuildNeeded}`)
+    }
+    if (!this._indexRebuildNeeded) return
+    if (!this.indexManager || !this.indexManager.indexedFields || this.indexManager.indexedFields.length === 0) return
+
+    // Check if index actually needs rebuilding
+    let needsRebuild = false
+    for (const field of this.indexManager.indexedFields) {
+      if (!this.indexManager.hasUsableIndexData(field)) {
+        needsRebuild = true
+        break
+      }
+    }
+
+    if (!needsRebuild) {
+      this._indexRebuildNeeded = false
+      return
+    }
+
+    // Check if rebuild is allowed
+    if (!this.opts.allowIndexRebuild) {
+      const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+      throw new Error(
+        `Index rebuild required but disabled: Index file ${idxPath} is corrupted or missing, ` +
+        `and allowIndexRebuild is set to false. ` +
+        `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+        `or manually fix/delete the corrupted index file.`
+      )
+    }
+
+    if (this.opts.debugMode) {
+      console.log('🔨 Rebuilding indexes from data file...')
+    }
+
+    try {
+      // Read all records and rebuild index
+      let count = 0
+      const startTime = Date.now()
+
+      // Auto-detect schema from first line if not initialized
+      if (!this.serializer.schemaManager.isInitialized) {
+        const fs = await import('fs')
+        const readline = await import('readline')
+        const stream = fs.createReadStream(this.fileHandler.file, {
+          highWaterMark: 64 * 1024,
+          encoding: 'utf8'
+        })
+        const rl = readline.createInterface({
+          input: stream,
+          crlfDelay: Infinity
+        })
+
+        for await (const line of rl) {
+          if (line && line.trim()) {
+            try {
+              const firstRecord = JSON.parse(line)
+              if (Array.isArray(firstRecord)) {
+                // Try to infer schema from opts.fields if available
+                if (this.opts.fields && typeof this.opts.fields === 'object') {
+                  const fieldNames = Object.keys(this.opts.fields)
+                  if (fieldNames.length >= firstRecord.length) {
+                    // Use first N fields from opts.fields to match array length
+                    const schema = fieldNames.slice(0, firstRecord.length)
+                    this.serializer.initializeSchema(schema)
+                    if (this.opts.debugMode) {
+                      console.log(`🔍 Inferred schema from opts.fields: ${schema.join(', ')}`)
+                    }
+                  } else {
+                    throw new Error(`Cannot rebuild index: array has ${firstRecord.length} elements but opts.fields only defines ${fieldNames.length} fields. Schema must be explicitly provided.`)
+                  }
+                } else {
+                  throw new Error('Cannot rebuild index: schema missing, file uses array format, and opts.fields not provided. The .idx.jdb file is corrupted.')
+                }
+              } else {
+                // Object format, initialize from object keys
+                this.serializer.initializeSchema(firstRecord, true)
+                if (this.opts.debugMode) {
+                  console.log(`🔍 Auto-detected schema from object: ${Object.keys(firstRecord).join(', ')}`)
+                }
+              }
+              break
+            } catch (error) {
+              if (this.opts.debugMode) {
+                console.error('❌ Failed to auto-detect schema:', error.message)
+              }
+              throw error
+            }
+          }
+        }
+        stream.destroy()
+      }
+
+      // Use streaming to read records without loading everything into memory
+      // Also rebuild offsets while we're at it
+      const fs = await import('fs')
+      const readline = await import('readline')
+
+      this.offsets = []
+      let currentOffset = 0
+
+      const stream = fs.createReadStream(this.fileHandler.file, {
+        highWaterMark: 64 * 1024,
+        encoding: 'utf8'
+      })
+
+      const rl = readline.createInterface({
+        input: stream,
+        crlfDelay: Infinity
+      })
+
+      try {
+        for await (const line of rl) {
+          if (line && line.trim()) {
+            try {
+              // Record the offset for this line
+              this.offsets.push(currentOffset)
+
+              const record = this.serializer.deserialize(line)
+              const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
+              await this.indexManager.add(recordWithTerms, count)
+              count++
+            } catch (error) {
+              // Skip invalid lines
+              if (this.opts.debugMode) {
+                console.log(`⚠️ Rebuild: Failed to deserialize line ${count}:`, error.message)
+              }
+            }
+          }
+          // Update offset for next line (including newline character)
+          currentOffset += Buffer.byteLength(line, 'utf8') + 1
+        }
+      } finally {
+        stream.destroy()
+      }
+
+      // Update indexManager totalLines
+      if (this.indexManager) {
+        this.indexManager.setTotalLines(this.offsets.length)
+      }
+
+      this._indexRebuildNeeded = false
+
+      if (this.opts.debugMode) {
+        console.log(`✅ Index rebuilt from ${count} records in ${Date.now() - startTime}ms`)
+      }
+
+      // Save the rebuilt index
+      await this._saveIndexDataToFile()
+    } catch (error) {
+      if (this.opts.debugMode) {
+        console.error('❌ Failed to rebuild indexes:', error.message)
+      }
+      // Don't throw - queries will fall back to streaming
+    }
+  }
+
+  /**
+   * Destroy database - DESTRUCTIVE MODE
+   * Assumes save() has already been called by user
+   * If anything is still active, it indicates a bug - log error and force cleanup
+   */
+  async destroy() {
+    if (this.destroyed) return
+
+    // Mark as destroying immediately to prevent new operations
+    this.destroying = true
+
+    // Wait for all active insert sessions to complete before destroying
+    if (this.activeInsertSessions.size > 0) {
+      if (this.opts.debugMode) {
+        console.log(`⏳ destroy: Waiting for ${this.activeInsertSessions.size} active insert sessions`)
+      }
 
       const sessionPromises = Array.from(this.activeInsertSessions).map(session =>
         session.waitForOperations(null) // Wait indefinitely for sessions to complete
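The rebuild path above is gated by the new `allowIndexRebuild` option, so a corrupted or missing `.idx.jdb` fails loudly unless the caller opts in. A sketch of opting in; the constructor shape is an assumption based on the options referenced in this diff:

  const db = new Database('./catalog.jdb', {
    indexes: { nameTerms: 'array:string' },
    allowIndexRebuild: true // rebuild the index from the data file when it is unusable
  })
  await db.init() // an unusable index is then rebuilt lazily before the first query that needs it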
@@ -2503,6 +3162,8 @@ class Database extends EventEmitter {
     this.writeBuffer = []
     this.writeBufferOffsets = []
    this.writeBufferSizes = []
+    this.writeBufferTotalSize = 0
+    this.writeBufferTotalSize = 0
     this.deletedIds.clear()
     this.pendingOperations.clear()
     this.pendingIndexUpdates = []
@@ -2570,8 +3231,211 @@ class Database extends EventEmitter {
   async count(criteria = {}, options = {}) {
     this._validateInitialization('count')
 
-
-
+    // OPTIMIZATION: Use queryManager.count() instead of find() for better performance
+    // This is especially faster for indexed queries which can use indexManager.query().size
+    const fileCount = await this.queryManager.count(criteria, options)
+
+    // Count matching records in writeBuffer
+    const writeBufferCount = this.writeBuffer.filter(record =>
+      this.queryManager.matchesCriteria(record, criteria, options)
+    ).length
+
+    return fileCount + writeBufferCount
+  }
+
+  /**
+   * Check if any records exist for given field and terms (index-only, ultra-fast)
+   * Delegates to IndexManager.exists() for maximum performance
+   *
+   * @param {string} fieldName - Indexed field name
+   * @param {string|Array<string>} terms - Single term or array of terms
+   * @param {Object} options - Options: { $all: true/false, caseInsensitive: true/false, excludes: Array<string> }
+   * @returns {Promise<boolean>} - True if at least one match exists
+   *
+   * @example
+   * // Check if channel exists
+   * const exists = await db.exists('nameTerms', ['a', 'e'], { $all: true });
+   *
+   * @example
+   * // Check if 'tv' exists but not 'globo'
+   * const exists = await db.exists('nameTerms', 'tv', { excludes: ['globo'] });
+   */
+  async exists(fieldName, terms, options = {}) {
+    this._validateInitialization('exists')
+    return this.indexManager.exists(fieldName, terms, options)
+  }
+
+  /**
+   * Calculate coverage for grouped include/exclude term sets
+   * @param {string} fieldName - Name of the indexed field
+   * @param {Array<object>} groups - Array of { terms, excludes } objects
+   * @param {object} options - Optional settings
+   * @returns {Promise<number>} Coverage percentage between 0 and 100
+   */
+  async coverage(fieldName, groups, options = {}) {
+    this._validateInitialization('coverage')
+
+    if (typeof fieldName !== 'string' || !fieldName.trim()) {
+      throw new Error('fieldName must be a non-empty string')
+    }
+
+    if (!Array.isArray(groups)) {
+      throw new Error('groups must be an array')
+    }
+
+    if (groups.length === 0) {
+      return 0
+    }
+
+    if (!this.opts.indexes || !this.opts.indexes[fieldName]) {
+      throw new Error(`Field "${fieldName}" is not indexed`)
+    }
+
+    const fieldType = this.opts.indexes[fieldName]
+    const supportedTypes = ['array:string', 'string']
+    if (!supportedTypes.includes(fieldType)) {
+      throw new Error(`coverage() only supports fields of type ${supportedTypes.join(', ')} (found: ${fieldType})`)
+    }
+
+    const fieldIndex = this.indexManager?.index?.data?.[fieldName]
+    if (!fieldIndex) {
+      return 0
+    }
+
+    const isTermMapped = this.termManager &&
+      this.termManager.termMappingFields &&
+      this.termManager.termMappingFields.includes(fieldName)
+
+    const normalizeTerm = (term) => {
+      if (term === undefined || term === null) {
+        return ''
+      }
+      return String(term).trim()
+    }
+
+    const resolveKey = (term) => {
+      if (isTermMapped) {
+        const termId = this.termManager.getTermIdWithoutIncrement(term)
+        if (termId === null || termId === undefined) {
+          return null
+        }
+        return String(termId)
+      }
+      return String(term)
+    }
+
+    let matchedGroups = 0
+
+    for (const group of groups) {
+      if (!group || typeof group !== 'object') {
+        throw new Error('Each coverage group must be an object')
+      }
+
+      const includeTermsRaw = Array.isArray(group.terms) ? group.terms : []
+      const excludeTermsRaw = Array.isArray(group.excludes) ? group.excludes : []
+
+      const includeTerms = Array.from(new Set(
+        includeTermsRaw
+          .map(normalizeTerm)
+          .filter(term => term.length > 0)
+      ))
+
+      if (includeTerms.length === 0) {
+        throw new Error('Each coverage group must define at least one term')
+      }
+
+      const excludeTerms = Array.from(new Set(
+        excludeTermsRaw
+          .map(normalizeTerm)
+          .filter(term => term.length > 0)
+      ))
+
+      let candidateLines = null
+      let groupMatched = true
+
+      for (const term of includeTerms) {
+        const key = resolveKey(term)
+        if (key === null) {
+          groupMatched = false
+          break
+        }
+
+        const termData = fieldIndex[key]
+        if (!termData) {
+          groupMatched = false
+          break
+        }
+
+        const lineNumbers = this.indexManager._getAllLineNumbers(termData)
+        if (!lineNumbers || lineNumbers.length === 0) {
+          groupMatched = false
+          break
+        }
+
+        if (candidateLines === null) {
+          candidateLines = new Set(lineNumbers)
+        } else {
+          const termSet = new Set(lineNumbers)
+          for (const line of Array.from(candidateLines)) {
+            if (!termSet.has(line)) {
+              candidateLines.delete(line)
+            }
+          }
+        }
+
+        if (!candidateLines || candidateLines.size === 0) {
+          groupMatched = false
+          break
+        }
+      }
+
+      if (!groupMatched || !candidateLines || candidateLines.size === 0) {
+        continue
+      }
+
+      for (const term of excludeTerms) {
+        const key = resolveKey(term)
+        if (key === null) {
+          continue
+        }
+
+        const termData = fieldIndex[key]
+        if (!termData) {
+          continue
+        }
+
+        const excludeLines = this.indexManager._getAllLineNumbers(termData)
+        if (!excludeLines || excludeLines.length === 0) {
+          continue
+        }
+
+        for (const line of excludeLines) {
+          if (!candidateLines.size) {
+            break
+          }
+          candidateLines.delete(line)
+        }
+
+        if (!candidateLines.size) {
+          break
+        }
+      }
+
+      if (candidateLines && candidateLines.size > 0) {
+        matchedGroups++
+      }
+    }
+
+    if (matchedGroups === 0) {
+      return 0
+    }
+
+    const precision = typeof options.precision === 'number' && options.precision >= 0
+      ? options.precision
+      : 2
+
+    const coverageValue = (matchedGroups / groups.length) * 100
+    return Number(coverageValue.toFixed(precision))
   }
 
   /**
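These three additions form progressively cheaper read paths: `count()` combines an index-aware file count with a writeBuffer scan, `exists()` answers from the index alone, and `coverage()` reports the percentage of include/exclude groups that still have at least one matching line. Illustrative calls, with invented field and term values:

  const total = await db.count({ group: 'news' })
  const hasTv = await db.exists('nameTerms', 'tv', { excludes: ['globo'] })
  const pct = await db.coverage('nameTerms', [
    { terms: ['sports'] },
    { terms: ['news'], excludes: ['local'] }
  ]) // 50 if exactly one of the two groups matches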
@@ -2589,7 +3453,8 @@ class Database extends EventEmitter {
     const opts = {
       limit: options.limit ?? 100,
       sort: options.sort ?? 'desc',
-      includeScore: options.includeScore !== false
+      includeScore: options.includeScore !== false,
+      mode: options.mode ?? 'sum'
     }
 
     // Validate fieldName
@@ -2613,6 +3478,12 @@ class Database extends EventEmitter {
         throw new Error(`Score value for term "${term}" must be a number`)
       }
     }
+
+    // Validate mode
+    const allowedModes = new Set(['sum', 'max', 'avg', 'first'])
+    if (!allowedModes.has(opts.mode)) {
+      throw new Error(`Invalid score mode "${opts.mode}". Must be one of: ${Array.from(allowedModes).join(', ')}`)
+    }
 
     // Check if field is indexed and is array:string type
     if (!this.opts.indexes || !this.opts.indexes[fieldName]) {
@@ -2637,6 +3508,7 @@ class Database extends EventEmitter {
 
     // Accumulate scores for each line number
     const scoreMap = new Map()
+    const countMap = opts.mode === 'avg' ? new Map() : null
 
     // Iterate through each term in the scores object
     for (const [term, weight] of Object.entries(scores)) {
@@ -2666,8 +3538,44 @@ class Database extends EventEmitter {
 
       // Add weight to score for each line number
       for (const lineNumber of lineNumbers) {
-        const currentScore = scoreMap.get(lineNumber)
-
+        const currentScore = scoreMap.get(lineNumber)
+
+        switch (opts.mode) {
+          case 'sum': {
+            const nextScore = (currentScore || 0) + weight
+            scoreMap.set(lineNumber, nextScore)
+            break
+          }
+          case 'max': {
+            if (currentScore === undefined) {
+              scoreMap.set(lineNumber, weight)
+            } else {
+              scoreMap.set(lineNumber, Math.max(currentScore, weight))
+            }
+            break
+          }
+          case 'avg': {
+            const previous = currentScore || 0
+            scoreMap.set(lineNumber, previous + weight)
+            const count = (countMap.get(lineNumber) || 0) + 1
+            countMap.set(lineNumber, count)
+            break
+          }
+          case 'first': {
+            if (currentScore === undefined) {
+              scoreMap.set(lineNumber, weight)
+            }
+            break
+          }
+        }
+      }
+    }
+
+    // For average mode, divide total by count
+    if (opts.mode === 'avg') {
+      for (const [lineNumber, totalScore] of scoreMap.entries()) {
+        const count = countMap.get(lineNumber) || 1
+        scoreMap.set(lineNumber, totalScore / count)
       }
     }
 
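With the `mode` option, the same per-term weights can be combined per record in four ways: summed, maximum, averaged over matching terms (via `countMap`), or first match wins. An illustrative call with invented terms and weights:

  const top = await db.score('nameTerms', { tv: 2, news: 1 }, {
    mode: 'max', // keep the highest matching weight per record instead of summing
    limit: 10
  })
  // each result carries a .score property unless includeScore: false is passed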
@@ -2693,24 +3601,83 @@ class Database extends EventEmitter {
     const lineNumbers = limitedEntries.map(([lineNumber]) => lineNumber)
     const scoresByLineNumber = new Map(limitedEntries)
 
-
-    const ranges = this.getRanges(lineNumbers)
-    const groupedRanges = await this.fileHandler.groupedRanges(ranges)
+    const persistedCount = Array.isArray(this.offsets) ? this.offsets.length : 0
 
-
-    const
+    // Separate lineNumbers into file records and writeBuffer records
+    const fileLineNumbers = []
+    const writeBufferLineNumbers = []
+
+    for (const lineNumber of lineNumbers) {
+      if (lineNumber >= persistedCount) {
+        // This lineNumber points to writeBuffer
+        writeBufferLineNumbers.push(lineNumber)
+      } else {
+        // This lineNumber points to file
+        fileLineNumbers.push(lineNumber)
+      }
+    }
 
     const results = []
 
-
-
-
-
-
-
-
-
-
+    // Read records from file
+    if (fileLineNumbers.length > 0) {
+      const ranges = this.getRanges(fileLineNumbers)
+      if (ranges.length > 0) {
+        // Create a map from start offset to lineNumber for accurate mapping
+        const startToLineNumber = new Map()
+        for (const range of ranges) {
+          if (range.index !== undefined) {
+            startToLineNumber.set(range.start, range.index)
+          }
+        }
+
+        const groupedRanges = await this.fileHandler.groupedRanges(ranges)
+
+        const fs = await import('fs')
+        const fd = await fs.promises.open(this.fileHandler.file, 'r')
+
+        try {
+          for (const groupedRange of groupedRanges) {
+            for await (const row of this.fileHandler.readGroupedRange(groupedRange, fd)) {
+              try {
+                const record = this.serializer.deserialize(row.line)
+
+                // Get line number from the row, fallback to start offset mapping
+                let lineNumber = row._ !== null && row._ !== undefined ? row._ : (startToLineNumber.get(row.start) ?? 0)
+
+                // Restore term IDs to terms
+                const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
+
+                // Add line number
+                recordWithTerms._ = lineNumber
+
+                // Add score if includeScore is true (default is true)
+                if (opts.includeScore !== false) {
+                  recordWithTerms.score = scoresByLineNumber.get(lineNumber) || 0
+                }
+
+                results.push(recordWithTerms)
+              } catch (error) {
+                // Skip invalid lines
+                if (this.opts.debugMode) {
+                  console.error('Error deserializing record in score():', error)
+                }
+              }
+            }
+          }
+        } finally {
+          await fd.close()
+        }
+      }
+    }
+
+    // Read records from writeBuffer
+    if (writeBufferLineNumbers.length > 0 && this.writeBuffer) {
+      for (const lineNumber of writeBufferLineNumbers) {
+        const writeBufferIndex = lineNumber - persistedCount
+        if (writeBufferIndex >= 0 && writeBufferIndex < this.writeBuffer.length) {
+          const record = this.writeBuffer[writeBufferIndex]
+          if (record) {
             // Restore term IDs to terms
             const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
 
@@ -2723,16 +3690,9 @@ class Database extends EventEmitter {
           }
 
           results.push(recordWithTerms)
-        } catch (error) {
-          // Skip invalid lines
-          if (this.opts.debugMode) {
-            console.error('Error deserializing record in score():', error)
-          }
         }
       }
     }
-    } finally {
-      await fd.close()
     }
 
     // Re-sort results to maintain score order (since reads might be out of order)
@@ -2944,11 +3904,52 @@ class Database extends EventEmitter {
     }
 
     // CRITICAL FIX: Only remove processed items from writeBuffer after all async operations complete
-    // OPTIMIZATION: Use Set.has() for O(1) lookup - same Set used for processing
     const beforeLength = this.writeBuffer.length
-
+    if (beforeLength > 0) {
+      const originalRecords = this.writeBuffer
+      const originalOffsets = this.writeBufferOffsets
+      const originalSizes = this.writeBufferSizes
+      const retainedRecords = []
+      const retainedOffsets = []
+      const retainedSizes = []
+      let retainedTotal = 0
+      let removedCount = 0
+
+      for (let i = 0; i < originalRecords.length; i++) {
+        const record = originalRecords[i]
+        if (itemsToProcess.has(record)) {
+          removedCount++
+          continue
+        }
+
+        retainedRecords.push(record)
+        if (originalOffsets && i < originalOffsets.length) {
+          retainedOffsets.push(originalOffsets[i])
+        }
+        if (originalSizes && i < originalSizes.length) {
+          const size = originalSizes[i]
+          if (size !== undefined) {
+            retainedSizes.push(size)
+            retainedTotal += size
+          }
+        }
+      }
+
+      if (removedCount > 0) {
+        this.writeBuffer = retainedRecords
+        this.writeBufferOffsets = retainedOffsets
+        this.writeBufferSizes = retainedSizes
+        this.writeBufferTotalSize = retainedTotal
+      }
+    }
     const afterLength = this.writeBuffer.length
 
+    if (afterLength === 0) {
+      this.writeBufferOffsets = []
+      this.writeBufferSizes = []
+      this.writeBufferTotalSize = 0
+    }
+
     if (this.opts.debugMode && beforeLength !== afterLength) {
       console.log(`💾 _processWriteBuffer: Removed ${beforeLength - afterLength} items from writeBuffer (${beforeLength} -> ${afterLength})`)
     }
@@ -3033,9 +4034,11 @@ class Database extends EventEmitter {
     for (let i = 0; i < lines.length && i < this.offsets.length; i++) {
       try {
         const record = this.serializer.deserialize(lines[i])
-        if (record && !deletedIdsSnapshot.has(record.id)) {
+        if (record && !deletedIdsSnapshot.has(String(record.id))) {
           // Check if this record is not being updated in writeBuffer
-
+          // CRITICAL FIX: Normalize IDs to strings for consistent comparison
+          const normalizedRecordId = String(record.id)
+          const updatedRecord = writeBufferSnapshot.find(r => r && r.id && String(r.id) === normalizedRecordId)
           if (!updatedRecord) {
             existingRecords.push(record)
           }
@@ -3079,9 +4082,14 @@ class Database extends EventEmitter {
     // existingRecords.length = this.offsets.length
 
     // Create a map of updated records for quick lookup
+    // CRITICAL FIX: Normalize IDs to strings for consistent comparison
     const updatedRecordsMap = new Map()
     writeBufferSnapshot.forEach(record => {
-
+      if (record && record.id !== undefined && record.id !== null) {
+        // Normalize ID to string for consistent comparison
+        const normalizedId = String(record.id)
+        updatedRecordsMap.set(normalizedId, record)
+      }
     })
 
     // OPTIMIZATION: Cache file stats to avoid repeated stat() calls
@@ -3249,7 +4257,8 @@ class Database extends EventEmitter {
         if (recordId !== undefined && recordId !== null) {
           recordId = String(recordId)
           // Check if this record needs full parsing (updated or deleted)
-
+          // CRITICAL FIX: Normalize ID to string for consistent comparison
+          needsFullParse = updatedRecordsMap.has(recordId) || deletedIdsSnapshot.has(String(recordId))
         } else {
           needsFullParse = true
         }
@@ -3264,7 +4273,8 @@ class Database extends EventEmitter {
           const idMatch = trimmedLine.match(/"id"\s*:\s*"([^"]+)"|"id"\s*:\s*(\d+)/)
           if (idMatch) {
            recordId = idMatch[1] || idMatch[2]
-
+            // CRITICAL FIX: Normalize ID to string for consistent comparison
+            needsFullParse = updatedRecordsMap.has(String(recordId)) || deletedIdsSnapshot.has(String(recordId))
           } else {
             needsFullParse = true
           }
@@ -3289,9 +4299,11 @@ class Database extends EventEmitter {
           // Use record directly (no need to restore term IDs)
           const recordWithIds = record
 
-
+          // CRITICAL FIX: Normalize ID to string for consistent comparison
+          const normalizedId = String(recordWithIds.id)
+          if (updatedRecordsMap.has(normalizedId)) {
             // Replace with updated version
-            const updatedRecord = updatedRecordsMap.get(
+            const updatedRecord = updatedRecordsMap.get(normalizedId)
             if (this.opts.debugMode) {
               console.log(`💾 Save: Updated record ${recordWithIds.id} (${recordWithIds.name || 'Unnamed'})`)
             }
@@ -3301,7 +4313,7 @@ class Database extends EventEmitter {
               id: recordWithIds.id,
               needsParse: false
             }
-          } else if (!deletedIdsSnapshot.has(recordWithIds.id)) {
+          } else if (!deletedIdsSnapshot.has(String(recordWithIds.id))) {
             // Keep existing record if not deleted
             if (this.opts.debugMode) {
               console.log(`💾 Save: Kept record ${recordWithIds.id} (${recordWithIds.name || 'Unnamed'})`)
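The `String(...)` normalizations in this save path all target one JavaScript pitfall: `Map` and `Set` lookups use SameValueZero equality, so an id stored as the number 1 is invisible to a lookup with the string '1'. A standalone illustration of the failure mode being fixed:

  const deletedIds = new Set([1])
  deletedIds.has('1') // false, so a deleted record would survive the save
  const normalized = new Set([1, 'a'].map(String))
  normalized.has(String(1)) // true once both sides are normalized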
@@ -3534,6 +4546,240 @@ class Database extends EventEmitter {
     }).filter(n => n !== undefined)
   }
 
+  /**
+   * Get the base line number for writeBuffer entries (number of persisted records)
+   * @private
+   */
+  _getWriteBufferBaseLineNumber() {
+    return Array.isArray(this.offsets) ? this.offsets.length : 0
+  }
+
+  /**
+   * Convert a writeBuffer index into an absolute line number
+   * @param {number} writeBufferIndex - Index inside writeBuffer (0-based)
+   * @returns {number} Absolute line number (0-based)
+   * @private
+   */
+  _getAbsoluteLineNumber(writeBufferIndex) {
+    if (typeof writeBufferIndex !== 'number' || writeBufferIndex < 0) {
+      throw new Error('Invalid writeBuffer index')
+    }
+    return this._getWriteBufferBaseLineNumber() + writeBufferIndex
+  }
+
+  async *_streamingRecoveryGenerator(criteria, options, alreadyYielded = 0, map = null, remainingSkipValue = 0) {
+    if (this._offsetRecoveryInProgress) {
+      return
+    }
+
+    if (!this.fileHandler || !this.fileHandler.file) {
+      return
+    }
+
+    this._offsetRecoveryInProgress = true
+
+    const fsModule = this._fsModule || (this._fsModule = await import('fs'))
+    let fd
+
+    try {
+      fd = await fsModule.promises.open(this.fileHandler.file, 'r')
+    } catch (error) {
+      this._offsetRecoveryInProgress = false
+      if (this.opts.debugMode) {
+        console.warn(`⚠️ Offset recovery skipped: ${error.message}`)
+      }
+      return
+    }
+
+    const chunkSize = this.opts.offsetRecoveryChunkSize || 64 * 1024
+    let buffer = Buffer.alloc(0)
+    let readOffset = 0
+    const originalOffsets = Array.isArray(this.offsets) ? [...this.offsets] : []
+    const newOffsets = []
+    let offsetAdjusted = false
+    let limitReached = false
+    let lineIndex = 0
+    let lastLineEnd = 0
+    let producedTotal = alreadyYielded || 0
+    let remainingSkip = remainingSkipValue || 0
+    let remainingAlreadyYielded = alreadyYielded || 0
+    const limit = typeof options?.limit === 'number' ? options.limit : null
+    const includeOffsets = options?.includeOffsets === true
+    const includeLinePosition = this.opts.includeLinePosition
+    const mapSet = map instanceof Set ? new Set(map) : (Array.isArray(map) ? new Set(map) : null)
+    const criteriaIsObject = criteria && typeof criteria === 'object' && !Array.isArray(criteria) && !(criteria instanceof Set)
+    const hasCriteria = criteriaIsObject && Object.keys(criteria).length > 0
+
+    const decodeLineBuffer = (lineBuffer) => {
+      let trimmed = lineBuffer
+      if (trimmed.length > 0 && trimmed[trimmed.length - 1] === 0x0A) {
+        trimmed = trimmed.subarray(0, trimmed.length - 1)
+      }
+      if (trimmed.length > 0 && trimmed[trimmed.length - 1] === 0x0D) {
+        trimmed = trimmed.subarray(0, trimmed.length - 1)
+      }
+      return trimmed
+    }
+
+    const processLine = async (lineBuffer, lineStart) => {
+      const lineLength = lineBuffer.length
+      newOffsets[lineIndex] = lineStart
+      const expected = originalOffsets[lineIndex]
+      if (expected !== undefined && expected !== lineStart) {
+        offsetAdjusted = true
+        if (this.opts.debugMode) {
+          console.warn(`⚠️ Offset mismatch detected at line ${lineIndex}: expected ${expected}, actual ${lineStart}`)
+        }
+      } else if (expected === undefined) {
+        offsetAdjusted = true
+      }
+
+      lastLineEnd = Math.max(lastLineEnd, lineStart + lineLength)
+
+      let entryWithTerms = null
+      let shouldYield = false
+
+      const decodedBuffer = decodeLineBuffer(lineBuffer)
+      if (decodedBuffer.length > 0) {
+        let lineString
+        try {
+          lineString = decodedBuffer.toString('utf8')
+        } catch (error) {
+          lineString = decodedBuffer.toString('utf8', { replacement: '?' })
+        }
+
+        try {
+          const record = await this.serializer.deserialize(lineString)
+          if (record && typeof record === 'object') {
+            entryWithTerms = this.restoreTermIdsAfterDeserialization(record)
+            if (includeLinePosition) {
+              entryWithTerms._ = lineIndex
+            }
+
+            if (mapSet) {
+              shouldYield = mapSet.has(lineIndex)
+              if (shouldYield) {
+                mapSet.delete(lineIndex)
+              }
+            } else if (hasCriteria) {
+              shouldYield = this.queryManager.matchesCriteria(entryWithTerms, criteria, options)
+            } else {
+              shouldYield = true
+            }
+          }
+        } catch (error) {
+          if (this.opts.debugMode) {
+            console.warn(`⚠️ Offset recovery failed to deserialize line ${lineIndex} at ${lineStart}: ${error.message}`)
+          }
+        }
+      }
+
+      let yieldedEntry = null
+
+      if (shouldYield && entryWithTerms) {
+        if (remainingSkip > 0) {
+          remainingSkip--
+        } else if (remainingAlreadyYielded > 0) {
+          remainingAlreadyYielded--
+        } else if (!limit || producedTotal < limit) {
+          producedTotal++
+          yieldedEntry = includeOffsets
+            ? { entry: entryWithTerms, start: lineStart, _: lineIndex }
+            : entryWithTerms
+        } else {
+          limitReached = true
+        }
+      }
+
+      lineIndex++
+
+      if (yieldedEntry) {
+        return yieldedEntry
+      }
+      return null
+    }
+
+    let recoveryFailed = false
+
+    try {
+      while (true) {
+        const tempBuffer = Buffer.allocUnsafe(chunkSize)
+        const { bytesRead } = await fd.read(tempBuffer, 0, chunkSize, readOffset)
+
+        if (bytesRead === 0) {
+          if (buffer.length > 0) {
+            const lineStart = readOffset - buffer.length
+            const yieldedEntry = await processLine(buffer, lineStart)
+            if (yieldedEntry) {
+              yield yieldedEntry
+            }
+          }
+          break
+        }
+
+        readOffset += bytesRead
+        let chunk = buffer.length > 0
+          ? Buffer.concat([buffer, tempBuffer.subarray(0, bytesRead)])
+          : tempBuffer.subarray(0, bytesRead)
+
+        let processedUpTo = 0
+        const chunkBaseOffset = readOffset - chunk.length
+
+        while (true) {
+          const newlineIndex = chunk.indexOf(0x0A, processedUpTo)
+          if (newlineIndex === -1) {
+            break
+          }
+
+          const lineBuffer = chunk.subarray(processedUpTo, newlineIndex + 1)
+          const lineStart = chunkBaseOffset + processedUpTo
+          const yieldedEntry = await processLine(lineBuffer, lineStart)
+          processedUpTo = newlineIndex + 1
+
+          if (yieldedEntry) {
+            yield yieldedEntry
+          }
+        }
+
+        buffer = chunk.subarray(processedUpTo)
+      }
+    } catch (error) {
+      recoveryFailed = true
+      if (this.opts.debugMode) {
+        console.warn(`⚠️ Offset recovery aborted: ${error.message}`)
+      }
+    } finally {
+      await fd.close().catch(() => {})
+      this._offsetRecoveryInProgress = false
+
+      if (recoveryFailed) {
+        return
+      }
+
+      this.offsets = newOffsets
+      if (lineIndex < this.offsets.length) {
+        this.offsets.length = lineIndex
+      }
+
+      if (originalOffsets.length !== newOffsets.length) {
+        offsetAdjusted = true
+      }
+
+      this.indexOffset = lastLineEnd
+
+      if (offsetAdjusted) {
+        this.shouldSave = true
+        try {
+          await this._saveIndexDataToFile()
+        } catch (error) {
+          if (this.opts.debugMode) {
+            console.warn(`⚠️ Failed to persist recovered offsets: ${error.message}`)
+          }
+        }
+      }
+    }
+  }
+
   /**
    * Walk through records using streaming (real implementation)
    */
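The two helpers at the top of this hunk pin down the line-number convention the new code relies on: absolute line numbers count persisted records first, then writeBuffer entries. With invented numbers:

  // offsets.length === 100 persisted records on disk
  // writeBuffer[0] is then absolute line 100, writeBuffer[5] is line 105
  db._getAbsoluteLineNumber(5) // => 105 (private helper, shown only for illustration)

The recovery generator that follows re-derives offsets by scanning the data file for newline boundaries, so a stale `.idx.jdb` heals itself the next time a read hits a bad offset.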
@@ -3547,6 +4793,7 @@ class Database extends EventEmitter {
     if (this.indexOffset === 0 && this.writeBuffer.length === 0) return
 
     let count = 0
+    let remainingSkip = options.skip || 0
 
     let map
     if (!Array.isArray(criteria)) {
@@ -3557,8 +4804,9 @@ class Database extends EventEmitter {
         map = [...this.indexManager.query(criteria, options)]
       } else {
         // For empty criteria {} or null/undefined, get all records
-
-
+        const totalRecords = this.offsets && this.offsets.length > 0
+          ? this.offsets.length
+          : this.writeBuffer.length
         map = [...Array(totalRecords).keys()]
       }
     } else {
@@ -3577,6 +4825,10 @@ class Database extends EventEmitter {
         }
         const entry = this.writeBuffer[i]
         if (entry && this.queryManager.matchesCriteria(entry, criteria, options)) {
+          if (remainingSkip > 0) {
+            remainingSkip--
+            continue
+          }
          count++
           if (options.includeOffsets) {
             yield { entry, start: 0, _: i }
@@ -3597,6 +4849,10 @@ class Database extends EventEmitter {
         if (lineNumber < this.writeBuffer.length) {
           const entry = this.writeBuffer[lineNumber]
           if (entry) {
+            if (remainingSkip > 0) {
+              remainingSkip--
+              continue
+            }
             count++
             if (options.includeOffsets) {
               yield { entry, start: 0, _: lineNumber }
@@ -3657,6 +4913,11 @@ class Database extends EventEmitter {
           // SPACE OPTIMIZATION: Restore term IDs to terms for user
           const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
 
+          if (remainingSkip > 0) {
+            remainingSkip--
+            continue
+          }
+
           count++
           if (options.includeOffsets) {
             yield { entry: recordWithTerms, start: row.start, _: row._ || 0 }
@@ -3667,7 +4928,21 @@ class Database extends EventEmitter {
             yield recordWithTerms
           }
         } catch (error) {
-          //
+          // CRITICAL FIX: Log deserialization errors instead of silently ignoring them
+          // This helps identify data corruption issues
+          if (1||this.opts.debugMode) {
+            console.warn(`⚠️ walk(): Failed to deserialize record at offset ${row.start}: ${error.message}`)
+            console.warn(`⚠️ walk(): Problematic line (first 200 chars): ${row.line.substring(0, 200)}`)
+          }
+          if (!this._offsetRecoveryInProgress) {
+            for await (const recoveredEntry of this._streamingRecoveryGenerator(criteria, options, count, map, remainingSkip)) {
+              yield recoveredEntry
+              count++
+            }
+            return
+          }
+          // Skip invalid lines but continue processing
+          // This prevents one corrupted record from stopping the entire walk operation
         }
       }
       if (options.limit && count >= options.limit) {
@@ -3696,6 +4971,12 @@ class Database extends EventEmitter {
       if (options.limit && count >= options.limit) {
         break
       }
+
+      if (remainingSkip > 0) {
+        remainingSkip--
+        continue
+      }
+
       count++
 
       // SPACE OPTIMIZATION: Restore term IDs to terms for user
@@ -3732,20 +5013,44 @@ class Database extends EventEmitter {
         if (options.limit && count >= options.limit) {
           break
         }
-        const entry = await this.serializer.deserialize(row.line, { compress: this.opts.compress, v8: this.opts.v8 })
-        if (entry === null) continue
 
-
-
+        try {
+          const entry = await this.serializer.deserialize(row.line, { compress: this.opts.compress, v8: this.opts.v8 })
+          if (entry === null) continue
+
+          // SPACE OPTIMIZATION: Restore term IDs to terms for user
+          const entryWithTerms = this.restoreTermIdsAfterDeserialization(entry)
 
-
-
-
-
-
-
+          if (remainingSkip > 0) {
+            remainingSkip--
+            continue
+          }
+
+          count++
+          if (options.includeOffsets) {
+            yield { entry: entryWithTerms, start: row.start, _: row._ || this.offsets.findIndex(n => n === row.start) }
+          } else {
+            if (this.opts.includeLinePosition) {
+              entryWithTerms._ = row._ || this.offsets.findIndex(n => n === row.start)
+            }
+            yield entryWithTerms
+          }
+        } catch (error) {
+          // CRITICAL FIX: Log deserialization errors instead of silently ignoring them
+          // This helps identify data corruption issues
+          if (1||this.opts.debugMode) {
+            console.warn(`⚠️ walk(): Failed to deserialize record at offset ${row.start}: ${error.message}`)
+            console.warn(`⚠️ walk(): Problematic line (first 200 chars): ${row.line.substring(0, 200)}`)
           }
-
+          if (!this._offsetRecoveryInProgress) {
+            for await (const recoveredEntry of this._streamingRecoveryGenerator(criteria, options, count, map, remainingSkip)) {
+              yield recoveredEntry
+              count++
+            }
+            return
+          }
+          // Skip invalid lines but continue processing
+          // This prevents one corrupted record from stopping the entire walk operation
         }
       }
     }
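Together with the skip handling added to the writeBuffer branches above, `walk()` now applies `skip` after criteria matching on every read path. A usage sketch, assuming `walk(criteria, options)` is the async generator documented earlier in this file:

  for await (const record of db.walk({ group: 'news' }, { skip: 20, limit: 10 })) {
    console.log(record.id) // records 21 through 30 of the matching set
  }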
@@ -3899,16 +5204,20 @@ class Database extends EventEmitter {
 
     // Update record in writeBuffer or add to writeBuffer
     const index = this.writeBuffer.findIndex(r => r.id === record.id)
+    let targetIndex
     if (index !== -1) {
       // Record is already in writeBuffer, update it
       this.writeBuffer[index] = record
+      targetIndex = index
     } else {
       // Record is in file, add updated version to writeBuffer
       this.writeBuffer.push(record)
+      targetIndex = this.writeBuffer.length - 1
     }
 
     // Update index
-
+    const absoluteLineNumber = this._getAbsoluteLineNumber(targetIndex)
+    await this.indexManager.update(record, record, absoluteLineNumber)
   }
 
   if (this.opts.debugMode) {
@@ -3982,8 +5291,24 @@ class Database extends EventEmitter {
         this.writeBufferSizes = []
       }
     } else {
-      //
-
+      // Only save index data if it actually has content
+      // Don't overwrite a valid index with an empty one
+      if (this.indexManager && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
+        let hasIndexData = false
+        for (const field of this.indexManager.indexedFields) {
+          if (this.indexManager.hasUsableIndexData(field)) {
+            hasIndexData = true
+            break
+          }
+        }
+        // Only save if we have actual index data OR if offsets are populated
+        // (offsets being populated means we've processed data)
+        if (hasIndexData || (this.offsets && this.offsets.length > 0)) {
+          await this._saveIndexDataToFile()
+        } else if (this.opts.debugMode) {
+          console.log('⚠️ close(): Skipping index save - index is empty and no offsets')
+        }
+      }
     }
 
     // 2. Mark as closed (but not destroyed) to allow reopening
@@ -4019,8 +5344,43 @@ class Database extends EventEmitter {
     if (this.indexManager) {
       try {
         const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+        const indexJSON = this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0 ? this.indexManager.toJSON() : {}
+
+        // Check if index is empty
+        const isEmpty = !indexJSON || Object.keys(indexJSON).length === 0 ||
+          (this.indexManager.indexedFields && this.indexManager.indexedFields.every(field => {
+            const fieldIndex = indexJSON[field]
+            return !fieldIndex || (typeof fieldIndex === 'object' && Object.keys(fieldIndex).length === 0)
+          }))
+
+        // PROTECTION: Don't overwrite a valid index file with empty data
+        // If the .idx.jdb file exists and has data, and we're trying to save empty index,
+        // skip the save to prevent corruption
+        if (isEmpty && !this.offsets?.length) {
+          const fs = await import('fs')
+          if (fs.existsSync(idxPath)) {
+            try {
+              const existingData = JSON.parse(await fs.promises.readFile(idxPath, 'utf8'))
+              const existingHasData = existingData.index && Object.keys(existingData.index).length > 0
+              const existingHasOffsets = existingData.offsets && existingData.offsets.length > 0
+
+              if (existingHasData || existingHasOffsets) {
+                if (this.opts.debugMode) {
+                  console.log(`⚠️ _saveIndexDataToFile: Skipping save - would overwrite valid index with empty data`)
+                }
+                return // Don't overwrite valid index with empty one
+              }
+            } catch (error) {
+              // If we can't read existing file, proceed with save (might be corrupted)
+              if (this.opts.debugMode) {
+                console.log(`⚠️ _saveIndexDataToFile: Could not read existing index file, proceeding with save`)
+              }
+            }
+          }
+        }
+
         const indexData = {
-          index:
+          index: indexJSON,
           offsets: this.offsets, // Save actual offsets for efficient file operations
           indexOffset: this.indexOffset, // Save file size for proper range calculations
           // Save configuration for reuse when database exists