jexidb 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/Database.cjs +9253 -437
  2. package/package.json +9 -2
  3. package/src/Database.mjs +1572 -212
  4. package/src/FileHandler.mjs +83 -44
  5. package/src/OperationQueue.mjs +23 -23
  6. package/src/SchemaManager.mjs +325 -268
  7. package/src/Serializer.mjs +234 -24
  8. package/src/managers/IndexManager.mjs +778 -87
  9. package/src/managers/QueryManager.mjs +340 -67
  10. package/src/managers/TermManager.mjs +7 -7
  11. package/src/utils/operatorNormalizer.mjs +116 -0
  12. package/.babelrc +0 -13
  13. package/.gitattributes +0 -2
  14. package/CHANGELOG.md +0 -140
  15. package/babel.config.json +0 -5
  16. package/docs/API.md +0 -1051
  17. package/docs/EXAMPLES.md +0 -701
  18. package/docs/README.md +0 -194
  19. package/examples/iterate-usage-example.js +0 -157
  20. package/examples/simple-iterate-example.js +0 -115
  21. package/jest.config.js +0 -24
  22. package/scripts/README.md +0 -47
  23. package/scripts/clean-test-files.js +0 -75
  24. package/scripts/prepare.js +0 -31
  25. package/scripts/run-tests.js +0 -80
  26. package/test/$not-operator-with-and.test.js +0 -282
  27. package/test/README.md +0 -8
  28. package/test/close-init-cycle.test.js +0 -256
  29. package/test/critical-bugs-fixes.test.js +0 -1069
  30. package/test/index-persistence.test.js +0 -306
  31. package/test/index-serialization.test.js +0 -314
  32. package/test/indexed-query-mode.test.js +0 -360
  33. package/test/iterate-method.test.js +0 -272
  34. package/test/query-operators.test.js +0 -238
  35. package/test/regex-array-fields.test.js +0 -129
  36. package/test/score-method.test.js +0 -238
  37. package/test/setup.js +0 -17
  38. package/test/term-mapping-minimal.test.js +0 -154
  39. package/test/term-mapping-simple.test.js +0 -257
  40. package/test/term-mapping.test.js +0 -514
  41. package/test/writebuffer-flush-resilience.test.js +0 -204
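The Database.mjs changes below make the 'fields' option mandatory (the legacy object-form 'schema' option is migrated to 'fields' and the array form is rejected), let indexed fields that are not declared in 'fields' fall back to an 'auto' type, and add an 'allowIndexRebuild' flag controlling whether a missing or corrupted .idx.jdb index file is rebuilt automatically or raises an error. A minimal options sketch based on the validation logic in this diff; the constructor call site is not part of the diff, so how this object is passed in is an assumption:

// Hypothetical jexidb 2.1.2 options object, reconstructed from the checks in Database.mjs below
const opts = {
  fields: { id: 'number', name: 'string', tags: 'array:string' }, // mandatory as of this release
  indexes: ['id', 'tags'], // array form is normalized to { id: 'number', tags: 'array:string' }; undeclared fields become 'auto'
  allowIndexRebuild: true  // default false: a corrupted or missing index file throws instead of being rebuilt
}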
package/src/Database.mjs CHANGED
@@ -76,12 +76,16 @@ class InsertSession {
  constructor(database, sessionOptions = {}) {
  this.database = database
  this.batchSize = sessionOptions.batchSize || 100
+ this.enableAutoSave = sessionOptions.enableAutoSave !== undefined ? sessionOptions.enableAutoSave : true
  this.totalInserted = 0
  this.flushing = false
  this.batches = [] // Array of batches to avoid slice() in flush()
  this.currentBatch = [] // Current batch being filled
  this.sessionId = Math.random().toString(36).substr(2, 9)

+ // Track pending auto-flush operations
+ this.pendingAutoFlushes = new Set()
+
  // Register this session as active
  this.database.activeInsertSessions.add(this)
  }
@@ -103,46 +107,153 @@ class InsertSession {
  this.currentBatch.push(finalRecord)
  this.totalInserted++

- // If batch is full, move it to batches array
+ // If batch is full, move it to batches array and trigger auto-flush
  if (this.currentBatch.length >= this.batchSize) {
  this.batches.push(this.currentBatch)
  this.currentBatch = []
+
+ // Auto-flush in background (non-blocking)
+ // This ensures batches are flushed automatically without blocking add()
+ this.autoFlush().catch(err => {
+ // Log error but don't throw - we don't want to break the add() flow
+ console.error('Auto-flush error in InsertSession:', err)
+ })
  }

  return finalRecord
  }

- async flush() {
- // Check if there's anything to flush
- if (this.batches.length === 0 && this.currentBatch.length === 0) return
-
- // Prevent concurrent flushes
+ async autoFlush() {
+ // Only flush if not already flushing
+ // This method will process all pending batches
  if (this.flushing) return
+
+ // Create a promise for this auto-flush operation
+ const flushPromise = this._doFlush()
+ this.pendingAutoFlushes.add(flushPromise)
+
+ // Remove from pending set when complete (success or error)
+ flushPromise
+ .then(() => {
+ this.pendingAutoFlushes.delete(flushPromise)
+ })
+ .catch((err) => {
+ this.pendingAutoFlushes.delete(flushPromise)
+ throw err
+ })
+
+ return flushPromise
+ }
+
+ async _doFlush() {
+ // Check if database is destroyed or closed before starting
+ if (this.database.destroyed || this.database.closed) {
+ // Clear batches if database is closed/destroyed
+ this.batches = []
+ this.currentBatch = []
+ return
+ }
+
+ // Prevent concurrent flushes - if already flushing, wait for it
+ if (this.flushing) {
+ // Wait for the current flush to complete
+ while (this.flushing) {
+ await new Promise(resolve => setTimeout(resolve, 1))
+ }
+ // After waiting, check if there's anything left to flush
+ // If another flush completed everything, we're done
+ if (this.batches.length === 0 && this.currentBatch.length === 0) return
+
+ // Check again if database was closed during wait
+ if (this.database.destroyed || this.database.closed) {
+ this.batches = []
+ this.currentBatch = []
+ return
+ }
+ }
+
  this.flushing = true

  try {
- // Process all complete batches
- for (const batch of this.batches) {
- await this.database.insertBatch(batch)
- }
+ // Process continuously until queue is completely empty
+ // This handles the case where new data is added during the flush
+ while (this.batches.length > 0 || this.currentBatch.length > 0) {
+ // Check if database was closed during processing
+ if (this.database.destroyed || this.database.closed) {
+ // Clear remaining batches
+ this.batches = []
+ this.currentBatch = []
+ return
+ }

- // Process remaining records in current batch
- if (this.currentBatch.length > 0) {
- await this.database.insertBatch(this.currentBatch)
- }
+ // Process all complete batches that exist at this moment
+ // Note: new batches may be added to this.batches during this loop
+ const batchesToProcess = this.batches.length
+ for (let i = 0; i < batchesToProcess; i++) {
+ // Check again before each batch
+ if (this.database.destroyed || this.database.closed) {
+ this.batches = []
+ this.currentBatch = []
+ return
+ }
+
+ const batch = this.batches.shift() // Remove from front
+ await this.database.insertBatch(batch)
+ }

- // Clear all batches
- this.batches = []
- this.currentBatch = []
+ // Process current batch if it has data
+ // Note: new records may be added to currentBatch during processing
+ if (this.currentBatch.length > 0) {
+ // Check if database was closed
+ if (this.database.destroyed || this.database.closed) {
+ this.batches = []
+ this.currentBatch = []
+ return
+ }
+
+ // Check if currentBatch reached batchSize during processing
+ if (this.currentBatch.length >= this.batchSize) {
+ // Move it to batches array and process in next iteration
+ this.batches.push(this.currentBatch)
+ this.currentBatch = []
+ continue
+ }
+
+ // Process the current batch
+ const batchToProcess = this.currentBatch
+ this.currentBatch = [] // Clear before processing to allow new adds
+ await this.database.insertBatch(batchToProcess)
+ }
+ }
  } finally {
  this.flushing = false
  }
  }

+ async flush() {
+ // Wait for any pending auto-flushes to complete first
+ await this.waitForAutoFlushes()
+
+ // Then do a final flush to ensure everything is processed
+ await this._doFlush()
+ }
+
+ async waitForAutoFlushes() {
+ // Wait for all pending auto-flush operations to complete
+ if (this.pendingAutoFlushes.size > 0) {
+ await Promise.all(Array.from(this.pendingAutoFlushes))
+ }
+ }
+
  async commit() {
  // CRITICAL FIX: Make session auto-reusable by removing committed state
  // Allow multiple commits on the same session

+ // First, wait for all pending auto-flushes to complete
+ await this.waitForAutoFlushes()
+
+ // Then flush any remaining data (including currentBatch)
+ // This ensures everything is inserted before commit returns
  await this.flush()

  // Reset session state for next commit cycle
@@ -158,6 +269,9 @@ class InsertSession {
  const startTime = Date.now()
  const hasTimeout = maxWaitTime !== null && maxWaitTime !== undefined

+ // Wait for auto-flushes first
+ await this.waitForAutoFlushes()
+
  while (this.flushing || this.batches.length > 0 || this.currentBatch.length > 0) {
  // Check timeout only if we have one
  if (hasTimeout && (Date.now() - startTime) >= maxWaitTime) {
@@ -174,7 +288,10 @@ class InsertSession {
  * Check if this session has pending operations
  */
  hasPendingOperations() {
- return this.flushing || this.batches.length > 0 || this.currentBatch.length > 0
+ return this.pendingAutoFlushes.size > 0 ||
+ this.flushing ||
+ this.batches.length > 0 ||
+ this.currentBatch.length > 0
  }

  /**
@@ -189,6 +306,7 @@ class InsertSession {
  this.currentBatch = []
  this.totalInserted = 0
  this.flushing = false
+ this.pendingAutoFlushes.clear()
  }
  }

@@ -238,6 +356,8 @@ class Database extends EventEmitter {
  streamingThreshold: opts.streamingThreshold || 0.8, // Use streaming when limit > 80% of total records
  // Serialization options
  enableArraySerialization: opts.enableArraySerialization !== false, // Enable array serialization by default
+ // Index rebuild options
+ allowIndexRebuild: opts.allowIndexRebuild === true, // Allow automatic index rebuild when corrupted (default false - throws error)
  }, opts)

  // CRITICAL FIX: Initialize AbortController for lifecycle management
@@ -264,6 +384,8 @@ class Database extends EventEmitter {
  this.isSaving = false
  this.lastSaveTime = null
  this.initialized = false
+ this._offsetRecoveryInProgress = false
+ this.writeBufferTotalSize = 0


  // Initialize managers
@@ -310,10 +432,11 @@ class Database extends EventEmitter {

  // Validate indexes array (new format) - but only if we have fields
  if (this.opts.originalIndexes && Array.isArray(this.opts.originalIndexes)) {
- if (!this.opts.fields) {
- throw new Error('Index fields array requires fields configuration. Use: { fields: {...}, indexes: [...] }')
+ if (this.opts.fields) {
+ this.validateIndexFields(this.opts.originalIndexes)
+ } else if (this.opts.debugMode) {
+ console.log('⚠️ Skipping index field validation because no fields configuration was provided')
  }
- this.validateIndexFields(this.opts.originalIndexes)
  }

  if (this.opts.debugMode) {
@@ -330,10 +453,14 @@ class Database extends EventEmitter {
  * Validate field types
  */
  validateFieldTypes(fields, configType) {
- const supportedTypes = ['string', 'number', 'boolean', 'array:string', 'array:number', 'array:boolean', 'array', 'object']
+ const supportedTypes = ['string', 'number', 'boolean', 'array:string', 'array:number', 'array:boolean', 'array', 'object', 'auto']
  const errors = []

  for (const [fieldName, fieldType] of Object.entries(fields)) {
+ if (fieldType === 'auto') {
+ continue
+ }
+
  // Check if type is supported
  if (!supportedTypes.includes(fieldType)) {
  errors.push(`Unsupported ${configType} type '${fieldType}' for field '${fieldName}'. Supported types: ${supportedTypes.join(', ')}`)
@@ -383,26 +510,24 @@ class Database extends EventEmitter {
  * Prepare index configuration for IndexManager
  */
  prepareIndexConfiguration() {
- // Convert new fields/indexes format to legacy format for IndexManager
- if (this.opts.fields && Array.isArray(this.opts.indexes)) {
- // New format: { fields: {...}, indexes: [...] }
+ if (Array.isArray(this.opts.indexes)) {
  const indexedFields = {}
- const originalIndexes = [...this.opts.indexes] // Keep original for validation
-
+ const originalIndexes = [...this.opts.indexes]
+ const hasFieldConfig = this.opts.fields && typeof this.opts.fields === 'object'
+
  for (const fieldName of this.opts.indexes) {
- if (this.opts.fields[fieldName]) {
+ if (hasFieldConfig && this.opts.fields[fieldName]) {
  indexedFields[fieldName] = this.opts.fields[fieldName]
+ } else {
+ indexedFields[fieldName] = 'auto'
  }
  }
-
- // Store original indexes for validation
+
  this.opts.originalIndexes = originalIndexes
-
- // Replace indexes array with object for IndexManager
  this.opts.indexes = indexedFields
-
+
  if (this.opts.debugMode) {
- console.log(`🔍 Converted fields/indexes format: ${Object.keys(indexedFields).join(', ')} [${this.instanceId}]`)
+ console.log(`🔍 Normalized indexes array to object: ${Object.keys(indexedFields).join(', ')} [${this.instanceId}]`)
  }
  }
  // Legacy format (indexes as object) is already compatible
@@ -420,6 +545,32 @@ class Database extends EventEmitter {
  return
  }

+ // Handle legacy 'schema' option migration
+ if (this.opts.schema) {
+ // If fields is already provided and valid, ignore schema
+ if (this.opts.fields && typeof this.opts.fields === 'object' && Object.keys(this.opts.fields).length > 0) {
+ if (this.opts.debugMode) {
+ console.log(`⚠️ Both 'schema' and 'fields' options provided. Ignoring 'schema' and using 'fields'. [${this.instanceId}]`)
+ }
+ } else if (Array.isArray(this.opts.schema)) {
+ // Schema as array is no longer supported
+ throw new Error('The "schema" option as an array is no longer supported. Please use "fields" as an object instead. Example: { fields: { id: "number", name: "string" } }')
+ } else if (typeof this.opts.schema === 'object' && this.opts.schema !== null) {
+ // Schema as object - migrate to fields
+ this.opts.fields = { ...this.opts.schema }
+ if (this.opts.debugMode) {
+ console.log(`⚠️ Migrated 'schema' option to 'fields'. Please update your code to use 'fields' instead of 'schema'. [${this.instanceId}]`)
+ }
+ } else {
+ throw new Error('The "schema" option must be an object. Example: { schema: { id: "number", name: "string" } }')
+ }
+ }
+
+ // Validate that fields is provided (mandatory)
+ if (!this.opts.fields || typeof this.opts.fields !== 'object' || Object.keys(this.opts.fields).length === 0) {
+ throw new Error('The "fields" option is mandatory and must be an object with at least one field definition. Example: { fields: { id: "number", name: "string" } }')
+ }
+
  // CRITICAL FIX: Initialize serializer first - this was missing and causing crashes
  this.serializer = new Serializer(this.opts)

@@ -436,6 +587,19 @@ class Database extends EventEmitter {
  this.termManager.termMappingFields = termMappingFields
  this.opts.termMapping = true // Always enable term mapping for optimal performance

+ // Validation: Ensure all array:string indexed fields are in term mapping fields
+ if (this.opts.indexes) {
+ const arrayStringFields = []
+ for (const [field, type] of Object.entries(this.opts.indexes)) {
+ if (type === 'array:string' && !termMappingFields.includes(field)) {
+ arrayStringFields.push(field)
+ }
+ }
+ if (arrayStringFields.length > 0) {
+ console.warn(`⚠️ Warning: The following array:string indexed fields were not added to term mapping: ${arrayStringFields.join(', ')}. This may impact performance.`)
+ }
+ }
+
  if (this.opts.debugMode) {
  if (termMappingFields.length > 0) {
  console.log(`🔍 TermManager initialized for fields: ${termMappingFields.join(', ')} [${this.instanceId}]`)
@@ -462,6 +626,7 @@ class Database extends EventEmitter {
  this.writeBuffer = []
  this.writeBufferOffsets = [] // Track offsets for writeBuffer records
  this.writeBufferSizes = [] // Track sizes for writeBuffer records
+ this.writeBufferTotalSize = 0
  this.isInsideOperationQueue = false // Flag to prevent deadlock in save() calls

  // Initialize other managers
@@ -483,8 +648,8 @@ class Database extends EventEmitter {
  const termMappingFields = []

  for (const [field, type] of Object.entries(this.opts.indexes)) {
- // Fields that should use term mapping
- if (type === 'array:string' || type === 'string') {
+ // Fields that should use term mapping (only array fields)
+ if (type === 'array:string') {
  termMappingFields.push(field)
  }
  }
@@ -704,6 +869,9 @@ class Database extends EventEmitter {
  // Don't load the entire file - just initialize empty state
  // The actual record count will come from loaded offsets
  this.writeBuffer = [] // writeBuffer is only for new unsaved records
+ this.writeBufferOffsets = []
+ this.writeBufferSizes = []
+ this.writeBufferTotalSize = 0

  // recordCount will be determined from loaded offsets
  // If no offsets were loaded, we'll count records only if needed
@@ -713,13 +881,55 @@ class Database extends EventEmitter {
  const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
  try {
  const idxFileHandler = new FileHandler(idxPath, this.fileMutex, this.opts)
+
+ // Check if file exists BEFORE trying to read it
+ const fileExists = await idxFileHandler.exists()
+ if (!fileExists) {
+ // File doesn't exist - this will be handled by catch block
+ throw new Error('Index file does not exist')
+ }
+
  const idxData = await idxFileHandler.readAll()
- if (idxData && idxData.trim()) {
+
+ // If file exists but is empty or has no content, treat as corrupted
+ if (!idxData || !idxData.trim()) {
+ // File exists but is empty - treat as corrupted
+ const fileExists = await this.fileHandler.exists()
+ if (fileExists) {
+ const stats = await this.fileHandler.getFileStats()
+ if (stats && stats.size > 0) {
+ // Data file has content but index is empty - corrupted
+ if (!this.opts.allowIndexRebuild) {
+ throw new Error(
+ `Index file is corrupted: ${idxPath} exists but contains no index data, ` +
+ `while the data file has ${stats.size} bytes. ` +
+ `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+ `or manually fix/delete the corrupted index file.`
+ )
+ }
+ // Schedule rebuild if allowed
+ if (this.opts.debugMode) {
+ console.log(`⚠️ Index file exists but is empty while data file has ${stats.size} bytes - scheduling rebuild`)
+ }
+ this._scheduleIndexRebuild()
+ // Continue execution - rebuild will happen on first query
+ // Don't return - let the code continue to load other things if needed
+ }
+ }
+ // If data file is also empty, just continue (no error needed)
+ // Don't return - let the code continue to load other things if needed
+ } else {
+ // File has content - parse and load it
  const parsedIdxData = JSON.parse(idxData)

  // Always load offsets if available (even without indexed fields)
  if (parsedIdxData.offsets && Array.isArray(parsedIdxData.offsets)) {
  this.offsets = parsedIdxData.offsets
+ // CRITICAL FIX: Update IndexManager totalLines to match offsets length
+ // This ensures queries and length property work correctly even if offsets are reset later
+ if (this.indexManager && this.offsets.length > 0) {
+ this.indexManager.setTotalLines(this.offsets.length)
+ }
  if (this.opts.debugMode) {
  console.log(`📂 Loaded ${this.offsets.length} offsets from ${idxPath}`)
  }
@@ -733,24 +943,8 @@ class Database extends EventEmitter {
  }
  }

- // Load index data only if available and we have indexed fields
- if (parsedIdxData && parsedIdxData.index && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
- this.indexManager.load(parsedIdxData.index)
-
- // Load term mapping data from .idx file if it exists
- if (parsedIdxData.termMapping && this.termManager) {
- await this.termManager.loadTerms(parsedIdxData.termMapping)
- if (this.opts.debugMode) {
- console.log(`📂 Loaded term mapping from ${idxPath}`)
- }
- }
-
- if (this.opts.debugMode) {
- console.log(`📂 Loaded index data from ${idxPath}`)
- }
- }
-
  // Load configuration from .idx file if database exists
+ // CRITICAL: Load config FIRST so indexes are available for term mapping detection
  if (parsedIdxData.config) {
  const config = parsedIdxData.config

@@ -764,11 +958,94 @@ class Database extends EventEmitter {

  if (config.indexes) {
  this.opts.indexes = config.indexes
+ if (this.indexManager) {
+ this.indexManager.setIndexesConfig(config.indexes)
+ }
  if (this.opts.debugMode) {
  console.log(`📂 Loaded indexes config from ${idxPath}:`, Object.keys(config.indexes))
  }
  }

+ // CRITICAL FIX: Update term mapping fields AFTER loading indexes from config
+ // This ensures termManager knows which fields use term mapping
+ // (getTermMappingFields() was called during init() before indexes were loaded)
+ if (this.termManager && config.indexes) {
+ const termMappingFields = this.getTermMappingFields()
+ this.termManager.termMappingFields = termMappingFields
+ if (this.opts.debugMode && termMappingFields.length > 0) {
+ console.log(`🔍 Updated term mapping fields after loading indexes: ${termMappingFields.join(', ')}`)
+ }
+ }
+ }
+
+ // Load term mapping data from .idx file if it exists
+ // CRITICAL: Load termMapping even if index is empty (terms are needed for queries)
+ // NOTE: termMappingFields should already be set above from config.indexes
+ if (parsedIdxData.termMapping && this.termManager && this.termManager.termMappingFields && this.termManager.termMappingFields.length > 0) {
+ await this.termManager.loadTerms(parsedIdxData.termMapping)
+ if (this.opts.debugMode) {
+ console.log(`📂 Loaded term mapping from ${idxPath} (${Object.keys(parsedIdxData.termMapping).length} terms)`)
+ }
+ }
+
+ // Load index data only if available and we have indexed fields
+ if (parsedIdxData && parsedIdxData.index && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
+ this.indexManager.load(parsedIdxData.index)
+
+ if (this.opts.debugMode) {
+ console.log(`📂 Loaded index data from ${idxPath}`)
+ }
+
+ // Check if loaded index is actually empty (corrupted)
+ let hasAnyIndexData = false
+ for (const field of this.indexManager.indexedFields) {
+ if (this.indexManager.hasUsableIndexData(field)) {
+ hasAnyIndexData = true
+ break
+ }
+ }
+
+ if (this.opts.debugMode) {
+ console.log(`📊 Index check: hasAnyIndexData=${hasAnyIndexData}, indexedFields=${this.indexManager.indexedFields.join(',')}`)
+ }
+
+ // Schedule rebuild if index is empty AND file exists with data
+ if (!hasAnyIndexData) {
+ // Check if the actual .jdb file has data
+ const fileExists = await this.fileHandler.exists()
+ if (this.opts.debugMode) {
+ console.log(`📊 File check: exists=${fileExists}`)
+ }
+ if (fileExists) {
+ const stats = await this.fileHandler.getFileStats()
+ if (this.opts.debugMode) {
+ console.log(`📊 File stats: size=${stats?.size}`)
+ }
+ if (stats && stats.size > 0) {
+ // File has data but index is empty - corrupted index detected
+ if (!this.opts.allowIndexRebuild) {
+ const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+ throw new Error(
+ `Index file is corrupted: ${idxPath} exists but contains no index data, ` +
+ `while the data file has ${stats.size} bytes. ` +
+ `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+ `or manually fix/delete the corrupted index file.`
+ )
+ }
+ // Schedule rebuild if allowed
+ if (this.opts.debugMode) {
+ console.log(`⚠️ Index loaded but empty while file has ${stats.size} bytes - scheduling rebuild`)
+ }
+ this._scheduleIndexRebuild()
+ }
+ }
+ }
+ }
+
+ // Continue with remaining config loading
+ if (parsedIdxData.config) {
+ const config = parsedIdxData.config
+
  if (config.originalIndexes) {
  this.opts.originalIndexes = config.originalIndexes
  if (this.opts.debugMode) {
@@ -776,22 +1053,107 @@ class Database extends EventEmitter {
  }
  }

- // Reinitialize schema from saved configuration
- if (config.schema && this.serializer) {
+ // Reinitialize schema from saved configuration (only if fields not provided)
+ // Note: fields option takes precedence over saved schema
+ if (!this.opts.fields && config.schema && this.serializer) {
  this.serializer.initializeSchema(config.schema)
  if (this.opts.debugMode) {
  console.log(`📂 Loaded schema from ${idxPath}:`, config.schema.join(', '))
  }
+ } else if (this.opts.fields && this.serializer) {
+ // Use fields option instead of saved schema
+ const fieldNames = Object.keys(this.opts.fields)
+ if (fieldNames.length > 0) {
+ this.serializer.initializeSchema(fieldNames)
+ if (this.opts.debugMode) {
+ console.log(`📂 Schema initialized from fields option:`, fieldNames.join(', '))
+ }
+ }
  }
  }
  }
  } catch (idxError) {
  // Index file doesn't exist or is corrupted, rebuild from data
+ // BUT: if error is about rebuild being disabled, re-throw it immediately
+ if (idxError.message && (idxError.message.includes('allowIndexRebuild') || idxError.message.includes('corrupted'))) {
+ throw idxError
+ }
+
+ // If error is "Index file does not exist", check if we should throw or rebuild
+ if (idxError.message && idxError.message.includes('does not exist')) {
+ // Check if the actual .jdb file has data that needs indexing
+ try {
+ const fileExists = await this.fileHandler.exists()
+ if (fileExists) {
+ const stats = await this.fileHandler.getFileStats()
+ if (stats && stats.size > 0) {
+ // File has data but index is missing
+ if (!this.opts.allowIndexRebuild) {
+ const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+ throw new Error(
+ `Index file is missing or corrupted: ${idxPath} does not exist or is invalid, ` +
+ `while the data file has ${stats.size} bytes. ` +
+ `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+ `or manually create/fix the index file.`
+ )
+ }
+ // Schedule rebuild if allowed
+ if (this.opts.debugMode) {
+ console.log(`⚠️ .jdb file has ${stats.size} bytes but index missing - scheduling rebuild`)
+ }
+ this._scheduleIndexRebuild()
+ return // Exit early
+ }
+ }
+ } catch (statsError) {
+ if (this.opts.debugMode) {
+ console.log('⚠️ Could not check file stats:', statsError.message)
+ }
+ // Re-throw if it's our error about rebuild being disabled
+ if (statsError.message && statsError.message.includes('allowIndexRebuild')) {
+ throw statsError
+ }
+ }
+ // If no data file or empty, just continue (no error needed)
+ return
+ }
+
  if (this.opts.debugMode) {
- console.log('📂 No index file found, rebuilding indexes from data')
+ console.log('📂 No index file found or corrupted, checking if rebuild is needed...')
+ }
+
+ // Check if the actual .jdb file has data that needs indexing
+ try {
+ const fileExists = await this.fileHandler.exists()
+ if (fileExists) {
+ const stats = await this.fileHandler.getFileStats()
+ if (stats && stats.size > 0) {
+ // File has data but index is missing or corrupted
+ if (!this.opts.allowIndexRebuild) {
+ const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+ throw new Error(
+ `Index file is missing or corrupted: ${idxPath} does not exist or is invalid, ` +
+ `while the data file has ${stats.size} bytes. ` +
+ `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+ `or manually create/fix the index file.`
+ )
+ }
+ // Schedule rebuild if allowed
+ if (this.opts.debugMode) {
+ console.log(`⚠️ .jdb file has ${stats.size} bytes but index missing - scheduling rebuild`)
+ }
+ this._scheduleIndexRebuild()
+ }
+ }
+ } catch (statsError) {
+ if (this.opts.debugMode) {
+ console.log('⚠️ Could not check file stats:', statsError.message)
+ }
+ // Re-throw if it's our error about rebuild being disabled
+ if (statsError.message && statsError.message.includes('allowIndexRebuild')) {
+ throw statsError
+ }
+ }
  }
- // We can't rebuild index without violating no-memory-storage rule
- // Index will be rebuilt as needed during queries
  }
  } else {
  // No indexed fields, no need to rebuild indexes
@@ -820,6 +1182,28 @@ class Database extends EventEmitter {
  console.log(`💾 save() called: writeBuffer.length=${this.writeBuffer.length}, offsets.length=${this.offsets.length}`)
  }

+ // CRITICAL FIX: Wait for all active insert sessions to complete their auto-flushes
+ // This prevents race conditions where save() writes data while auto-flushes are still adding to writeBuffer
+ if (this.activeInsertSessions && this.activeInsertSessions.size > 0) {
+ if (this.opts.debugMode) {
+ console.log(`⏳ save(): Waiting for ${this.activeInsertSessions.size} active insert sessions to complete auto-flushes`)
+ }
+
+ const sessionPromises = Array.from(this.activeInsertSessions).map(session =>
+ session.waitForAutoFlushes().catch(err => {
+ if (this.opts.debugMode) {
+ console.warn(`⚠️ save(): Error waiting for insert session: ${err.message}`)
+ }
+ })
+ )
+
+ await Promise.all(sessionPromises)
+
+ if (this.opts.debugMode) {
+ console.log(`✅ save(): All insert sessions completed auto-flushes`)
+ }
+ }
+
  // Auto-save removed - no need to pause anything

  try {
@@ -915,7 +1299,8 @@ class Database extends EventEmitter {

  // CRITICAL FIX: Capture writeBuffer and deletedIds at the start to prevent race conditions
  const writeBufferSnapshot = [...this.writeBuffer]
- const deletedIdsSnapshot = new Set(this.deletedIds)
+ // CRITICAL FIX: Normalize deleted IDs to strings for consistent comparison
+ const deletedIdsSnapshot = new Set(Array.from(this.deletedIds).map(id => String(id)))

  // OPTIMIZATION: Process pending index updates in batch before save
  if (this.pendingIndexUpdates && this.pendingIndexUpdates.length > 0) {
@@ -964,10 +1349,12 @@ class Database extends EventEmitter {
  let orphanedCount = 0

  // Check if there are new records to save (after flush, writeBuffer should be empty)
+ // CRITICAL FIX: Also check writeBufferSnapshot.length > 0 to handle updates/deletes
+ // that were in writeBuffer before flush but are now in snapshot
  if (this.opts.debugMode) {
  console.log(`💾 Save: writeBuffer.length=${this.writeBuffer.length}, writeBufferSnapshot.length=${writeBufferSnapshot.length}`)
  }
- if (this.writeBuffer.length > 0) {
+ if (this.writeBuffer.length > 0 || writeBufferSnapshot.length > 0) {
  if (this.opts.debugMode) {
  console.log(`💾 Save: WriteBuffer has ${writeBufferSnapshot.length} records, using streaming approach`)
  }
@@ -1001,21 +1388,20 @@ class Database extends EventEmitter {
  // Add streaming operation
  parallelOperations.push(
  this._streamExistingRecords(deletedIdsSnapshot, writeBufferSnapshot).then(existingRecords => {
+ // CRITICAL FIX: _streamExistingRecords already handles updates via updatedRecordsMap
+ // So existingRecords already contains updated records from writeBufferSnapshot
+ // We only need to add records from writeBufferSnapshot that are NEW (not updates)
  allData = [...existingRecords]

- // OPTIMIZATION: Use Map for faster lookups
- const existingRecordMap = new Map(existingRecords.filter(r => r && r.id).map(r => [r.id, r]))
+ // OPTIMIZATION: Use Set for faster lookups of existing record IDs
+ // CRITICAL FIX: Normalize IDs to strings for consistent comparison
+ const existingRecordIds = new Set(existingRecords.filter(r => r && r.id).map(r => String(r.id)))

+ // Add only NEW records from writeBufferSnapshot (not updates, as those are already in existingRecords)
  for (const record of writeBufferSnapshot) {
- if (!deletedIdsSnapshot.has(record.id)) {
- if (existingRecordMap.has(record.id)) {
- // Replace existing record
- const existingIndex = allData.findIndex(r => r.id === record.id)
- allData[existingIndex] = record
- } else {
- // Add new record
- allData.push(record)
- }
+ if (record && record.id && !deletedIdsSnapshot.has(String(record.id)) && !existingRecordIds.has(String(record.id))) {
+ // This is a new record, not an update
+ allData.push(record)
  }
  }
  })
@@ -1060,15 +1446,43 @@ class Database extends EventEmitter {
  console.log(`💾 Save: _streamExistingRecords returned ${existingRecords.length} records`)
  console.log(`💾 Save: existingRecords:`, existingRecords)
  }
- // Combine existing records with new records from writeBuffer
- allData = [...existingRecords, ...writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))]
+ // CRITICAL FIX: _streamExistingRecords already handles updates via updatedRecordsMap
+ // So existingRecords already contains updated records from writeBufferSnapshot
+ // We only need to add records from writeBufferSnapshot that are NEW (not updates)
+ allData = [...existingRecords]
+
+ // OPTIMIZATION: Use Set for faster lookups of existing record IDs
+ const existingRecordIds = new Set(existingRecords.filter(r => r && r.id).map(r => r.id))
+
+ // Add only NEW records from writeBufferSnapshot (not updates, as those are already in existingRecords)
+ for (const record of writeBufferSnapshot) {
+ if (record && record.id && !deletedIdsSnapshot.has(String(record.id)) && !existingRecordIds.has(record.id)) {
+ // This is a new record, not an update
+ allData.push(record)
+ }
+ }
+
+ if (this.opts.debugMode) {
+ const updatedCount = writeBufferSnapshot.filter(r => r && r.id && existingRecordIds.has(String(r.id))).length
+ const newCount = writeBufferSnapshot.filter(r => r && r.id && !existingRecordIds.has(String(r.id))).length
+ console.log(`💾 Save: Combined data - existingRecords: ${existingRecords.length}, updatedFromBuffer: ${updatedCount}, newFromBuffer: ${newCount}, total: ${allData.length}`)
+ console.log(`💾 Save: WriteBuffer record IDs:`, writeBufferSnapshot.map(r => r && r.id ? r.id : 'no-id'))
+ console.log(`💾 Save: Existing record IDs:`, Array.from(existingRecordIds))
+ console.log(`💾 Save: Sample existing record:`, existingRecords[0] ? { id: existingRecords[0].id, name: existingRecords[0].name, tags: existingRecords[0].tags } : 'null')
+ console.log(`💾 Save: Sample writeBuffer record:`, writeBufferSnapshot[0] ? { id: writeBufferSnapshot[0].id, name: writeBufferSnapshot[0].name, tags: writeBufferSnapshot[0].tags } : 'null')
+ }
  }).catch(error => {
  if (this.opts.debugMode) {
  console.log(`💾 Save: _streamExistingRecords failed:`, error.message)
  }
  // CRITICAL FIX: Use safe fallback to preserve existing data instead of losing it
  return this._loadExistingRecordsFallback(deletedIdsSnapshot, writeBufferSnapshot).then(fallbackRecords => {
- allData = [...fallbackRecords, ...writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))]
+ // CRITICAL FIX: Avoid duplicating updated records
+ const fallbackRecordIds = new Set(fallbackRecords.map(r => r.id))
+ const newRecordsFromBuffer = writeBufferSnapshot.filter(record =>
+ !deletedIdsSnapshot.has(String(record.id)) && !fallbackRecordIds.has(record.id)
+ )
+ allData = [...fallbackRecords, ...newRecordsFromBuffer]
  if (this.opts.debugMode) {
  console.log(`💾 Save: Fallback preserved ${fallbackRecords.length} existing records, total: ${allData.length}`)
  }
@@ -1078,7 +1492,7 @@ class Database extends EventEmitter {
  console.log(`💾 Save: CRITICAL - Data loss may occur, only writeBuffer will be saved`)
  }
  // Last resort: at least save what we have in writeBuffer
- allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))
+ allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(String(record.id)))
  })
  })
  )
@@ -1092,7 +1506,12 @@ class Database extends EventEmitter {
  // CRITICAL FIX: Use safe fallback to preserve existing data instead of losing it
  try {
  const fallbackRecords = await this._loadExistingRecordsFallback(deletedIdsSnapshot, writeBufferSnapshot)
- allData = [...fallbackRecords, ...writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))]
+ // CRITICAL FIX: Avoid duplicating updated records
+ const fallbackRecordIds = new Set(fallbackRecords.map(r => r.id))
+ const newRecordsFromBuffer = writeBufferSnapshot.filter(record =>
+ !deletedIdsSnapshot.has(String(record.id)) && !fallbackRecordIds.has(record.id)
+ )
+ allData = [...fallbackRecords, ...newRecordsFromBuffer]
  if (this.opts.debugMode) {
  console.log(`💾 Save: Fallback preserved ${fallbackRecords.length} existing records, total: ${allData.length}`)
  }
@@ -1102,23 +1521,46 @@ class Database extends EventEmitter {
  console.log(`💾 Save: CRITICAL - Data loss may occur, only writeBuffer will be saved`)
  }
  // Last resort: at least save what we have in writeBuffer
- allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))
+ allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(String(record.id)))
  }
  }
  } else {
  // No existing data, use only writeBuffer
- allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))
+ allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(String(record.id)))
  }
  }

  // CRITICAL FIX: Calculate offsets based on actual serialized data that will be written
  // This ensures consistency between offset calculation and file writing
- const jsonlData = allData.length > 0
- ? this.serializer.serializeBatch(allData)
+ // CRITICAL FIX: Remove term IDs before serialization to ensure proper serialization
+ const cleanedData = allData.map(record => {
+ if (!record || typeof record !== 'object') {
+ if (this.opts.debugMode) {
+ console.log(`💾 Save: WARNING - Invalid record in allData:`, record)
+ }
+ return record
+ }
+ return this.removeTermIdsForSerialization(record)
+ })
+
+ if (this.opts.debugMode) {
+ console.log(`💾 Save: allData.length=${allData.length}, cleanedData.length=${cleanedData.length}`)
+ console.log(`💾 Save: Sample cleaned record:`, cleanedData[0] ? Object.keys(cleanedData[0]) : 'null')
+ }
+
+ const jsonlData = cleanedData.length > 0
+ ? this.serializer.serializeBatch(cleanedData)
  : ''
  const jsonlString = jsonlData.toString('utf8')
  const lines = jsonlString.split('\n').filter(line => line.trim())

+ if (this.opts.debugMode) {
+ console.log(`💾 Save: Serialized ${lines.length} lines`)
+ if (lines.length > 0) {
+ console.log(`💾 Save: First line (first 200 chars):`, lines[0].substring(0, 200))
+ }
+ }
+
  this.offsets = []
  let currentOffset = 0
  for (let i = 0; i < lines.length; i++) {
@@ -1199,19 +1641,53 @@ class Database extends EventEmitter {

  // Clear writeBuffer and deletedIds after successful save only if we had data to save
  if (allData.length > 0) {
- // Rebuild index when records were deleted to maintain consistency
+ // Rebuild index when records were deleted or updated to maintain consistency
  const hadDeletedRecords = deletedIdsSnapshot.size > 0
+ const hadUpdatedRecords = writeBufferSnapshot.length > 0
  if (this.indexManager && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
- if (hadDeletedRecords) {
- // Clear the index and rebuild it from the remaining records
+ if (hadDeletedRecords || hadUpdatedRecords) {
+ // Clear the index and rebuild it from the saved records
+ // This ensures that lineNumbers point to the correct positions in the file
  this.indexManager.clear()
  if (this.opts.debugMode) {
- console.log(`🧹 Rebuilding index after removing ${deletedIdsSnapshot.size} deleted records`)
+ if (hadDeletedRecords && hadUpdatedRecords) {
+ console.log(`🧹 Rebuilding index after removing ${deletedIdsSnapshot.size} deleted records and updating ${writeBufferSnapshot.length} records`)
+ } else if (hadDeletedRecords) {
+ console.log(`🧹 Rebuilding index after removing ${deletedIdsSnapshot.size} deleted records`)
+ } else {
+ console.log(`🧹 Rebuilding index after updating ${writeBufferSnapshot.length} records`)
+ }
  }

  // Rebuild index from the saved records
+ // CRITICAL: Process term mapping for records loaded from file to ensure ${field}Ids are available
  for (let i = 0; i < allData.length; i++) {
- const record = allData[i]
+ let record = allData[i]
+
+ // CRITICAL FIX: Ensure records have ${field}Ids for term mapping fields
+ // Records from writeBuffer already have ${field}Ids from processTermMapping
+ // Records from file need to be processed to restore ${field}Ids
+ const termMappingFields = this.getTermMappingFields()
+ if (termMappingFields.length > 0 && this.termManager) {
+ for (const field of termMappingFields) {
+ if (record[field] && Array.isArray(record[field])) {
+ // Check if field contains term IDs (numbers) or terms (strings)
+ const firstValue = record[field][0]
+ if (typeof firstValue === 'number') {
+ // Already term IDs, create ${field}Ids
+ record[`${field}Ids`] = record[field]
+ } else if (typeof firstValue === 'string') {
+ // Terms, need to convert to term IDs
+ const termIds = record[field].map(term => {
+ const termId = this.termManager.getTermIdWithoutIncrement(term)
+ return termId !== undefined ? termId : this.termManager.getTermId(term)
+ })
+ record[`${field}Ids`] = termIds
+ }
+ }
+ }
+ }
+
  await this.indexManager.add(record, i)
  }
  }
@@ -1247,6 +1723,8 @@ class Database extends EventEmitter {
  this.writeBuffer = []
  this.writeBufferOffsets = []
  this.writeBufferSizes = []
+ this.writeBufferTotalSize = 0
+ this.writeBufferTotalSize = 0
  }

  // indexOffset already set correctly to currentOffset (total file size) above
@@ -1299,12 +1777,21 @@ class Database extends EventEmitter {
  this.termManager.decrementTermCount(termId)
  }
  } else if (oldRecord[field] && Array.isArray(oldRecord[field])) {
- // Use terms to decrement (fallback for backward compatibility)
- for (const term of oldRecord[field]) {
- const termId = this.termManager.termToId.get(term)
- if (termId) {
+ // Check if field contains term IDs (numbers) or terms (strings)
+ const firstValue = oldRecord[field][0]
+ if (typeof firstValue === 'number') {
+ // Field contains term IDs (from find with restoreTerms: false)
+ for (const termId of oldRecord[field]) {
  this.termManager.decrementTermCount(termId)
  }
+ } else if (typeof firstValue === 'string') {
+ // Field contains terms (strings) - convert to term IDs
+ for (const term of oldRecord[field]) {
+ const termId = this.termManager.termToId.get(term)
+ if (termId) {
+ this.termManager.decrementTermCount(termId)
+ }
+ }
  }
  }
  }
@@ -1465,18 +1952,16 @@ class Database extends EventEmitter {
  }

  // OPTIMIZATION: Process records using pre-computed term IDs
- return records.map(record => {
- const processedRecord = { ...record }
-
+ for (const record of records) {
  for (const field of termMappingFields) {
  if (record[field] && Array.isArray(record[field])) {
  const termIds = record[field].map(term => termIdMap.get(term))
- processedRecord[`${field}Ids`] = termIds
+ record[`${field}Ids`] = termIds
  }
  }
-
- return processedRecord
- })
+ }
+
+ return records
  }


@@ -1559,6 +2044,7 @@ class Database extends EventEmitter {
  }

  // Apply schema enforcement - convert to array format and back to enforce schema
+ // This will discard any fields not in the schema
  const schemaEnforcedRecord = this.applySchemaEnforcement(record)

  // Don't store in this.data - only use writeBuffer and index
@@ -1570,18 +2056,19 @@ class Database extends EventEmitter {
  // OPTIMIZATION: Calculate and store offset and size for writeBuffer record
  // SPACE OPTIMIZATION: Remove term IDs before serialization
  const cleanRecord = this.removeTermIdsForSerialization(record)
- const recordJson = this.serializer.serialize(cleanRecord).toString('utf8')
- const recordSize = Buffer.byteLength(recordJson, 'utf8')
+ const recordBuffer = this.serializer.serialize(cleanRecord)
+ const recordSize = recordBuffer.length

  // Calculate offset based on end of file + previous writeBuffer sizes
- const previousWriteBufferSize = this.writeBufferSizes.reduce((sum, size) => sum + size, 0)
+ const previousWriteBufferSize = this.writeBufferTotalSize
  const recordOffset = this.indexOffset + previousWriteBufferSize

  this.writeBufferOffsets.push(recordOffset)
  this.writeBufferSizes.push(recordSize)
+ this.writeBufferTotalSize += recordSize

- // OPTIMIZATION: Use the current writeBuffer size as the line number (0-based index)
- const lineNumber = this.writeBuffer.length - 1
+ // OPTIMIZATION: Use the absolute line number (persisted records + writeBuffer index)
+ const lineNumber = this._getAbsoluteLineNumber(this.writeBuffer.length - 1)

  // OPTIMIZATION: Defer index updates to batch processing
  // Store the record for batch index processing
@@ -1652,7 +2139,7 @@ class Database extends EventEmitter {
  console.log(`💾 _insertBatchInternal: processing size=${dataArray.length}, startWriteBuffer=${this.writeBuffer.length}`)
  }
  const records = []
- const startLineNumber = this.writeBuffer.length
+ const existingWriteBufferLength = this.writeBuffer.length

  // Initialize schema if not already done (auto-detect from first record)
  if (this.serializer && !this.serializer.schemaManager.isInitialized && dataArray.length > 0) {
@@ -1684,13 +2171,13 @@ class Database extends EventEmitter {
  this.writeBuffer.push(...schemaEnforcedRecords)

  // OPTIMIZATION: Calculate offsets and sizes in batch (O(n))
- let runningTotalSize = this.writeBufferSizes.reduce((sum, size) => sum + size, 0)
+ let runningTotalSize = this.writeBufferTotalSize
  for (let i = 0; i < processedRecords.length; i++) {
  const record = processedRecords[i]
  // SPACE OPTIMIZATION: Remove term IDs before serialization
  const cleanRecord = this.removeTermIdsForSerialization(record)
- const recordJson = this.serializer.serialize(cleanRecord).toString('utf8')
- const recordSize = Buffer.byteLength(recordJson, 'utf8')
+ const recordBuffer = this.serializer.serialize(cleanRecord)
+ const recordSize = recordBuffer.length

  const recordOffset = this.indexOffset + runningTotalSize
  runningTotalSize += recordSize
@@ -1698,6 +2185,7 @@ class Database extends EventEmitter {
  this.writeBufferOffsets.push(recordOffset)
  this.writeBufferSizes.push(recordSize)
  }
+ this.writeBufferTotalSize = runningTotalSize

  // OPTIMIZATION: Batch process index updates
  if (!this.pendingIndexUpdates) {
@@ -1705,7 +2193,7 @@ class Database extends EventEmitter {
  }

  for (let i = 0; i < processedRecords.length; i++) {
- const lineNumber = startLineNumber + i
+ const lineNumber = this._getAbsoluteLineNumber(existingWriteBufferLength + i)
  this.pendingIndexUpdates.push({ record: processedRecords[i], lineNumber })
  }

@@ -1745,7 +2233,7 @@ class Database extends EventEmitter {
  try {
  // Validate indexed query mode if enabled
  if (this.opts.indexedQueryMode === 'strict') {
- this._validateIndexedQuery(criteria)
+ this._validateIndexedQuery(criteria, options)
  }

  // Get results from file (QueryManager already handles term ID restoration)
@@ -1820,8 +2308,15 @@ class Database extends EventEmitter {
1820
2308
  /**
1821
2309
  * Validate indexed query mode for strict mode
1822
2310
  * @private
2311
+ * @param {Object} criteria - Query criteria
2312
+ * @param {Object} options - Query options
1823
2313
  */
1824
- _validateIndexedQuery(criteria) {
2314
+ _validateIndexedQuery(criteria, options = {}) {
2315
+ // Allow bypassing strict mode validation with allowNonIndexed option
2316
+ if (options.allowNonIndexed === true) {
2317
+ return; // Skip validation for this query
2318
+ }
2319
+
1825
2320
  if (!criteria || typeof criteria !== 'object') {
1826
2321
  return // Allow null/undefined criteria
1827
2322
  }
@@ -2094,23 +2589,26 @@ class Database extends EventEmitter {
2094
2589
  }
2095
2590
  }
2096
2591
 
2097
- // Update record in writeBuffer or add to writeBuffer if not present
2592
+ // CRITICAL FIX: Update record in writeBuffer or add to writeBuffer if not present
2593
+ // For records in the file, we need to ensure they are properly marked for replacement
2098
2594
  const index = this.writeBuffer.findIndex(r => r.id === record.id)
2099
2595
  let lineNumber = null
2596
+
2100
2597
  if (index !== -1) {
2101
2598
  // Record is already in writeBuffer, update it
2102
2599
  this.writeBuffer[index] = updated
2103
- lineNumber = index
2600
+ lineNumber = this._getAbsoluteLineNumber(index)
2104
2601
  if (this.opts.debugMode) {
2105
2602
  console.log(`🔄 UPDATE: Updated existing writeBuffer record at index ${index}`)
2106
2603
  }
2107
2604
  } else {
2108
2605
  // Record is in file, add updated version to writeBuffer
2109
- // This will ensure the updated record is saved and replaces the file version
2606
+ // CRITICAL FIX: Ensure the old record in file will be replaced by checking if it exists in offsets
2607
+ // The save() method will handle replacement via _streamExistingRecords which checks updatedRecordsMap
2110
2608
  this.writeBuffer.push(updated)
2111
- lineNumber = this.writeBuffer.length - 1
2609
+ lineNumber = this._getAbsoluteLineNumber(this.writeBuffer.length - 1)
2112
2610
  if (this.opts.debugMode) {
2113
- console.log(`🔄 UPDATE: Added new record to writeBuffer at index ${lineNumber}`)
2611
+ console.log(`🔄 UPDATE: Added updated record to writeBuffer (will replace file record ${record.id})`)
2114
2612
  }
2115
2613
  }
2116
2614
 
@@ -2245,16 +2743,7 @@ class Database extends EventEmitter {
2245
2743
  return
2246
2744
  }
2247
2745
 
2248
- // Try to get schema from options first
2249
- if (this.opts.schema && Array.isArray(this.opts.schema)) {
2250
- this.serializer.initializeSchema(this.opts.schema)
2251
- if (this.opts.debugMode) {
2252
- console.log(`🔍 Schema initialized from options: ${this.opts.schema.join(', ')} [${this.instanceId}]`)
2253
- }
2254
- return
2255
- }
2256
-
2257
- // Try to initialize from fields configuration (new format)
2746
+ // Initialize from fields configuration (mandatory)
2258
2747
  if (this.opts.fields && typeof this.opts.fields === 'object') {
2259
2748
  const fieldNames = Object.keys(this.opts.fields)
2260
2749
  if (fieldNames.length > 0) {
@@ -2266,7 +2755,7 @@ class Database extends EventEmitter {
2266
2755
  }
2267
2756
  }
2268
2757
 
2269
- // Try to auto-detect schema from existing data
2758
+ // Try to auto-detect schema from existing data (fallback for migration scenarios)
2270
2759
  if (this.data && this.data.length > 0) {
2271
2760
  this.serializer.initializeSchema(this.data, true) // autoDetect = true
2272
2761
  if (this.opts.debugMode) {
@@ -2275,10 +2764,6 @@ class Database extends EventEmitter {
2275
2764
  return
2276
2765
  }
2277
2766
 
2278
- // CRITICAL FIX: Don't initialize schema from indexes
2279
- // This was causing data loss because only indexed fields were preserved
2280
- // Let schema be auto-detected from actual data instead
2281
-
2282
2767
  if (this.opts.debugMode) {
2283
2768
  console.log(`🔍 No schema initialization possible - will auto-detect on first insert [${this.instanceId}]`)
2284
2769
  }
@@ -2294,6 +2779,21 @@ class Database extends EventEmitter {
  const savedRecords = this.offsets.length
  const writeBufferRecords = this.writeBuffer.length

+ // CRITICAL FIX: If offsets are empty but indexOffset exists, use fallback calculation
+ // This handles cases where offsets weren't loaded or were reset
+ if (savedRecords === 0 && this.indexOffset > 0 && this.initialized) {
+ // Try to use IndexManager totalLines if available
+ if (this.indexManager && this.indexManager.totalLines > 0) {
+ return this.indexManager.totalLines + writeBufferRecords
+ }
+
+ // Fallback: estimate from indexOffset (less accurate but better than 0)
+ // This is a defensive fix for cases where offsets are missing but file has data
+ if (this.opts.debugMode) {
+ console.log(`⚠️ LENGTH: offsets array is empty but indexOffset=${this.indexOffset}, using IndexManager.totalLines or estimation`)
+ }
+ }
+
  // CRITICAL FIX: Validate that offsets array is consistent with actual data
  // This prevents the bug where database reassignment causes desynchronization
  if (this.initialized && savedRecords > 0) {
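The fallback above changes how the record count is derived when the offsets array was never loaded. A rough sketch of the resulting precedence, assuming totalLines mirrors IndexManager.totalLines (the parameter names are illustrative):

// Sketch: record count = persisted records + buffered records,
// falling back to the index's line counter when offsets are missing.
function estimateLength({ offsets, indexOffset, totalLines, writeBufferLength }) {
  const persisted = Array.isArray(offsets) ? offsets.length : 0
  if (persisted === 0 && indexOffset > 0 && totalLines > 0) {
    return totalLines + writeBufferLength
  }
  return persisted + writeBufferLength
}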
@@ -2339,22 +2839,7 @@ class Database extends EventEmitter {
  * Calculate current writeBuffer size in bytes (similar to published v1.1.0)
  */
  currentWriteBufferSize() {
- if (!this.writeBuffer || this.writeBuffer.length === 0) {
- return 0
- }
-
- // Calculate total size of all records in writeBuffer
- let totalSize = 0
- for (const record of this.writeBuffer) {
- if (record) {
- // SPACE OPTIMIZATION: Remove term IDs before size calculation
- const cleanRecord = this.removeTermIdsForSerialization(record)
- const recordJson = JSON.stringify(cleanRecord) + '\n'
- totalSize += Buffer.byteLength(recordJson, 'utf8')
- }
- }
-
- return totalSize
+ return this.writeBufferTotalSize || 0
  }

  /**
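currentWriteBufferSize() now returns a running total instead of re-serializing every buffered record. A small sketch of that bookkeeping, assuming sizes are recorded when records enter the buffer (the state object and function names here are illustrative):

// Sketch: keep a byte total alongside the buffer instead of recomputing it on demand.
const state = { writeBuffer: [], writeBufferSizes: [], writeBufferTotalSize: 0 }

function pushRecord(record) {
  const size = Buffer.byteLength(JSON.stringify(record) + '\n', 'utf8')
  state.writeBuffer.push(record)
  state.writeBufferSizes.push(size)
  state.writeBufferTotalSize += size
}

function currentWriteBufferSize() {
  return state.writeBufferTotalSize || 0   // O(1) instead of O(buffer length)
}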
@@ -2387,21 +2872,195 @@ class Database extends EventEmitter {
  }

  /**
- * Destroy database - DESTRUCTIVE MODE
- * Assumes save() has already been called by user
- * If anything is still active, it indicates a bug - log error and force cleanup
+ * Schedule index rebuild when index data is missing or corrupted
+ * @private
  */
- async destroy() {
- if (this.destroyed) return
+ _scheduleIndexRebuild() {
+ // Mark that rebuild is needed
+ this._indexRebuildNeeded = true

- // Mark as destroying immediately to prevent new operations
- this.destroying = true
-
- // Wait for all active insert sessions to complete before destroying
- if (this.activeInsertSessions.size > 0) {
- if (this.opts.debugMode) {
- console.log(`⏳ destroy: Waiting for ${this.activeInsertSessions.size} active insert sessions`)
- }
+ // Rebuild will happen lazily on first query if index is empty
+ // This avoids blocking init() but ensures index is available when needed
+ }
+
+ /**
+ * Rebuild indexes from data file if needed
+ * @private
+ */
+ async _rebuildIndexesIfNeeded() {
+ if (this.opts.debugMode) {
+ console.log(`🔍 _rebuildIndexesIfNeeded called: _indexRebuildNeeded=${this._indexRebuildNeeded}`)
+ }
+ if (!this._indexRebuildNeeded) return
+ if (!this.indexManager || !this.indexManager.indexedFields || this.indexManager.indexedFields.length === 0) return
+
+ // Check if index actually needs rebuilding
+ let needsRebuild = false
+ for (const field of this.indexManager.indexedFields) {
+ if (!this.indexManager.hasUsableIndexData(field)) {
+ needsRebuild = true
+ break
+ }
+ }
+
+ if (!needsRebuild) {
+ this._indexRebuildNeeded = false
+ return
+ }
+
+ // Check if rebuild is allowed
+ if (!this.opts.allowIndexRebuild) {
+ const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+ throw new Error(
+ `Index rebuild required but disabled: Index file ${idxPath} is corrupted or missing, ` +
+ `and allowIndexRebuild is set to false. ` +
+ `Set allowIndexRebuild: true to automatically rebuild the index, ` +
+ `or manually fix/delete the corrupted index file.`
+ )
+ }
+
+ if (this.opts.debugMode) {
+ console.log('🔨 Rebuilding indexes from data file...')
+ }
+
+ try {
+ // Read all records and rebuild index
+ let count = 0
+ const startTime = Date.now()
+
+ // Auto-detect schema from first line if not initialized
+ if (!this.serializer.schemaManager.isInitialized) {
+ const fs = await import('fs')
+ const readline = await import('readline')
+ const stream = fs.createReadStream(this.fileHandler.file, {
+ highWaterMark: 64 * 1024,
+ encoding: 'utf8'
+ })
+ const rl = readline.createInterface({
+ input: stream,
+ crlfDelay: Infinity
+ })
+
+ for await (const line of rl) {
+ if (line && line.trim()) {
+ try {
+ const firstRecord = JSON.parse(line)
+ if (Array.isArray(firstRecord)) {
+ // Try to infer schema from opts.fields if available
+ if (this.opts.fields && typeof this.opts.fields === 'object') {
+ const fieldNames = Object.keys(this.opts.fields)
+ if (fieldNames.length >= firstRecord.length) {
+ // Use first N fields from opts.fields to match array length
+ const schema = fieldNames.slice(0, firstRecord.length)
+ this.serializer.initializeSchema(schema)
+ if (this.opts.debugMode) {
+ console.log(`🔍 Inferred schema from opts.fields: ${schema.join(', ')}`)
+ }
+ } else {
+ throw new Error(`Cannot rebuild index: array has ${firstRecord.length} elements but opts.fields only defines ${fieldNames.length} fields. Schema must be explicitly provided.`)
+ }
+ } else {
+ throw new Error('Cannot rebuild index: schema missing, file uses array format, and opts.fields not provided. The .idx.jdb file is corrupted.')
+ }
+ } else {
+ // Object format, initialize from object keys
+ this.serializer.initializeSchema(firstRecord, true)
+ if (this.opts.debugMode) {
+ console.log(`🔍 Auto-detected schema from object: ${Object.keys(firstRecord).join(', ')}`)
+ }
+ }
+ break
+ } catch (error) {
+ if (this.opts.debugMode) {
+ console.error('❌ Failed to auto-detect schema:', error.message)
+ }
+ throw error
+ }
+ }
+ }
+ stream.destroy()
+ }
+
+ // Use streaming to read records without loading everything into memory
+ // Also rebuild offsets while we're at it
+ const fs = await import('fs')
+ const readline = await import('readline')
+
+ this.offsets = []
+ let currentOffset = 0
+
+ const stream = fs.createReadStream(this.fileHandler.file, {
+ highWaterMark: 64 * 1024,
+ encoding: 'utf8'
+ })
+
+ const rl = readline.createInterface({
+ input: stream,
+ crlfDelay: Infinity
+ })
+
+ try {
+ for await (const line of rl) {
+ if (line && line.trim()) {
+ try {
+ // Record the offset for this line
+ this.offsets.push(currentOffset)
+
+ const record = this.serializer.deserialize(line)
+ const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
+ await this.indexManager.add(recordWithTerms, count)
+ count++
+ } catch (error) {
+ // Skip invalid lines
+ if (this.opts.debugMode) {
+ console.log(`⚠️ Rebuild: Failed to deserialize line ${count}:`, error.message)
+ }
+ }
+ }
+ // Update offset for next line (including newline character)
+ currentOffset += Buffer.byteLength(line, 'utf8') + 1
+ }
+ } finally {
+ stream.destroy()
+ }
+
+ // Update indexManager totalLines
+ if (this.indexManager) {
+ this.indexManager.setTotalLines(this.offsets.length)
+ }
+
+ this._indexRebuildNeeded = false
+
+ if (this.opts.debugMode) {
+ console.log(`✅ Index rebuilt from ${count} records in ${Date.now() - startTime}ms`)
+ }
+
+ // Save the rebuilt index
+ await this._saveIndexDataToFile()
+ } catch (error) {
+ if (this.opts.debugMode) {
+ console.error('❌ Failed to rebuild indexes:', error.message)
+ }
+ // Don't throw - queries will fall back to streaming
+ }
+ }
+
+ /**
+ * Destroy database - DESTRUCTIVE MODE
+ * Assumes save() has already been called by user
+ * If anything is still active, it indicates a bug - log error and force cleanup
+ */
+ async destroy() {
+ if (this.destroyed) return
+
+ // Mark as destroying immediately to prevent new operations
+ this.destroying = true
+
+ // Wait for all active insert sessions to complete before destroying
+ if (this.activeInsertSessions.size > 0) {
+ if (this.opts.debugMode) {
+ console.log(`⏳ destroy: Waiting for ${this.activeInsertSessions.size} active insert sessions`)
+ }

  const sessionPromises = Array.from(this.activeInsertSessions).map(session =>
  session.waitForOperations(null) // Wait indefinitely for sessions to complete
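The rebuild path above refuses to run unless allowIndexRebuild is enabled. A hypothetical opening snippet showing that option; the constructor shape and default export are assumed from this diff rather than from the published docs:

// Sketch: opt in to automatic index rebuilds when the .idx.jdb file is
// missing or corrupted (otherwise the rebuild path throws instead).
import Database from 'jexidb'

const db = new Database('./channels.jdb', {
  indexes: { nameTerms: 'array:string' },
  allowIndexRebuild: true,   // rebuild from the data file when the index is unusable
  debugMode: false
})
await db.init()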
@@ -2503,6 +3162,8 @@ class Database extends EventEmitter {
  this.writeBuffer = []
  this.writeBufferOffsets = []
  this.writeBufferSizes = []
+ this.writeBufferTotalSize = 0
+ this.writeBufferTotalSize = 0
  this.deletedIds.clear()
  this.pendingOperations.clear()
  this.pendingIndexUpdates = []
@@ -2570,8 +3231,211 @@ class Database extends EventEmitter {
  async count(criteria = {}, options = {}) {
  this._validateInitialization('count')

- const results = await this.find(criteria, options)
- return results.length
+ // OPTIMIZATION: Use queryManager.count() instead of find() for better performance
+ // This is especially faster for indexed queries which can use indexManager.query().size
+ const fileCount = await this.queryManager.count(criteria, options)
+
+ // Count matching records in writeBuffer
+ const writeBufferCount = this.writeBuffer.filter(record =>
+ this.queryManager.matchesCriteria(record, criteria, options)
+ ).length
+
+ return fileCount + writeBufferCount
+ }
+
+ /**
+ * Check if any records exist for given field and terms (index-only, ultra-fast)
+ * Delegates to IndexManager.exists() for maximum performance
+ *
+ * @param {string} fieldName - Indexed field name
+ * @param {string|Array<string>} terms - Single term or array of terms
+ * @param {Object} options - Options: { $all: true/false, caseInsensitive: true/false, excludes: Array<string> }
+ * @returns {Promise<boolean>} - True if at least one match exists
+ *
+ * @example
+ * // Check if channel exists
+ * const exists = await db.exists('nameTerms', ['a', 'e'], { $all: true });
+ *
+ * @example
+ * // Check if 'tv' exists but not 'globo'
+ * const exists = await db.exists('nameTerms', 'tv', { excludes: ['globo'] });
+ */
+ async exists(fieldName, terms, options = {}) {
+ this._validateInitialization('exists')
+ return this.indexManager.exists(fieldName, terms, options)
+ }
+
+ /**
+ * Calculate coverage for grouped include/exclude term sets
+ * @param {string} fieldName - Name of the indexed field
+ * @param {Array<object>} groups - Array of { terms, excludes } objects
+ * @param {object} options - Optional settings
+ * @returns {Promise<number>} Coverage percentage between 0 and 100
+ */
+ async coverage(fieldName, groups, options = {}) {
+ this._validateInitialization('coverage')
+
+ if (typeof fieldName !== 'string' || !fieldName.trim()) {
+ throw new Error('fieldName must be a non-empty string')
+ }
+
+ if (!Array.isArray(groups)) {
+ throw new Error('groups must be an array')
+ }
+
+ if (groups.length === 0) {
+ return 0
+ }
+
+ if (!this.opts.indexes || !this.opts.indexes[fieldName]) {
+ throw new Error(`Field "${fieldName}" is not indexed`)
+ }
+
+ const fieldType = this.opts.indexes[fieldName]
+ const supportedTypes = ['array:string', 'string']
+ if (!supportedTypes.includes(fieldType)) {
+ throw new Error(`coverage() only supports fields of type ${supportedTypes.join(', ')} (found: ${fieldType})`)
+ }
+
+ const fieldIndex = this.indexManager?.index?.data?.[fieldName]
+ if (!fieldIndex) {
+ return 0
+ }
+
+ const isTermMapped = this.termManager &&
+ this.termManager.termMappingFields &&
+ this.termManager.termMappingFields.includes(fieldName)
+
+ const normalizeTerm = (term) => {
+ if (term === undefined || term === null) {
+ return ''
+ }
+ return String(term).trim()
+ }
+
+ const resolveKey = (term) => {
+ if (isTermMapped) {
+ const termId = this.termManager.getTermIdWithoutIncrement(term)
+ if (termId === null || termId === undefined) {
+ return null
+ }
+ return String(termId)
+ }
+ return String(term)
+ }
+
+ let matchedGroups = 0
+
+ for (const group of groups) {
+ if (!group || typeof group !== 'object') {
+ throw new Error('Each coverage group must be an object')
+ }
+
+ const includeTermsRaw = Array.isArray(group.terms) ? group.terms : []
+ const excludeTermsRaw = Array.isArray(group.excludes) ? group.excludes : []
+
+ const includeTerms = Array.from(new Set(
+ includeTermsRaw
+ .map(normalizeTerm)
+ .filter(term => term.length > 0)
+ ))
+
+ if (includeTerms.length === 0) {
+ throw new Error('Each coverage group must define at least one term')
+ }
+
+ const excludeTerms = Array.from(new Set(
+ excludeTermsRaw
+ .map(normalizeTerm)
+ .filter(term => term.length > 0)
+ ))
+
+ let candidateLines = null
+ let groupMatched = true
+
+ for (const term of includeTerms) {
+ const key = resolveKey(term)
+ if (key === null) {
+ groupMatched = false
+ break
+ }
+
+ const termData = fieldIndex[key]
+ if (!termData) {
+ groupMatched = false
+ break
+ }
+
+ const lineNumbers = this.indexManager._getAllLineNumbers(termData)
+ if (!lineNumbers || lineNumbers.length === 0) {
+ groupMatched = false
+ break
+ }
+
+ if (candidateLines === null) {
+ candidateLines = new Set(lineNumbers)
+ } else {
+ const termSet = new Set(lineNumbers)
+ for (const line of Array.from(candidateLines)) {
+ if (!termSet.has(line)) {
+ candidateLines.delete(line)
+ }
+ }
+ }
+
+ if (!candidateLines || candidateLines.size === 0) {
+ groupMatched = false
+ break
+ }
+ }
+
+ if (!groupMatched || !candidateLines || candidateLines.size === 0) {
+ continue
+ }
+
+ for (const term of excludeTerms) {
+ const key = resolveKey(term)
+ if (key === null) {
+ continue
+ }
+
+ const termData = fieldIndex[key]
+ if (!termData) {
+ continue
+ }
+
+ const excludeLines = this.indexManager._getAllLineNumbers(termData)
+ if (!excludeLines || excludeLines.length === 0) {
+ continue
+ }
+
+ for (const line of excludeLines) {
+ if (!candidateLines.size) {
+ break
+ }
+ candidateLines.delete(line)
+ }
+
+ if (!candidateLines.size) {
+ break
+ }
+ }
+
+ if (candidateLines && candidateLines.size > 0) {
+ matchedGroups++
+ }
+ }
+
+ if (matchedGroups === 0) {
+ return 0
+ }
+
+ const precision = typeof options.precision === 'number' && options.precision >= 0
+ ? options.precision
+ : 2
+
+ const coverageValue = (matchedGroups / groups.length) * 100
+ return Number(coverageValue.toFixed(precision))
  }

  /**
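The two index-only helpers added in this hunk can be combined; a short usage sketch based on the signatures documented above (the db instance, field name, and data are made up for illustration):

// Sketch: index-only checks against an 'array:string' indexed field.
const hasSportsTv = await db.exists('nameTerms', ['sports', 'tv'], { $all: true })

// coverage(): percentage of groups with at least one surviving match.
const pct = await db.coverage('nameTerms', [
  { terms: ['news'] },                     // matched if any record has 'news'
  { terms: ['tv'], excludes: ['globo'] },  // 'tv' records that are not 'globo'
], { precision: 1 })

console.log(hasSportsTv, pct) // e.g. true 50 when only one of the two groups matches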
@@ -2589,7 +3453,8 @@ class Database extends EventEmitter {
  const opts = {
  limit: options.limit ?? 100,
  sort: options.sort ?? 'desc',
- includeScore: options.includeScore !== false
+ includeScore: options.includeScore !== false,
+ mode: options.mode ?? 'sum'
  }

  // Validate fieldName
@@ -2613,6 +3478,12 @@ class Database extends EventEmitter {
  throw new Error(`Score value for term "${term}" must be a number`)
  }
  }
+
+ // Validate mode
+ const allowedModes = new Set(['sum', 'max', 'avg', 'first'])
+ if (!allowedModes.has(opts.mode)) {
+ throw new Error(`Invalid score mode "${opts.mode}". Must be one of: ${Array.from(allowedModes).join(', ')}`)
+ }

  // Check if field is indexed and is array:string type
  if (!this.opts.indexes || !this.opts.indexes[fieldName]) {
@@ -2637,6 +3508,7 @@ class Database extends EventEmitter {

  // Accumulate scores for each line number
  const scoreMap = new Map()
+ const countMap = opts.mode === 'avg' ? new Map() : null

  // Iterate through each term in the scores object
  for (const [term, weight] of Object.entries(scores)) {
@@ -2666,8 +3538,44 @@ class Database extends EventEmitter {

  // Add weight to score for each line number
  for (const lineNumber of lineNumbers) {
- const currentScore = scoreMap.get(lineNumber) || 0
- scoreMap.set(lineNumber, currentScore + weight)
+ const currentScore = scoreMap.get(lineNumber)
+
+ switch (opts.mode) {
+ case 'sum': {
+ const nextScore = (currentScore || 0) + weight
+ scoreMap.set(lineNumber, nextScore)
+ break
+ }
+ case 'max': {
+ if (currentScore === undefined) {
+ scoreMap.set(lineNumber, weight)
+ } else {
+ scoreMap.set(lineNumber, Math.max(currentScore, weight))
+ }
+ break
+ }
+ case 'avg': {
+ const previous = currentScore || 0
+ scoreMap.set(lineNumber, previous + weight)
+ const count = (countMap.get(lineNumber) || 0) + 1
+ countMap.set(lineNumber, count)
+ break
+ }
+ case 'first': {
+ if (currentScore === undefined) {
+ scoreMap.set(lineNumber, weight)
+ }
+ break
+ }
+ }
+ }
+ }
+
+ // For average mode, divide total by count
+ if (opts.mode === 'avg') {
+ for (const [lineNumber, totalScore] of scoreMap.entries()) {
+ const count = countMap.get(lineNumber) || 1
+ scoreMap.set(lineNumber, totalScore / count)
  }
  }
  }
@@ -2693,24 +3601,83 @@ class Database extends EventEmitter {
  const lineNumbers = limitedEntries.map(([lineNumber]) => lineNumber)
  const scoresByLineNumber = new Map(limitedEntries)

- // Use getRanges and fileHandler to read records
- const ranges = this.getRanges(lineNumbers)
- const groupedRanges = await this.fileHandler.groupedRanges(ranges)
+ const persistedCount = Array.isArray(this.offsets) ? this.offsets.length : 0

- const fs = await import('fs')
- const fd = await fs.promises.open(this.fileHandler.file, 'r')
+ // Separate lineNumbers into file records and writeBuffer records
+ const fileLineNumbers = []
+ const writeBufferLineNumbers = []
+
+ for (const lineNumber of lineNumbers) {
+ if (lineNumber >= persistedCount) {
+ // This lineNumber points to writeBuffer
+ writeBufferLineNumbers.push(lineNumber)
+ } else {
+ // This lineNumber points to file
+ fileLineNumbers.push(lineNumber)
+ }
+ }

  const results = []

- try {
- for (const groupedRange of groupedRanges) {
- for await (const row of this.fileHandler.readGroupedRange(groupedRange, fd)) {
- try {
- const record = this.serializer.deserialize(row.line)
-
- // Get line number from the row
- const lineNumber = row._ || 0
-
+ // Read records from file
+ if (fileLineNumbers.length > 0) {
+ const ranges = this.getRanges(fileLineNumbers)
+ if (ranges.length > 0) {
+ // Create a map from start offset to lineNumber for accurate mapping
+ const startToLineNumber = new Map()
+ for (const range of ranges) {
+ if (range.index !== undefined) {
+ startToLineNumber.set(range.start, range.index)
+ }
+ }
+
+ const groupedRanges = await this.fileHandler.groupedRanges(ranges)
+
+ const fs = await import('fs')
+ const fd = await fs.promises.open(this.fileHandler.file, 'r')
+
+ try {
+ for (const groupedRange of groupedRanges) {
+ for await (const row of this.fileHandler.readGroupedRange(groupedRange, fd)) {
+ try {
+ const record = this.serializer.deserialize(row.line)
+
+ // Get line number from the row, fallback to start offset mapping
+ let lineNumber = row._ !== null && row._ !== undefined ? row._ : (startToLineNumber.get(row.start) ?? 0)
+
+ // Restore term IDs to terms
+ const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
+
+ // Add line number
+ recordWithTerms._ = lineNumber
+
+ // Add score if includeScore is true (default is true)
+ if (opts.includeScore !== false) {
+ recordWithTerms.score = scoresByLineNumber.get(lineNumber) || 0
+ }
+
+ results.push(recordWithTerms)
+ } catch (error) {
+ // Skip invalid lines
+ if (this.opts.debugMode) {
+ console.error('Error deserializing record in score():', error)
+ }
+ }
+ }
+ }
+ } finally {
+ await fd.close()
+ }
+ }
+ }
+
+ // Read records from writeBuffer
+ if (writeBufferLineNumbers.length > 0 && this.writeBuffer) {
+ for (const lineNumber of writeBufferLineNumbers) {
+ const writeBufferIndex = lineNumber - persistedCount
+ if (writeBufferIndex >= 0 && writeBufferIndex < this.writeBuffer.length) {
+ const record = this.writeBuffer[writeBufferIndex]
+ if (record) {
  // Restore term IDs to terms
  const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)

@@ -2723,16 +3690,9 @@ class Database extends EventEmitter {
  }

  results.push(recordWithTerms)
- } catch (error) {
- // Skip invalid lines
- if (this.opts.debugMode) {
- console.error('Error deserializing record in score():', error)
- }
  }
  }
  }
- } finally {
- await fd.close()
  }

  // Re-sort results to maintain score order (since reads might be out of order)
@@ -2944,11 +3904,52 @@ class Database extends EventEmitter {
  }

  // CRITICAL FIX: Only remove processed items from writeBuffer after all async operations complete
- // OPTIMIZATION: Use Set.has() for O(1) lookup - same Set used for processing
  const beforeLength = this.writeBuffer.length
- this.writeBuffer = this.writeBuffer.filter(item => !itemsToProcess.has(item))
+ if (beforeLength > 0) {
+ const originalRecords = this.writeBuffer
+ const originalOffsets = this.writeBufferOffsets
+ const originalSizes = this.writeBufferSizes
+ const retainedRecords = []
+ const retainedOffsets = []
+ const retainedSizes = []
+ let retainedTotal = 0
+ let removedCount = 0
+
+ for (let i = 0; i < originalRecords.length; i++) {
+ const record = originalRecords[i]
+ if (itemsToProcess.has(record)) {
+ removedCount++
+ continue
+ }
+
+ retainedRecords.push(record)
+ if (originalOffsets && i < originalOffsets.length) {
+ retainedOffsets.push(originalOffsets[i])
+ }
+ if (originalSizes && i < originalSizes.length) {
+ const size = originalSizes[i]
+ if (size !== undefined) {
+ retainedSizes.push(size)
+ retainedTotal += size
+ }
+ }
+ }
+
+ if (removedCount > 0) {
+ this.writeBuffer = retainedRecords
+ this.writeBufferOffsets = retainedOffsets
+ this.writeBufferSizes = retainedSizes
+ this.writeBufferTotalSize = retainedTotal
+ }
+ }
  const afterLength = this.writeBuffer.length

+ if (afterLength === 0) {
+ this.writeBufferOffsets = []
+ this.writeBufferSizes = []
+ this.writeBufferTotalSize = 0
+ }
+
  if (this.opts.debugMode && beforeLength !== afterLength) {
  console.log(`💾 _processWriteBuffer: Removed ${beforeLength - afterLength} items from writeBuffer (${beforeLength} -> ${afterLength})`)
  }
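The block above prunes writeBuffer, writeBufferOffsets and writeBufferSizes in lockstep so the running byte total stays consistent with what remains buffered. A compact sketch of the same idea over parallel arrays (the state shape and function name are illustrative):

// Sketch: drop processed records while keeping the three parallel arrays
// and the byte total in sync.
function pruneProcessed(state, processed) {
  const records = [], offsets = [], sizes = []
  let total = 0
  state.writeBuffer.forEach((record, i) => {
    if (processed.has(record)) return       // processed: Set of record references
    records.push(record)
    offsets.push(state.writeBufferOffsets[i])
    const size = state.writeBufferSizes[i] ?? 0
    sizes.push(size)
    total += size
  })
  state.writeBuffer = records
  state.writeBufferOffsets = offsets
  state.writeBufferSizes = sizes
  state.writeBufferTotalSize = total
}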
@@ -3033,9 +4034,11 @@ class Database extends EventEmitter {
  for (let i = 0; i < lines.length && i < this.offsets.length; i++) {
  try {
  const record = this.serializer.deserialize(lines[i])
- if (record && !deletedIdsSnapshot.has(record.id)) {
+ if (record && !deletedIdsSnapshot.has(String(record.id))) {
  // Check if this record is not being updated in writeBuffer
- const updatedRecord = writeBufferSnapshot.find(r => r.id === record.id)
+ // CRITICAL FIX: Normalize IDs to strings for consistent comparison
+ const normalizedRecordId = String(record.id)
+ const updatedRecord = writeBufferSnapshot.find(r => r && r.id && String(r.id) === normalizedRecordId)
  if (!updatedRecord) {
  existingRecords.push(record)
  }
@@ -3079,9 +4082,14 @@ class Database extends EventEmitter {
  // existingRecords.length = this.offsets.length

  // Create a map of updated records for quick lookup
+ // CRITICAL FIX: Normalize IDs to strings for consistent comparison
  const updatedRecordsMap = new Map()
  writeBufferSnapshot.forEach(record => {
- updatedRecordsMap.set(record.id, record)
+ if (record && record.id !== undefined && record.id !== null) {
+ // Normalize ID to string for consistent comparison
+ const normalizedId = String(record.id)
+ updatedRecordsMap.set(normalizedId, record)
+ }
  })

  // OPTIMIZATION: Cache file stats to avoid repeated stat() calls
@@ -3249,7 +4257,8 @@ class Database extends EventEmitter {
  if (recordId !== undefined && recordId !== null) {
  recordId = String(recordId)
  // Check if this record needs full parsing (updated or deleted)
- needsFullParse = updatedRecordsMap.has(recordId) || deletedIdsSnapshot.has(recordId)
+ // CRITICAL FIX: Normalize ID to string for consistent comparison
+ needsFullParse = updatedRecordsMap.has(recordId) || deletedIdsSnapshot.has(String(recordId))
  } else {
  needsFullParse = true
  }
@@ -3264,7 +4273,8 @@ class Database extends EventEmitter {
  const idMatch = trimmedLine.match(/"id"\s*:\s*"([^"]+)"|"id"\s*:\s*(\d+)/)
  if (idMatch) {
  recordId = idMatch[1] || idMatch[2]
- needsFullParse = updatedRecordsMap.has(recordId) || deletedIdsSnapshot.has(recordId)
+ // CRITICAL FIX: Normalize ID to string for consistent comparison
+ needsFullParse = updatedRecordsMap.has(String(recordId)) || deletedIdsSnapshot.has(String(recordId))
  } else {
  needsFullParse = true
  }
@@ -3289,9 +4299,11 @@ class Database extends EventEmitter {
  // Use record directly (no need to restore term IDs)
  const recordWithIds = record

- if (updatedRecordsMap.has(recordWithIds.id)) {
+ // CRITICAL FIX: Normalize ID to string for consistent comparison
+ const normalizedId = String(recordWithIds.id)
+ if (updatedRecordsMap.has(normalizedId)) {
  // Replace with updated version
- const updatedRecord = updatedRecordsMap.get(recordWithIds.id)
+ const updatedRecord = updatedRecordsMap.get(normalizedId)
  if (this.opts.debugMode) {
  console.log(`💾 Save: Updated record ${recordWithIds.id} (${recordWithIds.name || 'Unnamed'})`)
  }
@@ -3301,7 +4313,7 @@ class Database extends EventEmitter {
  id: recordWithIds.id,
  needsParse: false
  }
- } else if (!deletedIdsSnapshot.has(recordWithIds.id)) {
+ } else if (!deletedIdsSnapshot.has(String(recordWithIds.id))) {
  // Keep existing record if not deleted
  if (this.opts.debugMode) {
  console.log(`💾 Save: Kept record ${recordWithIds.id} (${recordWithIds.name || 'Unnamed'})`)
@@ -3534,6 +4546,240 @@ class Database extends EventEmitter {
  }).filter(n => n !== undefined)
  }

+ /**
+ * Get the base line number for writeBuffer entries (number of persisted records)
+ * @private
+ */
+ _getWriteBufferBaseLineNumber() {
+ return Array.isArray(this.offsets) ? this.offsets.length : 0
+ }
+
+ /**
+ * Convert a writeBuffer index into an absolute line number
+ * @param {number} writeBufferIndex - Index inside writeBuffer (0-based)
+ * @returns {number} Absolute line number (0-based)
+ * @private
+ */
+ _getAbsoluteLineNumber(writeBufferIndex) {
+ if (typeof writeBufferIndex !== 'number' || writeBufferIndex < 0) {
+ throw new Error('Invalid writeBuffer index')
+ }
+ return this._getWriteBufferBaseLineNumber() + writeBufferIndex
+ }
+
+ async *_streamingRecoveryGenerator(criteria, options, alreadyYielded = 0, map = null, remainingSkipValue = 0) {
+ if (this._offsetRecoveryInProgress) {
+ return
+ }
+
+ if (!this.fileHandler || !this.fileHandler.file) {
+ return
+ }
+
+ this._offsetRecoveryInProgress = true
+
+ const fsModule = this._fsModule || (this._fsModule = await import('fs'))
+ let fd
+
+ try {
+ fd = await fsModule.promises.open(this.fileHandler.file, 'r')
+ } catch (error) {
+ this._offsetRecoveryInProgress = false
+ if (this.opts.debugMode) {
+ console.warn(`⚠️ Offset recovery skipped: ${error.message}`)
+ }
+ return
+ }
+
+ const chunkSize = this.opts.offsetRecoveryChunkSize || 64 * 1024
+ let buffer = Buffer.alloc(0)
+ let readOffset = 0
+ const originalOffsets = Array.isArray(this.offsets) ? [...this.offsets] : []
+ const newOffsets = []
+ let offsetAdjusted = false
+ let limitReached = false
+ let lineIndex = 0
+ let lastLineEnd = 0
+ let producedTotal = alreadyYielded || 0
+ let remainingSkip = remainingSkipValue || 0
+ let remainingAlreadyYielded = alreadyYielded || 0
+ const limit = typeof options?.limit === 'number' ? options.limit : null
+ const includeOffsets = options?.includeOffsets === true
+ const includeLinePosition = this.opts.includeLinePosition
+ const mapSet = map instanceof Set ? new Set(map) : (Array.isArray(map) ? new Set(map) : null)
+ const criteriaIsObject = criteria && typeof criteria === 'object' && !Array.isArray(criteria) && !(criteria instanceof Set)
+ const hasCriteria = criteriaIsObject && Object.keys(criteria).length > 0
+
+ const decodeLineBuffer = (lineBuffer) => {
+ let trimmed = lineBuffer
+ if (trimmed.length > 0 && trimmed[trimmed.length - 1] === 0x0A) {
+ trimmed = trimmed.subarray(0, trimmed.length - 1)
+ }
+ if (trimmed.length > 0 && trimmed[trimmed.length - 1] === 0x0D) {
+ trimmed = trimmed.subarray(0, trimmed.length - 1)
+ }
+ return trimmed
+ }
+
+ const processLine = async (lineBuffer, lineStart) => {
+ const lineLength = lineBuffer.length
+ newOffsets[lineIndex] = lineStart
+ const expected = originalOffsets[lineIndex]
+ if (expected !== undefined && expected !== lineStart) {
+ offsetAdjusted = true
+ if (this.opts.debugMode) {
+ console.warn(`⚠️ Offset mismatch detected at line ${lineIndex}: expected ${expected}, actual ${lineStart}`)
+ }
+ } else if (expected === undefined) {
+ offsetAdjusted = true
+ }
+
+ lastLineEnd = Math.max(lastLineEnd, lineStart + lineLength)
+
+ let entryWithTerms = null
+ let shouldYield = false
+
+ const decodedBuffer = decodeLineBuffer(lineBuffer)
+ if (decodedBuffer.length > 0) {
+ let lineString
+ try {
+ lineString = decodedBuffer.toString('utf8')
+ } catch (error) {
+ lineString = decodedBuffer.toString('utf8', { replacement: '?' })
+ }
+
+ try {
+ const record = await this.serializer.deserialize(lineString)
+ if (record && typeof record === 'object') {
+ entryWithTerms = this.restoreTermIdsAfterDeserialization(record)
+ if (includeLinePosition) {
+ entryWithTerms._ = lineIndex
+ }
+
+ if (mapSet) {
+ shouldYield = mapSet.has(lineIndex)
+ if (shouldYield) {
+ mapSet.delete(lineIndex)
+ }
+ } else if (hasCriteria) {
+ shouldYield = this.queryManager.matchesCriteria(entryWithTerms, criteria, options)
+ } else {
+ shouldYield = true
+ }
+ }
+ } catch (error) {
+ if (this.opts.debugMode) {
+ console.warn(`⚠️ Offset recovery failed to deserialize line ${lineIndex} at ${lineStart}: ${error.message}`)
+ }
+ }
+ }
+
+ let yieldedEntry = null
+
+ if (shouldYield && entryWithTerms) {
+ if (remainingSkip > 0) {
+ remainingSkip--
+ } else if (remainingAlreadyYielded > 0) {
+ remainingAlreadyYielded--
+ } else if (!limit || producedTotal < limit) {
+ producedTotal++
+ yieldedEntry = includeOffsets
+ ? { entry: entryWithTerms, start: lineStart, _: lineIndex }
+ : entryWithTerms
+ } else {
+ limitReached = true
+ }
+ }
+
+ lineIndex++
+
+ if (yieldedEntry) {
+ return yieldedEntry
+ }
+ return null
+ }
+
+ let recoveryFailed = false
+
+ try {
+ while (true) {
+ const tempBuffer = Buffer.allocUnsafe(chunkSize)
+ const { bytesRead } = await fd.read(tempBuffer, 0, chunkSize, readOffset)
+
+ if (bytesRead === 0) {
+ if (buffer.length > 0) {
+ const lineStart = readOffset - buffer.length
+ const yieldedEntry = await processLine(buffer, lineStart)
+ if (yieldedEntry) {
+ yield yieldedEntry
+ }
+ }
+ break
+ }
+
+ readOffset += bytesRead
+ let chunk = buffer.length > 0
+ ? Buffer.concat([buffer, tempBuffer.subarray(0, bytesRead)])
+ : tempBuffer.subarray(0, bytesRead)
+
+ let processedUpTo = 0
+ const chunkBaseOffset = readOffset - chunk.length
+
+ while (true) {
+ const newlineIndex = chunk.indexOf(0x0A, processedUpTo)
+ if (newlineIndex === -1) {
+ break
+ }
+
+ const lineBuffer = chunk.subarray(processedUpTo, newlineIndex + 1)
+ const lineStart = chunkBaseOffset + processedUpTo
+ const yieldedEntry = await processLine(lineBuffer, lineStart)
+ processedUpTo = newlineIndex + 1
+
+ if (yieldedEntry) {
+ yield yieldedEntry
+ }
+ }
+
+ buffer = chunk.subarray(processedUpTo)
+ }
+ } catch (error) {
+ recoveryFailed = true
+ if (this.opts.debugMode) {
+ console.warn(`⚠️ Offset recovery aborted: ${error.message}`)
+ }
+ } finally {
+ await fd.close().catch(() => {})
+ this._offsetRecoveryInProgress = false
+
+ if (recoveryFailed) {
+ return
+ }
+
+ this.offsets = newOffsets
+ if (lineIndex < this.offsets.length) {
+ this.offsets.length = lineIndex
+ }
+
+ if (originalOffsets.length !== newOffsets.length) {
+ offsetAdjusted = true
+ }
+
+ this.indexOffset = lastLineEnd
+
+ if (offsetAdjusted) {
+ this.shouldSave = true
+ try {
+ await this._saveIndexDataToFile()
+ } catch (error) {
+ if (this.opts.debugMode) {
+ console.warn(`⚠️ Failed to persist recovered offsets: ${error.message}`)
+ }
+ }
+ }
+ }
+ }
+
  /**
  * Walk through records using streaming (real implementation)
  */
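_streamingRecoveryGenerator() above rebuilds the offsets array by scanning the data file for newline boundaries when a positioned read lands mid-record. A minimal sketch of that offset scan, assuming a plain newline-delimited data file; this simplification reads the whole file at once, whereas the generator in the diff streams it in chunks:

// Sketch: recompute per-record byte offsets for an NDJSON-style file.
import { promises as fs } from 'fs'

async function rebuildOffsets(file) {
  const data = await fs.readFile(file)    // simplification: the real code streams in chunks
  const offsets = []
  let lineStart = 0
  for (let i = 0; i < data.length; i++) {
    if (data[i] === 0x0A) {               // '\n' ends the current record
      offsets.push(lineStart)
      lineStart = i + 1
    }
  }
  if (lineStart < data.length) offsets.push(lineStart)  // trailing record without newline
  return offsets
}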
@@ -3547,6 +4793,7 @@ class Database extends EventEmitter {
  if (this.indexOffset === 0 && this.writeBuffer.length === 0) return

  let count = 0
+ let remainingSkip = options.skip || 0

  let map
  if (!Array.isArray(criteria)) {
@@ -3557,8 +4804,9 @@ class Database extends EventEmitter {
  map = [...this.indexManager.query(criteria, options)]
  } else {
  // For empty criteria {} or null/undefined, get all records
- // Use writeBuffer length when indexOffset is 0 (data not saved yet)
- const totalRecords = this.indexOffset > 0 ? this.indexOffset : this.writeBuffer.length
+ const totalRecords = this.offsets && this.offsets.length > 0
+ ? this.offsets.length
+ : this.writeBuffer.length
  map = [...Array(totalRecords).keys()]
  }
  } else {
@@ -3577,6 +4825,10 @@ class Database extends EventEmitter {
  }
  const entry = this.writeBuffer[i]
  if (entry && this.queryManager.matchesCriteria(entry, criteria, options)) {
+ if (remainingSkip > 0) {
+ remainingSkip--
+ continue
+ }
  count++
  if (options.includeOffsets) {
  yield { entry, start: 0, _: i }
@@ -3597,6 +4849,10 @@ class Database extends EventEmitter {
  if (lineNumber < this.writeBuffer.length) {
  const entry = this.writeBuffer[lineNumber]
  if (entry) {
+ if (remainingSkip > 0) {
+ remainingSkip--
+ continue
+ }
  count++
  if (options.includeOffsets) {
  yield { entry, start: 0, _: lineNumber }
@@ -3657,6 +4913,11 @@ class Database extends EventEmitter {
  // SPACE OPTIMIZATION: Restore term IDs to terms for user
  const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)

+ if (remainingSkip > 0) {
+ remainingSkip--
+ continue
+ }
+
  count++
  if (options.includeOffsets) {
  yield { entry: recordWithTerms, start: row.start, _: row._ || 0 }
@@ -3667,7 +4928,21 @@ class Database extends EventEmitter {
  yield recordWithTerms
  }
  } catch (error) {
- // Skip invalid lines
+ // CRITICAL FIX: Log deserialization errors instead of silently ignoring them
+ // This helps identify data corruption issues
+ if (1||this.opts.debugMode) {
+ console.warn(`⚠️ walk(): Failed to deserialize record at offset ${row.start}: ${error.message}`)
+ console.warn(`⚠️ walk(): Problematic line (first 200 chars): ${row.line.substring(0, 200)}`)
+ }
+ if (!this._offsetRecoveryInProgress) {
+ for await (const recoveredEntry of this._streamingRecoveryGenerator(criteria, options, count, map, remainingSkip)) {
+ yield recoveredEntry
+ count++
+ }
+ return
+ }
+ // Skip invalid lines but continue processing
+ // This prevents one corrupted record from stopping the entire walk operation
  }
  }
  if (options.limit && count >= options.limit) {
@@ -3696,6 +4971,12 @@ class Database extends EventEmitter {
  if (options.limit && count >= options.limit) {
  break
  }
+
+ if (remainingSkip > 0) {
+ remainingSkip--
+ continue
+ }
+
  count++

  // SPACE OPTIMIZATION: Restore term IDs to terms for user
@@ -3732,20 +5013,44 @@ class Database extends EventEmitter {
  if (options.limit && count >= options.limit) {
  break
  }
- const entry = await this.serializer.deserialize(row.line, { compress: this.opts.compress, v8: this.opts.v8 })
- if (entry === null) continue

- // SPACE OPTIMIZATION: Restore term IDs to terms for user
- const entryWithTerms = this.restoreTermIdsAfterDeserialization(entry)
+ try {
+ const entry = await this.serializer.deserialize(row.line, { compress: this.opts.compress, v8: this.opts.v8 })
+ if (entry === null) continue
+
+ // SPACE OPTIMIZATION: Restore term IDs to terms for user
+ const entryWithTerms = this.restoreTermIdsAfterDeserialization(entry)

- count++
- if (options.includeOffsets) {
- yield { entry: entryWithTerms, start: row.start, _: row._ || this.offsets.findIndex(n => n === row.start) }
- } else {
- if (this.opts.includeLinePosition) {
- entryWithTerms._ = row._ || this.offsets.findIndex(n => n === row.start)
+ if (remainingSkip > 0) {
+ remainingSkip--
+ continue
+ }
+
+ count++
+ if (options.includeOffsets) {
+ yield { entry: entryWithTerms, start: row.start, _: row._ || this.offsets.findIndex(n => n === row.start) }
+ } else {
+ if (this.opts.includeLinePosition) {
+ entryWithTerms._ = row._ || this.offsets.findIndex(n => n === row.start)
+ }
+ yield entryWithTerms
+ }
+ } catch (error) {
+ // CRITICAL FIX: Log deserialization errors instead of silently ignoring them
+ // This helps identify data corruption issues
+ if (1||this.opts.debugMode) {
+ console.warn(`⚠️ walk(): Failed to deserialize record at offset ${row.start}: ${error.message}`)
+ console.warn(`⚠️ walk(): Problematic line (first 200 chars): ${row.line.substring(0, 200)}`)
  }
- yield entryWithTerms
+ if (!this._offsetRecoveryInProgress) {
+ for await (const recoveredEntry of this._streamingRecoveryGenerator(criteria, options, count, map, remainingSkip)) {
+ yield recoveredEntry
+ count++
+ }
+ return
+ }
+ // Skip invalid lines but continue processing
+ // This prevents one corrupted record from stopping the entire walk operation
  }
  }
  }
@@ -3899,16 +5204,20 @@ class Database extends EventEmitter {

  // Update record in writeBuffer or add to writeBuffer
  const index = this.writeBuffer.findIndex(r => r.id === record.id)
+ let targetIndex
  if (index !== -1) {
  // Record is already in writeBuffer, update it
  this.writeBuffer[index] = record
+ targetIndex = index
  } else {
  // Record is in file, add updated version to writeBuffer
  this.writeBuffer.push(record)
+ targetIndex = this.writeBuffer.length - 1
  }

  // Update index
- await this.indexManager.update(record, record, this.writeBuffer.length - 1)
+ const absoluteLineNumber = this._getAbsoluteLineNumber(targetIndex)
+ await this.indexManager.update(record, record, absoluteLineNumber)
  }

  if (this.opts.debugMode) {
@@ -3982,8 +5291,24 @@ class Database extends EventEmitter {
  this.writeBufferSizes = []
  }
  } else {
- // Even if no data to save, ensure index data is persisted
- await this._saveIndexDataToFile()
+ // Only save index data if it actually has content
+ // Don't overwrite a valid index with an empty one
+ if (this.indexManager && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
+ let hasIndexData = false
+ for (const field of this.indexManager.indexedFields) {
+ if (this.indexManager.hasUsableIndexData(field)) {
+ hasIndexData = true
+ break
+ }
+ }
+ // Only save if we have actual index data OR if offsets are populated
+ // (offsets being populated means we've processed data)
+ if (hasIndexData || (this.offsets && this.offsets.length > 0)) {
+ await this._saveIndexDataToFile()
+ } else if (this.opts.debugMode) {
+ console.log('⚠️ close(): Skipping index save - index is empty and no offsets')
+ }
+ }
  }

  // 2. Mark as closed (but not destroyed) to allow reopening
@@ -4019,8 +5344,43 @@ class Database extends EventEmitter {
  if (this.indexManager) {
  try {
  const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+ const indexJSON = this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0 ? this.indexManager.toJSON() : {}
+
+ // Check if index is empty
+ const isEmpty = !indexJSON || Object.keys(indexJSON).length === 0 ||
+ (this.indexManager.indexedFields && this.indexManager.indexedFields.every(field => {
+ const fieldIndex = indexJSON[field]
+ return !fieldIndex || (typeof fieldIndex === 'object' && Object.keys(fieldIndex).length === 0)
+ }))
+
+ // PROTECTION: Don't overwrite a valid index file with empty data
+ // If the .idx.jdb file exists and has data, and we're trying to save empty index,
+ // skip the save to prevent corruption
+ if (isEmpty && !this.offsets?.length) {
+ const fs = await import('fs')
+ if (fs.existsSync(idxPath)) {
+ try {
+ const existingData = JSON.parse(await fs.promises.readFile(idxPath, 'utf8'))
+ const existingHasData = existingData.index && Object.keys(existingData.index).length > 0
+ const existingHasOffsets = existingData.offsets && existingData.offsets.length > 0
+
+ if (existingHasData || existingHasOffsets) {
+ if (this.opts.debugMode) {
+ console.log(`⚠️ _saveIndexDataToFile: Skipping save - would overwrite valid index with empty data`)
+ }
+ return // Don't overwrite valid index with empty one
+ }
+ } catch (error) {
+ // If we can't read existing file, proceed with save (might be corrupted)
+ if (this.opts.debugMode) {
+ console.log(`⚠️ _saveIndexDataToFile: Could not read existing index file, proceeding with save`)
+ }
+ }
+ }
+ }
+
  const indexData = {
- index: this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0 ? this.indexManager.toJSON() : {},
+ index: indexJSON,
  offsets: this.offsets, // Save actual offsets for efficient file operations
  indexOffset: this.indexOffset, // Save file size for proper range calculations
  // Save configuration for reuse when database exists