jexidb 2.1.4 → 2.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/Database.cjs CHANGED
@@ -1,7 +1,5 @@
1
1
  'use strict';
2
2
 
3
- Object.defineProperty(exports, '__esModule', { value: true });
4
-
5
3
  var events = require('events');
6
4
  var asyncMutex = require('async-mutex');
7
5
  var fs = require('fs');
@@ -2662,12 +2660,15 @@ class Serializer {
2662
2660
  * Advanced serialization with optimized JSON.stringify and buffer pooling
2663
2661
  */
2664
2662
  serializeAdvanced(data, addLinebreak) {
2663
+ // CRITICAL FIX: Sanitize data to remove problematic characters before serialization
2664
+ const sanitizedData = this.sanitizeDataForJSON(data);
2665
+
2665
2666
  // Validate encoding before serialization
2666
- this.validateEncodingBeforeSerialization(data);
2667
+ this.validateEncodingBeforeSerialization(sanitizedData);
2667
2668
 
2668
2669
  // Use optimized JSON.stringify without buffer pooling
2669
2670
  // NOTE: Buffer pool removed - using direct Buffer creation for simplicity and reliability
2670
- const json = this.optimizedStringify(data);
2671
+ const json = this.optimizedStringify(sanitizedData);
2671
2672
 
2672
2673
  // CRITICAL FIX: Normalize encoding before creating buffer
2673
2674
  const normalizedJson = this.normalizeEncoding(json);
@@ -2765,6 +2766,44 @@ class Serializer {
2765
2766
  /**
2766
2767
  * Validate encoding before serialization
2767
2768
  */
2769
+ /**
2770
+ * Sanitize data to remove problematic characters that break JSON parsing
2771
+ * CRITICAL FIX: Prevents "Expected ',' or ']'" and "Unterminated string" errors
2772
+ * by removing control characters that cannot be safely represented in JSON
2773
+ */
2774
+ sanitizeDataForJSON(data) {
2775
+ const sanitizeString = str => {
2776
+ if (typeof str !== 'string') return str;
2777
+ return str
2778
+ // Remove control characters that break JSON parsing (but keep \n, \r, \t as they can be escaped)
2779
+ // Remove: NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, VT, FF, SO, SI, DLE, DC1-DC4, NAK, SYN, ETB, CAN, EM, SUB, ESC, FS, GS, RS, US, DEL, C1 controls
2780
+ .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/g, '')
2781
+ // Limit string length to prevent performance issues
2782
+ .substring(0, 10000);
2783
+ };
2784
+ const sanitizeArray = arr => {
2785
+ if (!Array.isArray(arr)) return arr;
2786
+ return arr.map(item => this.sanitizeDataForJSON(item)).filter(item => item !== null && item !== undefined && item !== '');
2787
+ };
2788
+ if (typeof data === 'string') {
2789
+ return sanitizeString(data);
2790
+ }
2791
+ if (Array.isArray(data)) {
2792
+ return sanitizeArray(data);
2793
+ }
2794
+ if (data && typeof data === 'object') {
2795
+ const sanitized = {};
2796
+ for (const [key, value] of Object.entries(data)) {
2797
+ const sanitizedValue = this.sanitizeDataForJSON(value);
2798
+ // Only include non-null, non-undefined values
2799
+ if (sanitizedValue !== null && sanitizedValue !== undefined) {
2800
+ sanitized[key] = sanitizedValue;
2801
+ }
2802
+ }
2803
+ return sanitized;
2804
+ }
2805
+ return data;
2806
+ }
2768
2807
  validateEncodingBeforeSerialization(data) {
2769
2808
  const issues = [];
2770
2809
  const checkString = (str, path = '') => {
@@ -2859,12 +2898,15 @@ class Serializer {
2859
2898
  * Standard serialization (fallback)
2860
2899
  */
2861
2900
  serializeStandard(data, addLinebreak) {
2901
+ // CRITICAL FIX: Sanitize data to remove problematic characters before serialization
2902
+ const sanitizedData = this.sanitizeDataForJSON(data);
2903
+
2862
2904
  // Validate encoding before serialization
2863
- this.validateEncodingBeforeSerialization(data);
2905
+ this.validateEncodingBeforeSerialization(sanitizedData);
2864
2906
 
2865
2907
  // NOTE: Buffer pool removed - using direct Buffer creation for simplicity and reliability
2866
2908
  // CRITICAL: Normalize encoding for all string fields before stringify
2867
- const normalizedData = this.deepNormalizeEncoding(data);
2909
+ const normalizedData = this.deepNormalizeEncoding(sanitizedData);
2868
2910
  const json = JSON.stringify(normalizedData);
2869
2911
 
2870
2912
  // CRITICAL FIX: Normalize encoding before creating buffer
@@ -3068,11 +3110,14 @@ class Serializer {
3068
3110
  * Batch serialization for multiple records
3069
3111
  */
3070
3112
  serializeBatch(dataArray, opts = {}) {
3113
+ // CRITICAL FIX: Sanitize data to remove problematic characters before serialization
3114
+ const sanitizedDataArray = dataArray.map(data => this.sanitizeDataForJSON(data));
3115
+
3071
3116
  // Validate encoding before serialization
3072
- this.validateEncodingBeforeSerialization(dataArray);
3117
+ this.validateEncodingBeforeSerialization(sanitizedDataArray);
3073
3118
 
3074
3119
  // Convert all objects to array format for optimization
3075
- const convertedData = dataArray.map(data => this.convertToArrayFormat(data));
3120
+ const convertedData = sanitizedDataArray.map(data => this.convertToArrayFormat(data));
3076
3121
 
3077
3122
  // Track conversion statistics
3078
3123
  this.serializationStats.arraySerializations += convertedData.filter((item, index) => Array.isArray(item) && typeof dataArray[index] === 'object' && dataArray[index] !== null).length;
@@ -3872,6 +3917,141 @@ class FileHandler {
3872
3917
  }
3873
3918
  return groupedRanges;
3874
3919
  }
3920
+
3921
+ /**
3922
+ * Ensure a line is complete by reading until newline if JSON appears truncated
3923
+ * @param {string} line - The potentially incomplete line
3924
+ * @param {number} fd - File descriptor
3925
+ * @param {number} currentOffset - Current read offset
3926
+ * @returns {string} Complete line
3927
+ */
3928
+ async ensureCompleteLine(line, fd, currentOffset) {
3929
+ // Fast check: if line already ends with newline, it's likely complete
3930
+ if (line.endsWith('\n')) {
3931
+ return line;
3932
+ }
3933
+
3934
+ // Check if the line contains valid JSON by trying to parse it
3935
+ const trimmedLine = line.trim();
3936
+ if (trimmedLine.length === 0) {
3937
+ return line;
3938
+ }
3939
+
3940
+ // Try to parse as JSON to see if it's complete
3941
+ try {
3942
+ JSON.parse(trimmedLine);
3943
+ // If parsing succeeds, the line is complete (but missing newline)
3944
+ // This is unusual but possible, return as-is
3945
+ return line;
3946
+ } catch (jsonError) {
3947
+ // JSON is incomplete, try to read more until we find a newline
3948
+ const bufferSize = 2048; // Read in 2KB chunks for better performance
3949
+ const additionalBuffer = Buffer.allocUnsafe(bufferSize);
3950
+ let additionalOffset = currentOffset;
3951
+ let additionalContent = line;
3952
+
3953
+ // Try reading up to 20KB more to find the newline (increased for safety)
3954
+ const maxAdditionalRead = 20480;
3955
+ let totalAdditionalRead = 0;
3956
+ while (totalAdditionalRead < maxAdditionalRead) {
3957
+ const {
3958
+ bytesRead
3959
+ } = await fd.read(additionalBuffer, 0, bufferSize, additionalOffset);
3960
+ if (bytesRead === 0) {
3961
+ // EOF reached, check if the accumulated content is now valid JSON
3962
+ const finalTrimmed = additionalContent.trim();
3963
+ try {
3964
+ JSON.parse(finalTrimmed);
3965
+ // If parsing succeeds now, return the content
3966
+ return additionalContent;
3967
+ } catch {
3968
+ // Still invalid, return original line to avoid data loss
3969
+ return line;
3970
+ }
3971
+ }
3972
+ const chunk = additionalBuffer.toString('utf8', 0, bytesRead);
3973
+ additionalContent += chunk;
3974
+ totalAdditionalRead += bytesRead;
3975
+
3976
+ // Check if we found a newline in the entire accumulated content
3977
+ const newlineIndex = additionalContent.indexOf('\n', line.length);
3978
+ if (newlineIndex !== -1) {
3979
+ // Found newline, return content up to and including the newline
3980
+ const completeLine = additionalContent.substring(0, newlineIndex + 1);
3981
+
3982
+ // Validate that the complete line contains valid JSON
3983
+ const trimmedComplete = completeLine.trim();
3984
+ try {
3985
+ JSON.parse(trimmedComplete);
3986
+ return completeLine;
3987
+ } catch {
3988
+ // Even with newline, JSON is invalid - this suggests data corruption
3989
+ // Return original line to trigger normal error handling
3990
+ return line;
3991
+ }
3992
+ }
3993
+ additionalOffset += bytesRead;
3994
+ }
3995
+
3996
+ // If we couldn't find a newline within the limit, return the original line
3997
+ // This prevents infinite reading and excessive memory usage
3998
+ return line;
3999
+ }
4000
+ }
4001
+
4002
+ /**
4003
+ * Split content into complete JSON lines, handling special characters and escaped quotes
4004
+ * CRITICAL FIX: Prevents "Expected ',' or ']'" and "Unterminated string" errors by ensuring
4005
+ * each line is a complete, valid JSON object/array, even when containing special characters
4006
+ * @param {string} content - Raw content containing multiple JSON lines
4007
+ * @returns {string[]} Array of complete JSON lines
4008
+ */
4009
+ splitJsonLines(content) {
4010
+ const lines = [];
4011
+ let currentLine = '';
4012
+ let inString = false;
4013
+ let escapeNext = false;
4014
+ let braceCount = 0;
4015
+ let bracketCount = 0;
4016
+ for (let i = 0; i < content.length; i++) {
4017
+ const char = content[i];
4018
+ i > 0 ? content[i - 1] : null;
4019
+ currentLine += char;
4020
+ if (escapeNext) {
4021
+ escapeNext = false;
4022
+ continue;
4023
+ }
4024
+ if (char === '\\') {
4025
+ escapeNext = true;
4026
+ continue;
4027
+ }
4028
+ if (char === '"' && !escapeNext) {
4029
+ inString = !inString;
4030
+ continue;
4031
+ }
4032
+ if (!inString) {
4033
+ if (char === '{') braceCount++;else if (char === '}') braceCount--;else if (char === '[') bracketCount++;else if (char === ']') bracketCount--;else if (char === '\n' && braceCount === 0 && bracketCount === 0) {
4034
+ // Found complete JSON object/array at newline
4035
+ const trimmedLine = currentLine.trim();
4036
+ if (trimmedLine.length > 0) {
4037
+ lines.push(trimmedLine.replace(/\n$/, '')); // Remove trailing newline
4038
+ }
4039
+ currentLine = '';
4040
+ braceCount = 0;
4041
+ bracketCount = 0;
4042
+ inString = false;
4043
+ escapeNext = false;
4044
+ }
4045
+ }
4046
+ }
4047
+
4048
+ // Add remaining content if it's a complete JSON object/array
4049
+ const trimmedLine = currentLine.trim();
4050
+ if (trimmedLine.length > 0 && braceCount === 0 && bracketCount === 0) {
4051
+ lines.push(trimmedLine);
4052
+ }
4053
+ return lines.filter(line => line.trim().length > 0);
4054
+ }
3875
4055
  readGroupedRange(groupedRange, fd) {
3876
4056
  var _this = this;
3877
4057
  return _wrapAsyncGenerator(function* () {
@@ -3899,9 +4079,16 @@ class FileHandler {
3899
4079
  });
3900
4080
  }
3901
4081
 
3902
- // CRITICAL FIX: Remove trailing newlines and whitespace for single range too
3903
- // Optimized: Use trimEnd() which efficiently removes all trailing whitespace (faster than manual checks)
3904
- lineString = lineString.trimEnd();
4082
+ // CRITICAL FIX: For single ranges, check if JSON appears truncated and try to complete it
4083
+ // Only attempt completion if the line doesn't end with newline (indicating possible truncation)
4084
+ if (!lineString.endsWith('\n')) {
4085
+ const completeLine = yield _awaitAsyncGenerator(_this.ensureCompleteLine(lineString, fd, range.start + actualBuffer.length));
4086
+ if (completeLine !== lineString) {
4087
+ lineString = completeLine.trimEnd();
4088
+ }
4089
+ } else {
4090
+ lineString = lineString.trimEnd();
4091
+ }
3905
4092
  yield {
3906
4093
  line: lineString,
3907
4094
  start: range.start,
@@ -3936,10 +4123,29 @@ class FileHandler {
3936
4123
  });
3937
4124
  }
3938
4125
 
4126
+ // CRITICAL FIX: Validate buffer completeness to prevent UTF-8 corruption
4127
+ // When reading non-adjacent ranges, the buffer may be incomplete (last line cut mid-character)
4128
+ const lastNewlineIndex = content.lastIndexOf('\n');
4129
+ if (lastNewlineIndex === -1 || lastNewlineIndex < content.length - 2) {
4130
+ // Buffer may be incomplete - truncate to last complete line
4131
+ if (_this.opts.debugMode) {
4132
+ console.warn(`⚠️ Incomplete buffer detected at offset ${firstRange.start}, truncating to last complete line`);
4133
+ }
4134
+ if (lastNewlineIndex > 0) {
4135
+ content = content.substring(0, lastNewlineIndex + 1);
4136
+ } else {
4137
+ // No complete lines found - may be a serious issue
4138
+ if (_this.opts.debugMode) {
4139
+ console.warn(`⚠️ No complete lines found in buffer at offset ${firstRange.start}`);
4140
+ }
4141
+ }
4142
+ }
4143
+
3939
4144
  // CRITICAL FIX: Handle ranges more carefully to prevent corruption
3940
4145
  if (groupedRange.length === 2 && groupedRange[0].end === groupedRange[1].start) {
3941
- // Special case: Adjacent ranges - split by newlines to prevent corruption
3942
- const lines = content.split('\n').filter(line => line.trim().length > 0);
4146
+ // Special case: Adjacent ranges - split by COMPLETE JSON lines, not just newlines
4147
+ // This prevents corruption when lines contain special characters or unescaped quotes
4148
+ const lines = _this.splitJsonLines(content);
3943
4149
  for (let i = 0; i < Math.min(lines.length, groupedRange.length); i++) {
3944
4150
  const range = groupedRange[i];
3945
4151
  yield {
@@ -3975,6 +4181,7 @@ class FileHandler {
3975
4181
 
3976
4182
  // OPTIMIZATION 4: Direct character check instead of regex/trimEnd
3977
4183
  // Remove trailing newlines and whitespace efficiently
4184
+ // CRITICAL FIX: Prevents incomplete JSON line reading that caused "Expected ',' or ']'" parsing errors
3978
4185
  // trimEnd() is actually optimized in V8, but we can check if there's anything to trim first
3979
4186
  const len = rangeContent.length;
3980
4187
  if (len > 0) {
@@ -3986,8 +4193,24 @@ class FileHandler {
3986
4193
  }
3987
4194
  }
3988
4195
  if (rangeContent.length === 0) continue;
4196
+
4197
+ // CRITICAL FIX: For multiple ranges, we cannot safely expand reading
4198
+ // because offsets are pre-calculated. Instead, validate JSON and let
4199
+ // the deserializer handle incomplete lines (which will trigger recovery)
4200
+ const trimmedContent = rangeContent.trim();
4201
+ let finalContent = rangeContent;
4202
+ if (trimmedContent.length > 0) {
4203
+ try {
4204
+ JSON.parse(trimmedContent);
4205
+ // JSON is valid, use as-is
4206
+ } catch (jsonError) {
4207
+ // JSON appears incomplete - this is expected for truncated ranges
4208
+ // Let the deserializer handle it (will trigger streaming recovery if needed)
4209
+ // We don't try to expand reading here because offsets are pre-calculated
4210
+ }
4211
+ }
3989
4212
  yield {
3990
- line: rangeContent,
4213
+ line: finalContent,
3991
4214
  start: range.start,
3992
4215
  _: range.index !== undefined ? range.index : range._ || null
3993
4216
  };
@@ -3998,41 +4221,47 @@ class FileHandler {
3998
4221
  walk(ranges) {
3999
4222
  var _this2 = this;
4000
4223
  return _wrapAsyncGenerator(function* () {
4001
- // Check if file exists before trying to read it
4002
- if (!(yield _awaitAsyncGenerator(_this2.exists()))) {
4003
- return; // Return empty generator if file doesn't exist
4004
- }
4005
- const fd = yield _awaitAsyncGenerator(fs.promises.open(_this2.file, 'r'));
4224
+ // CRITICAL FIX: Acquire file mutex to prevent race conditions with concurrent writes
4225
+ const release = _this2.fileMutex ? yield _awaitAsyncGenerator(_this2.fileMutex.acquire()) : () => {};
4006
4226
  try {
4007
- const groupedRanges = yield _awaitAsyncGenerator(_this2.groupedRanges(ranges));
4008
- for (const groupedRange of groupedRanges) {
4009
- var _iteratorAbruptCompletion2 = false;
4010
- var _didIteratorError2 = false;
4011
- var _iteratorError2;
4012
- try {
4013
- for (var _iterator2 = _asyncIterator(_this2.readGroupedRange(groupedRange, fd)), _step2; _iteratorAbruptCompletion2 = !(_step2 = yield _awaitAsyncGenerator(_iterator2.next())).done; _iteratorAbruptCompletion2 = false) {
4014
- const row = _step2.value;
4015
- {
4016
- yield row;
4017
- }
4018
- }
4019
- } catch (err) {
4020
- _didIteratorError2 = true;
4021
- _iteratorError2 = err;
4022
- } finally {
4227
+ // Check if file exists before trying to read it
4228
+ if (!(yield _awaitAsyncGenerator(_this2.exists()))) {
4229
+ return; // Return empty generator if file doesn't exist
4230
+ }
4231
+ const fd = yield _awaitAsyncGenerator(fs.promises.open(_this2.file, 'r'));
4232
+ try {
4233
+ const groupedRanges = yield _awaitAsyncGenerator(_this2.groupedRanges(ranges));
4234
+ for (const groupedRange of groupedRanges) {
4235
+ var _iteratorAbruptCompletion2 = false;
4236
+ var _didIteratorError2 = false;
4237
+ var _iteratorError2;
4023
4238
  try {
4024
- if (_iteratorAbruptCompletion2 && _iterator2.return != null) {
4025
- yield _awaitAsyncGenerator(_iterator2.return());
4239
+ for (var _iterator2 = _asyncIterator(_this2.readGroupedRange(groupedRange, fd)), _step2; _iteratorAbruptCompletion2 = !(_step2 = yield _awaitAsyncGenerator(_iterator2.next())).done; _iteratorAbruptCompletion2 = false) {
4240
+ const row = _step2.value;
4241
+ {
4242
+ yield row;
4243
+ }
4026
4244
  }
4245
+ } catch (err) {
4246
+ _didIteratorError2 = true;
4247
+ _iteratorError2 = err;
4027
4248
  } finally {
4028
- if (_didIteratorError2) {
4029
- throw _iteratorError2;
4249
+ try {
4250
+ if (_iteratorAbruptCompletion2 && _iterator2.return != null) {
4251
+ yield _awaitAsyncGenerator(_iterator2.return());
4252
+ }
4253
+ } finally {
4254
+ if (_didIteratorError2) {
4255
+ throw _iteratorError2;
4256
+ }
4030
4257
  }
4031
4258
  }
4032
4259
  }
4260
+ } finally {
4261
+ yield _awaitAsyncGenerator(fd.close());
4033
4262
  }
4034
4263
  } finally {
4035
- yield _awaitAsyncGenerator(fd.close());
4264
+ release();
4036
4265
  }
4037
4266
  })();
4038
4267
  }
@@ -4158,7 +4387,9 @@ class FileHandler {
4158
4387
  JSON.parse(lines[i]);
4159
4388
  validLines.push(lines[i]);
4160
4389
  } catch (error) {
4161
- console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
4390
+ if (this.opts.debugMode) {
4391
+ console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
4392
+ }
4162
4393
  hasInvalidJson = true;
4163
4394
  }
4164
4395
  }
@@ -4784,7 +5015,9 @@ class FileHandler {
4784
5015
  content = buffer.toString('utf8');
4785
5016
  } catch (error) {
4786
5017
  // If UTF-8 decoding fails, try to recover by finding valid UTF-8 boundaries
4787
- console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`);
5018
+ if (this.opts.debugMode) {
5019
+ console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`);
5020
+ }
4788
5021
 
4789
5022
  // Find the last complete UTF-8 character
4790
5023
  let validLength = buffer.length;
@@ -7873,6 +8106,23 @@ class Database extends events.EventEmitter {
7873
8106
  loadTime: 0
7874
8107
  };
7875
8108
 
8109
+ // Initialize integrity correction tracking
8110
+ this.integrityCorrections = {
8111
+ indexSync: 0,
8112
+ // index.totalLines vs offsets.length corrections
8113
+ indexInconsistency: 0,
8114
+ // Index record count vs offsets mismatch
8115
+ writeBufferForced: 0,
8116
+ // WriteBuffer not cleared after save
8117
+ indexSaveFailures: 0,
8118
+ // Failed to save index data
8119
+ dataIntegrity: 0,
8120
+ // General data integrity issues
8121
+ utf8Recovery: 0,
8122
+ // UTF-8 decoding failures recovered
8123
+ jsonRecovery: 0 // JSON parsing failures recovered
8124
+ };
8125
+
7876
8126
  // Initialize usage stats for QueryManager
7877
8127
  this.usageStats = {
7878
8128
  totalQueries: 0,
@@ -8057,7 +8307,9 @@ class Database extends events.EventEmitter {
8057
8307
  }
8058
8308
  }
8059
8309
  if (arrayStringFields.length > 0) {
8060
- console.warn(`⚠️ Warning: The following array:string indexed fields were not added to term mapping: ${arrayStringFields.join(', ')}. This may impact performance.`);
8310
+ if (this.opts.debugMode) {
8311
+ console.warn(`⚠️ Warning: The following array:string indexed fields were not added to term mapping: ${arrayStringFields.join(', ')}. This may impact performance.`);
8312
+ }
8061
8313
  }
8062
8314
  }
8063
8315
  if (this.opts.debugMode) {
@@ -8098,13 +8350,17 @@ class Database extends events.EventEmitter {
8098
8350
  }
8099
8351
 
8100
8352
  /**
8101
- * Get term mapping fields from indexes (auto-detected)
8353
+ * Get term mapping fields from configuration or indexes (auto-detected)
8102
8354
  * @returns {string[]} Array of field names that use term mapping
8103
8355
  */
8104
8356
  getTermMappingFields() {
8105
- if (!this.opts.indexes) return [];
8357
+ // If termMappingFields is explicitly configured, use it
8358
+ if (this.opts.termMappingFields && Array.isArray(this.opts.termMappingFields)) {
8359
+ return [...this.opts.termMappingFields];
8360
+ }
8106
8361
 
8107
- // Auto-detect fields that benefit from term mapping
8362
+ // Auto-detect fields that benefit from term mapping from indexes
8363
+ if (!this.opts.indexes) return [];
8108
8364
  const termMappingFields = [];
8109
8365
  for (const [field, type] of Object.entries(this.opts.indexes)) {
8110
8366
  // Fields that should use term mapping (only array fields)
@@ -8219,6 +8475,18 @@ class Database extends events.EventEmitter {
8219
8475
  }
8220
8476
  }
8221
8477
 
8478
+ // CRITICAL INTEGRITY CHECK: Ensure IndexManager is consistent with loaded offsets
8479
+ // This must happen immediately after load() to prevent any subsequent operations from seeing inconsistent state
8480
+ if (this.indexManager && this.offsets && this.offsets.length > 0) {
8481
+ const currentTotalLines = this.indexManager.totalLines || 0;
8482
+ if (currentTotalLines !== this.offsets.length) {
8483
+ this.indexManager.setTotalLines(this.offsets.length);
8484
+ if (this.opts.debugMode) {
8485
+ console.log(`🔧 Post-load integrity sync: IndexManager totalLines ${currentTotalLines} → ${this.offsets.length}`);
8486
+ }
8487
+ }
8488
+ }
8489
+
8222
8490
  // Manual save is now the default behavior
8223
8491
 
8224
8492
  // CRITICAL FIX: Ensure IndexManager totalLines is consistent with offsets
@@ -8366,11 +8634,11 @@ class Database extends events.EventEmitter {
8366
8634
  this.offsets = parsedIdxData.offsets;
8367
8635
  // CRITICAL FIX: Update IndexManager totalLines to match offsets length
8368
8636
  // This ensures queries and length property work correctly even if offsets are reset later
8369
- if (this.indexManager && this.offsets.length > 0) {
8637
+ if (this.indexManager) {
8370
8638
  this.indexManager.setTotalLines(this.offsets.length);
8371
- }
8372
- if (this.opts.debugMode) {
8373
- console.log(`📂 Loaded ${this.offsets.length} offsets from ${idxPath}`);
8639
+ if (this.opts.debugMode) {
8640
+ console.log(`📂 Loaded ${this.offsets.length} offsets from ${idxPath}, synced IndexManager totalLines`);
8641
+ }
8374
8642
  }
8375
8643
  }
8376
8644
 
@@ -9162,7 +9430,12 @@ class Database extends events.EventEmitter {
9162
9430
  // Check that all indexed records have valid line numbers
9163
9431
  const indexedRecordCount = this.indexManager.getIndexedRecordCount?.() || allData.length;
9164
9432
  if (indexedRecordCount !== this.offsets.length) {
9165
- console.warn(`⚠️ Index inconsistency detected: indexed ${indexedRecordCount} records but offsets has ${this.offsets.length} entries`);
9433
+ this.integrityCorrections.indexInconsistency++;
9434
+ console.log(`🔧 Auto-corrected index consistency: ${indexedRecordCount} indexed → ${this.offsets.length} offsets`);
9435
+ if (this.integrityCorrections.indexInconsistency > 5) {
9436
+ console.warn(`⚠️ Frequent index inconsistencies detected (${this.integrityCorrections.indexInconsistency} times)`);
9437
+ }
9438
+
9166
9439
  // Force consistency by setting totalLines to match offsets
9167
9440
  this.indexManager.setTotalLines(this.offsets.length);
9168
9441
  } else {
@@ -9706,37 +9979,31 @@ class Database extends events.EventEmitter {
9706
9979
  }
9707
9980
  try {
9708
9981
  // INTEGRITY CHECK: Validate data consistency before querying
9709
- // Check if index and offsets are synchronized
9982
+ // This is a safety net for unexpected inconsistencies - should rarely trigger
9710
9983
  if (this.indexManager && this.offsets && this.offsets.length > 0) {
9711
9984
  const indexTotalLines = this.indexManager.totalLines || 0;
9712
9985
  const offsetsLength = this.offsets.length;
9713
9986
  if (indexTotalLines !== offsetsLength) {
9714
- console.warn(`⚠️ Data integrity issue detected: index.totalLines=${indexTotalLines}, offsets.length=${offsetsLength}`);
9715
- // Auto-correct by updating index totalLines to match offsets
9716
- this.indexManager.setTotalLines(offsetsLength);
9987
+ // This should be extremely rare - indicates a real bug if it happens frequently
9988
+ this.integrityCorrections.dataIntegrity++;
9989
+
9990
+ // Only show in debug mode - these corrections indicate real issues
9717
9991
  if (this.opts.debugMode) {
9718
- console.log(`🔧 Auto-corrected index totalLines to ${offsetsLength}`);
9992
+ console.log(`🔧 Integrity correction needed: index.totalLines ${indexTotalLines} ${offsetsLength} (${this.integrityCorrections.dataIntegrity} total)`);
9719
9993
  }
9720
9994
 
9721
- // CRITICAL FIX: Also save the corrected index to prevent persistence of inconsistency
9722
- // This ensures the .idx.jdb file contains the correct totalLines value
9995
+ // Warn if corrections are becoming frequent (indicates a real problem)
9996
+ if (this.integrityCorrections.dataIntegrity > 5) {
9997
+ console.warn(`⚠️ Frequent integrity corrections (${this.integrityCorrections.dataIntegrity} times) - this indicates a systemic issue`);
9998
+ }
9999
+ this.indexManager.setTotalLines(offsetsLength);
10000
+
10001
+ // Try to persist the fix, but don't fail the operation if it doesn't work
9723
10002
  try {
9724
10003
  await this._saveIndexDataToFile();
9725
- if (this.opts.debugMode) {
9726
- console.log(`💾 Saved corrected index data to prevent future inconsistencies`);
9727
- }
9728
10004
  } catch (error) {
9729
- if (this.opts.debugMode) {
9730
- console.warn(`⚠️ Failed to save corrected index: ${error.message}`);
9731
- }
9732
- }
9733
-
9734
- // Verify the fix worked
9735
- const newIndexTotalLines = this.indexManager.totalLines || 0;
9736
- if (newIndexTotalLines === offsetsLength) {
9737
- console.log(`✅ Data integrity successfully corrected: index.totalLines=${newIndexTotalLines}, offsets.length=${offsetsLength}`);
9738
- } else {
9739
- console.error(`❌ Data integrity correction failed: index.totalLines=${newIndexTotalLines}, offsets.length=${offsetsLength}`);
10005
+ // Just track the failure - don't throw since this is a safety net
10006
+ this.integrityCorrections.indexSaveFailures++;
9740
10007
  }
9741
10008
  }
9742
10009
  }
@@ -12112,6 +12379,74 @@ class Database extends events.EventEmitter {
12112
12379
  }
12113
12380
  return this._getWriteBufferBaseLineNumber() + writeBufferIndex;
12114
12381
  }
12382
+
12383
+ /**
12384
+ * Attempts to recover a corrupted line by cleaning invalid characters and fixing common JSON issues
12385
+ * @param {string} line - The corrupted line to recover
12386
+ * @returns {string|null} - The recovered line or null if recovery is not possible
12387
+ */
12388
+ _tryRecoverLine(line) {
12389
+ if (!line || typeof line !== 'string') {
12390
+ return null;
12391
+ }
12392
+ try {
12393
+ // Try parsing as-is first
12394
+ JSON.parse(line);
12395
+ return line; // Line is already valid
12396
+ } catch (e) {
12397
+ // Line is corrupted, attempt recovery
12398
+ }
12399
+ let recovered = line.trim();
12400
+
12401
+ // Remove invalid control characters (except \n, \r, \t)
12402
+ recovered = recovered.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '');
12403
+
12404
+ // Try to close unclosed strings
12405
+ // Count quotes and ensure they're balanced
12406
+ const quoteCount = (recovered.match(/"/g) || []).length;
12407
+ if (quoteCount % 2 !== 0) {
12408
+ // Odd number of quotes - try to close the string
12409
+ const lastQuoteIndex = recovered.lastIndexOf('"');
12410
+ if (lastQuoteIndex > 0) {
12411
+ // Check if we're inside a string (not escaped)
12412
+ const beforeLastQuote = recovered.substring(0, lastQuoteIndex);
12413
+ const escapedQuotes = (beforeLastQuote.match(/\\"/g) || []).length;
12414
+ const unescapedQuotes = (beforeLastQuote.match(/"/g) || []).length - escapedQuotes;
12415
+ if (unescapedQuotes % 2 !== 0) {
12416
+ // We're inside an unclosed string - try to close it
12417
+ recovered = recovered + '"';
12418
+ }
12419
+ }
12420
+ }
12421
+
12422
+ // Try to close unclosed arrays/objects
12423
+ const openBraces = (recovered.match(/\{/g) || []).length;
12424
+ const closeBraces = (recovered.match(/\}/g) || []).length;
12425
+ const openBrackets = (recovered.match(/\[/g) || []).length;
12426
+ const closeBrackets = (recovered.match(/\]/g) || []).length;
12427
+
12428
+ // Remove trailing commas before closing braces/brackets
12429
+ recovered = recovered.replace(/,\s*([}\]])/g, '$1');
12430
+
12431
+ // Try to close arrays
12432
+ if (openBrackets > closeBrackets) {
12433
+ recovered = recovered + ']'.repeat(openBrackets - closeBrackets);
12434
+ }
12435
+
12436
+ // Try to close objects
12437
+ if (openBraces > closeBraces) {
12438
+ recovered = recovered + '}'.repeat(openBraces - closeBraces);
12439
+ }
12440
+
12441
+ // Final validation - try to parse
12442
+ try {
12443
+ JSON.parse(recovered);
12444
+ return recovered;
12445
+ } catch (e) {
12446
+ // Recovery failed
12447
+ return null;
12448
+ }
12449
+ }
12115
12450
  _streamingRecoveryGenerator(_x, _x2) {
12116
12451
  var _this = this;
12117
12452
  return _wrapAsyncGenerator(function* (criteria, options, alreadyYielded = 0, map = null, remainingSkipValue = 0) {
@@ -12311,6 +12646,17 @@ class Database extends events.EventEmitter {
12311
12646
 
12312
12647
  // If no data at all, return empty
12313
12648
  if (_this2.indexOffset === 0 && _this2.writeBuffer.length === 0) return;
12649
+
12650
+ // CRITICAL FIX: Wait for any ongoing save operations to complete
12651
+ // This prevents reading partially written data
12652
+ if (_this2.isSaving) {
12653
+ if (_this2.opts.debugMode) {
12654
+ console.log('🔍 walk(): waiting for save operation to complete');
12655
+ }
12656
+ while (_this2.isSaving) {
12657
+ yield _awaitAsyncGenerator(new Promise(resolve => setTimeout(resolve, 10)));
12658
+ }
12659
+ }
12314
12660
  let count = 0;
12315
12661
  let remainingSkip = options.skip || 0;
12316
12662
  let map;
@@ -12456,10 +12802,49 @@ class Database extends events.EventEmitter {
12456
12802
  } catch (error) {
12457
12803
  // CRITICAL FIX: Log deserialization errors instead of silently ignoring them
12458
12804
  // This helps identify data corruption issues
12459
- if (1 || _this2.opts.debugMode) {
12805
+ if (_this2.opts.debugMode) {
12460
12806
  console.warn(`⚠️ walk(): Failed to deserialize record at offset ${row.start}: ${error.message}`);
12461
12807
  console.warn(`⚠️ walk(): Problematic line (first 200 chars): ${row.line.substring(0, 200)}`);
12462
12808
  }
12809
+
12810
+ // CRITICAL FIX: Attempt to recover corrupted line before giving up
12811
+ const recoveredLine = _this2._tryRecoverLine(row.line);
12812
+ if (recoveredLine) {
12813
+ try {
12814
+ const record = _this2.serializer.deserialize(recoveredLine);
12815
+ if (record !== null) {
12816
+ _this2.integrityCorrections.jsonRecovery++;
12817
+ console.log(`🔧 Recovered corrupted JSON line (${_this2.integrityCorrections.jsonRecovery} recoveries)`);
12818
+ if (_this2.integrityCorrections.jsonRecovery > 20) {
12819
+ console.warn(`⚠️ Frequent JSON recovery detected (${_this2.integrityCorrections.jsonRecovery} times) - may indicate data corruption`);
12820
+ }
12821
+ const recordWithTerms = _this2.restoreTermIdsAfterDeserialization(record);
12822
+ if (remainingSkip > 0) {
12823
+ remainingSkip--;
12824
+ continue;
12825
+ }
12826
+ count++;
12827
+ if (options.includeOffsets) {
12828
+ yield {
12829
+ entry: recordWithTerms,
12830
+ start: row.start,
12831
+ _: row._ || 0
12832
+ };
12833
+ } else {
12834
+ if (_this2.opts.includeLinePosition) {
12835
+ recordWithTerms._ = row._ || 0;
12836
+ }
12837
+ yield recordWithTerms;
12838
+ }
12839
+ continue; // Successfully recovered and yielded
12840
+ }
12841
+ } catch (recoveryError) {
12842
+ // Recovery attempt failed, continue with normal error handling
12843
+ if (_this2.opts.debugMode) {
12844
+ console.warn(`⚠️ walk(): Line recovery failed: ${recoveryError.message}`);
12845
+ }
12846
+ }
12847
+ }
12463
12848
  if (!_this2._offsetRecoveryInProgress) {
12464
12849
  var _iteratorAbruptCompletion5 = false;
12465
12850
  var _didIteratorError5 = false;
@@ -12608,10 +12993,52 @@ class Database extends events.EventEmitter {
12608
12993
  } catch (error) {
12609
12994
  // CRITICAL FIX: Log deserialization errors instead of silently ignoring them
12610
12995
  // This helps identify data corruption issues
12611
- if (1 || _this2.opts.debugMode) {
12996
+ if (_this2.opts.debugMode) {
12612
12997
  console.warn(`⚠️ walk(): Failed to deserialize record at offset ${row.start}: ${error.message}`);
12613
12998
  console.warn(`⚠️ walk(): Problematic line (first 200 chars): ${row.line.substring(0, 200)}`);
12614
12999
  }
13000
+
13001
+ // CRITICAL FIX: Attempt to recover corrupted line before giving up
13002
+ const recoveredLine = _this2._tryRecoverLine(row.line);
13003
+ if (recoveredLine) {
13004
+ try {
13005
+ const entry = yield _awaitAsyncGenerator(_this2.serializer.deserialize(recoveredLine, {
13006
+ compress: _this2.opts.compress,
13007
+ v8: _this2.opts.v8
13008
+ }));
13009
+ if (entry !== null) {
13010
+ _this2.integrityCorrections.jsonRecovery++;
13011
+ console.log(`🔧 Recovered corrupted JSON line (${_this2.integrityCorrections.jsonRecovery} recoveries)`);
13012
+ if (_this2.integrityCorrections.jsonRecovery > 20) {
13013
+ console.warn(`⚠️ Frequent JSON recovery detected (${_this2.integrityCorrections.jsonRecovery} times) - may indicate data corruption`);
13014
+ }
13015
+ const entryWithTerms = _this2.restoreTermIdsAfterDeserialization(entry);
13016
+ if (remainingSkip > 0) {
13017
+ remainingSkip--;
13018
+ continue;
13019
+ }
13020
+ count++;
13021
+ if (options.includeOffsets) {
13022
+ yield {
13023
+ entry: entryWithTerms,
13024
+ start: row.start,
13025
+ _: row._ || _this2.offsets.findIndex(n => n === row.start)
13026
+ };
13027
+ } else {
13028
+ if (_this2.opts.includeLinePosition) {
13029
+ entryWithTerms._ = row._ || _this2.offsets.findIndex(n => n === row.start);
13030
+ }
13031
+ yield entryWithTerms;
13032
+ }
13033
+ continue; // Successfully recovered and yielded
13034
+ }
13035
+ } catch (recoveryError) {
13036
+ // Recovery attempt failed, continue with normal error handling
13037
+ if (_this2.opts.debugMode) {
13038
+ console.warn(`⚠️ walk(): Line recovery failed: ${recoveryError.message}`);
13039
+ }
13040
+ }
13041
+ }
12615
13042
  if (!_this2._offsetRecoveryInProgress) {
12616
13043
  var _iteratorAbruptCompletion7 = false;
12617
13044
  var _didIteratorError7 = false;
@@ -12904,7 +13331,11 @@ class Database extends events.EventEmitter {
12904
13331
  await this.save();
12905
13332
  // Ensure writeBuffer is cleared after save
12906
13333
  if (this.writeBuffer.length > 0) {
12907
- console.warn('⚠️ WriteBuffer not cleared after save() - forcing clear');
13334
+ this.integrityCorrections.writeBufferForced++;
13335
+ console.log(`🔧 Forced WriteBuffer clear after save (${this.writeBuffer.length} items remaining)`);
13336
+ if (this.integrityCorrections.writeBufferForced > 3) {
13337
+ console.warn(`⚠️ Frequent WriteBuffer clear issues detected (${this.integrityCorrections.writeBufferForced} times)`);
13338
+ }
12908
13339
  this.writeBuffer = [];
12909
13340
  this.writeBufferOffsets = [];
12910
13341
  this.writeBufferSizes = [];
@@ -13025,7 +13456,8 @@ class Database extends events.EventEmitter {
13025
13456
  console.log(`💾 Index data saved to ${idxPath}`);
13026
13457
  }
13027
13458
  } catch (error) {
13028
- console.warn('Failed to save index data:', error.message);
13459
+ this.integrityCorrections.indexSaveFailures++;
13460
+ console.warn(`⚠️ Index save failure (${this.integrityCorrections.indexSaveFailures} times): ${error.message}`);
13029
13461
  throw error; // Re-throw to let caller handle
13030
13462
  }
13031
13463
  }
@@ -13096,4 +13528,3 @@ class Database extends events.EventEmitter {
13096
13528
  }
13097
13529
 
13098
13530
  exports.Database = Database;
13099
- exports.default = Database;