jexidb 2.1.4 → 2.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -232,37 +232,192 @@ export default class FileHandler {
     return groupedRanges
   }
 
+  /**
+   * Ensure a line is complete by reading until newline if JSON appears truncated
+   * @param {string} line - The potentially incomplete line
+   * @param {FileHandle} fd - Open file handle to read from
+   * @param {number} currentOffset - Current read offset
+   * @returns {Promise<string>} Complete line
+   */
+  async ensureCompleteLine(line, fd, currentOffset) {
+    // Fast check: if line already ends with newline, it's likely complete
+    if (line.endsWith('\n')) {
+      return line
+    }
+
+    // Check if the line contains valid JSON by trying to parse it
+    const trimmedLine = line.trim()
+    if (trimmedLine.length === 0) {
+      return line
+    }
+
+    // Try to parse as JSON to see if it's complete
+    try {
+      JSON.parse(trimmedLine)
+      // If parsing succeeds, the line is complete (just missing its newline)
+      // This is unusual but possible; return as-is
+      return line
+    } catch (jsonError) {
+      // JSON is incomplete, try to read more until we find a newline
+      const bufferSize = 2048 // Read in 2KB chunks for better performance
+      const additionalBuffer = Buffer.allocUnsafe(bufferSize)
+      let additionalOffset = currentOffset
+      let additionalContent = line
+
+      // Try reading up to 20KB more to find the newline (increased for safety)
+      const maxAdditionalRead = 20480
+      let totalAdditionalRead = 0
+
+      while (totalAdditionalRead < maxAdditionalRead) {
+        const { bytesRead } = await fd.read(additionalBuffer, 0, bufferSize, additionalOffset)
+
+        if (bytesRead === 0) {
+          // EOF reached, check if the accumulated content is now valid JSON
+          const finalTrimmed = additionalContent.trim()
+          try {
+            JSON.parse(finalTrimmed)
+            // If parsing succeeds now, return the content
+            return additionalContent
+          } catch {
+            // Still invalid, return original line to avoid data loss
+            return line
+          }
+        }
+
+        const chunk = additionalBuffer.toString('utf8', 0, bytesRead)
+        additionalContent += chunk
+        totalAdditionalRead += bytesRead
+
+        // Check if we found a newline in the accumulated content
+        const newlineIndex = additionalContent.indexOf('\n', line.length)
+        if (newlineIndex !== -1) {
+          // Found newline, return content up to and including the newline
+          const completeLine = additionalContent.substring(0, newlineIndex + 1)
+
+          // Validate that the complete line contains valid JSON
+          const trimmedComplete = completeLine.trim()
+          try {
+            JSON.parse(trimmedComplete)
+            return completeLine
+          } catch {
+            // Even with the newline, the JSON is invalid - this suggests data corruption
+            // Return the original line to trigger normal error handling
+            return line
+          }
+        }
+
+        additionalOffset += bytesRead
+      }
+
+      // If we couldn't find a newline within the limit, return the original line
+      // This prevents infinite reading and excessive memory usage
+      return line
+    }
+  }
+
+  /**
+   * Split content into complete JSON lines, handling special characters and escaped quotes
+   * CRITICAL FIX: Prevents "Expected ',' or ']'" and "Unterminated string" errors by ensuring
+   * each line is a complete, valid JSON object/array, even when containing special characters
+   * @param {string} content - Raw content containing multiple JSON lines
+   * @returns {string[]} Array of complete JSON lines
+   */
+  splitJsonLines(content) {
+    const lines = []
+    let currentLine = ''
+    let inString = false
+    let escapeNext = false
+    let braceCount = 0
+    let bracketCount = 0
+
+    for (let i = 0; i < content.length; i++) {
+      const char = content[i]
+      const prevChar = i > 0 ? content[i - 1] : null
+
+      currentLine += char
+
+      if (escapeNext) {
+        escapeNext = false
+        continue
+      }
+
+      if (char === '\\') {
+        escapeNext = true
+        continue
+      }
+
+      if (char === '"' && !escapeNext) {
+        inString = !inString
+        continue
+      }
+
+      if (!inString) {
+        if (char === '{') braceCount++
+        else if (char === '}') braceCount--
+        else if (char === '[') bracketCount++
+        else if (char === ']') bracketCount--
+        else if (char === '\n' && braceCount === 0 && bracketCount === 0) {
+          // Found a complete JSON object/array at the newline
+          const trimmedLine = currentLine.trim()
+          if (trimmedLine.length > 0) {
+            lines.push(trimmedLine.replace(/\n$/, '')) // Remove trailing newline
+          }
+          currentLine = ''
+          braceCount = 0
+          bracketCount = 0
+          inString = false
+          escapeNext = false
+        }
+      }
+    }
+
+    // Add remaining content if it's a complete JSON object/array
+    const trimmedLine = currentLine.trim()
+    if (trimmedLine.length > 0 && braceCount === 0 && bracketCount === 0) {
+      lines.push(trimmedLine)
+    }
+
+    return lines.filter(line => line.trim().length > 0)
+  }
+
   async *readGroupedRange(groupedRange, fd) {
     if (groupedRange.length === 0) return
-
+
     // OPTIMIZATION: For single range, use direct approach
     if (groupedRange.length === 1) {
       const range = groupedRange[0]
       const bufferSize = range.end - range.start
-
+
       if (bufferSize <= 0 || bufferSize > this.maxBufferSize) {
         throw new Error(`Invalid buffer size: ${bufferSize}. Start: ${range.start}, End: ${range.end}. Max allowed: ${this.maxBufferSize}`)
       }
-
+
       const buffer = Buffer.allocUnsafe(bufferSize)
       const { bytesRead } = await fd.read(buffer, 0, bufferSize, range.start)
       const actualBuffer = bytesRead < bufferSize ? buffer.subarray(0, bytesRead) : buffer
-
+
       if (actualBuffer.length === 0) return
-
+
       let lineString
       try {
         lineString = actualBuffer.toString('utf8')
       } catch (error) {
         lineString = actualBuffer.toString('utf8', { replacement: '?' })
       }
-
-      // CRITICAL FIX: Remove trailing newlines and whitespace for single range too
-      // Optimized: Use trimEnd() which efficiently removes all trailing whitespace (faster than manual checks)
-      lineString = lineString.trimEnd()
-
-      yield {
-        line: lineString,
+
+      // CRITICAL FIX: For single ranges, check if the JSON appears truncated and try to complete it
+      // Only attempt completion if the line doesn't end with a newline (indicating possible truncation)
+      if (!lineString.endsWith('\n')) {
+        const completeLine = await this.ensureCompleteLine(lineString, fd, range.start + actualBuffer.length)
+        if (completeLine !== lineString) {
+          lineString = completeLine.trimEnd()
+        }
+      } else {
+        lineString = lineString.trimEnd()
+      }
+
+      yield {
+        line: lineString,
         start: range.start,
         _: range.index !== undefined ? range.index : (range._ || null)
       }
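
A note on the new splitJsonLines scanner above: it only breaks at newlines where it is outside a string and all braces/brackets are balanced, which is what lets it survive records that a plain content.split('\n') would slice apart. A minimal sketch of that behavior, assuming fileHandler is an existing FileHandler instance and the sample records are hypothetical:

    // Record 1 is pretty-printed across several physical lines and embeds
    // an escaped quote and a '}' inside a string; record 2 is compact.
    const content =
      '{\n  "id": 1,\n  "note": "an escaped \\"quote\\" and a } in a string"\n}\n' +
      '{"id":2}\n'

    const lines = fileHandler.splitJsonLines(content)
    console.log(lines.length)          // 2 - interior newlines did not split record 1
    lines.forEach(l => JSON.parse(l))  // both entries parse cleanly
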
@@ -293,15 +448,34 @@ export default class FileHandler {
       content = actualBuffer.toString('utf8', { replacement: '?' })
     }
 
+    // CRITICAL FIX: Validate buffer completeness to prevent UTF-8 corruption
+    // When reading non-adjacent ranges, the buffer may be incomplete (last line cut mid-character)
+    const lastNewlineIndex = content.lastIndexOf('\n')
+    if (lastNewlineIndex === -1 || lastNewlineIndex < content.length - 2) {
+      // Buffer may be incomplete - truncate to the last complete line
+      if (this.opts.debugMode) {
+        console.warn(`⚠️ Incomplete buffer detected at offset ${firstRange.start}, truncating to last complete line`)
+      }
+      if (lastNewlineIndex > 0) {
+        content = content.substring(0, lastNewlineIndex + 1)
+      } else {
+        // No complete lines found - may be a serious issue
+        if (this.opts.debugMode) {
+          console.warn(`⚠️ No complete lines found in buffer at offset ${firstRange.start}`)
+        }
+      }
+    }
+
     // CRITICAL FIX: Handle ranges more carefully to prevent corruption
     if (groupedRange.length === 2 && groupedRange[0].end === groupedRange[1].start) {
-      // Special case: Adjacent ranges - split by newlines to prevent corruption
-      const lines = content.split('\n').filter(line => line.trim().length > 0)
-
+      // Special case: Adjacent ranges - split by COMPLETE JSON lines, not just newlines
+      // This prevents corruption when lines contain special characters or unescaped quotes
+      const lines = this.splitJsonLines(content)
+
       for (let i = 0; i < Math.min(lines.length, groupedRange.length); i++) {
         const range = groupedRange[i]
-        yield {
-          line: lines[i],
+        yield {
+          line: lines[i],
           start: range.start,
           _: range.index !== undefined ? range.index : (range._ || null)
         }
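
The completeness check added above boils down to one rule: if a multi-range read does not end at a record boundary, cut the buffer back to its last newline rather than hand a half-record (possibly split mid-UTF-8 sequence) to the JSON parser. A simplified standalone restatement - the helper name is illustrative, and it omits the diff's debug logging and its small tolerance on the final newline position:

    // Keep only the complete lines of a raw read; anything after the final
    // '\n' is a partial record and is dropped.
    function truncateToCompleteLines(content) {
      const lastNewlineIndex = content.lastIndexOf('\n')
      if (lastNewlineIndex === -1) return ''             // no complete line at all
      return content.substring(0, lastNewlineIndex + 1)  // include the newline
    }

    truncateToCompleteLines('{"a":1}\n{"b":2}\n{"c"')    // -> '{"a":1}\n{"b":2}\n'
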
@@ -333,6 +507,7 @@ export default class FileHandler {
 
     // OPTIMIZATION 4: Direct character check instead of regex/trimEnd
     // Remove trailing newlines and whitespace efficiently
+    // CRITICAL FIX: Prevents incomplete JSON line reading that caused "Expected ',' or ']'" parsing errors
     // trimEnd() is actually optimized in V8, but we can check if there's anything to trim first
     const len = rangeContent.length
     if (len > 0) {
@@ -345,9 +520,26 @@ export default class FileHandler {
     }
 
     if (rangeContent.length === 0) continue
-
-    yield {
-      line: rangeContent,
+
+    // CRITICAL FIX: For multiple ranges, we cannot safely expand reading
+    // because offsets are pre-calculated. Instead, validate the JSON and let
+    // the deserializer handle incomplete lines (which will trigger recovery)
+    const trimmedContent = rangeContent.trim()
+    let finalContent = rangeContent
+
+    if (trimmedContent.length > 0) {
+      try {
+        JSON.parse(trimmedContent)
+        // JSON is valid, use as-is
+      } catch (jsonError) {
+        // JSON appears incomplete - this is expected for truncated ranges
+        // Let the deserializer handle it (will trigger streaming recovery if needed)
+        // We don't try to expand reading here because offsets are pre-calculated
+      }
+    }
+
+    yield {
+      line: finalContent,
       start: range.start,
       _: range.index !== undefined ? range.index : (range._ || null)
     }
@@ -356,21 +548,27 @@ export default class FileHandler {
   }
 
   async *walk(ranges) {
-    // Check if file exists before trying to read it
-    if (!await this.exists()) {
-      return // Return empty generator if file doesn't exist
-    }
-
-    const fd = await fs.promises.open(this.file, 'r')
+    // CRITICAL FIX: Acquire the file mutex to prevent race conditions with concurrent writes
+    const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
     try {
-      const groupedRanges = await this.groupedRanges(ranges)
-      for (const groupedRange of groupedRanges) {
-        for await (const row of this.readGroupedRange(groupedRange, fd)) {
-          yield row
+      // Check if file exists before trying to read it
+      if (!await this.exists()) {
+        return // Return empty generator if file doesn't exist
+      }
+
+      const fd = await fs.promises.open(this.file, 'r')
+      try {
+        const groupedRanges = await this.groupedRanges(ranges)
+        for (const groupedRange of groupedRanges) {
+          for await (const row of this.readGroupedRange(groupedRange, fd)) {
+            yield row
+          }
         }
+      } finally {
+        await fd.close()
       }
     } finally {
-      await fd.close()
+      release()
     }
   }
 
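The reworked walk() assumes this.fileMutex.acquire() resolves to a release callback; the mutex implementation itself is not part of this diff. A minimal promise-chain mutex with that contract, shown only to illustrate the shape walk() relies on:

    // Minimal promise-chain mutex: acquire() resolves to a release() function
    // once every earlier holder has released. Not jexidb's implementation.
    class Mutex {
      constructor() {
        this.last = Promise.resolve()
      }
      acquire() {
        let release
        const next = new Promise(resolve => { release = resolve })
        const ready = this.last.then(() => release)
        this.last = next
        return ready
      }
    }

Because walk() is an async generator, the lock is held until the consumer exhausts or closes the iterator, so a reader that abandons iteration early should call return() on it to let the finally blocks run.
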
@@ -504,7 +702,9 @@ export default class FileHandler {
         JSON.parse(lines[i]);
         validLines.push(lines[i]);
       } catch (error) {
-        console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
+        if (this.opts.debugMode) {
+          console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
+        }
         hasInvalidJson = true;
       }
     }
@@ -1094,7 +1294,9 @@ export default class FileHandler {
       content = buffer.toString('utf8')
     } catch (error) {
       // If UTF-8 decoding fails, try to recover by finding valid UTF-8 boundaries
-      console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`)
+      if (this.opts.debugMode) {
+        console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`)
+      }
 
       // Find the last complete UTF-8 character
       let validLength = buffer.length
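
The recovery path that starts here walks validLength back from the end of the buffer to the last complete UTF-8 character. The scan itself falls outside this excerpt, but the standard technique is to skip trailing continuation bytes (0b10xxxxxx) and then check whether the lead byte's full sequence is present - a sketch of that idea, not necessarily jexidb's exact loop:

    // Return a byte length that ends on a complete UTF-8 character.
    function lastCompleteUtf8Length(buffer) {
      let i = buffer.length - 1
      while (i >= 0 && (buffer[i] & 0xC0) === 0x80) i--  // skip continuation bytes
      if (i < 0) return 0
      const lead = buffer[i]
      // Sequence length implied by the lead byte: 1 (ASCII), 2, 3 or 4 bytes
      const expected = lead < 0x80 ? 1 : lead >= 0xF0 ? 4 : lead >= 0xE0 ? 3 : 2
      return buffer.length - i >= expected ? buffer.length : i
    }
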
@@ -120,12 +120,15 @@ export default class Serializer {
    * Advanced serialization with optimized JSON.stringify and buffer pooling
    */
   serializeAdvanced(data, addLinebreak) {
+    // CRITICAL FIX: Sanitize data to remove problematic characters before serialization
+    const sanitizedData = this.sanitizeDataForJSON(data)
+
     // Validate encoding before serialization
-    this.validateEncodingBeforeSerialization(data)
-
+    this.validateEncodingBeforeSerialization(sanitizedData)
+
     // Use optimized JSON.stringify without buffer pooling
     // NOTE: Buffer pool removed - using direct Buffer creation for simplicity and reliability
-    const json = this.optimizedStringify(data)
+    const json = this.optimizedStringify(sanitizedData)
 
     // CRITICAL FIX: Normalize encoding before creating buffer
     const normalizedJson = this.normalizeEncoding(json)
@@ -239,6 +242,54 @@ export default class Serializer {
   /**
    * Validate encoding before serialization
    */
+  /**
+   * Sanitize data to remove problematic characters that break JSON parsing
+   * CRITICAL FIX: Prevents "Expected ',' or ']'" and "Unterminated string" errors
+   * by removing control characters that cannot be safely represented in JSON
+   */
+  sanitizeDataForJSON(data) {
+    const sanitizeString = (str) => {
+      if (typeof str !== 'string') return str
+
+      return str
+        // Remove control characters that break JSON parsing (but keep \n, \r, \t as they can be escaped)
+        // Removes: NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, VT, FF, SO, SI, DLE, DC1-DC4, NAK, SYN, ETB, CAN, EM, SUB, ESC, FS, GS, RS, US, DEL, and the C1 controls
+        .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/g, '')
+        // Limit string length to prevent performance issues
+        .substring(0, 10000)
+    }
+
+    const sanitizeArray = (arr) => {
+      if (!Array.isArray(arr)) return arr
+
+      return arr
+        .map(item => this.sanitizeDataForJSON(item))
+        .filter(item => item !== null && item !== undefined && item !== '')
+    }
+
+    if (typeof data === 'string') {
+      return sanitizeString(data)
+    }
+
+    if (Array.isArray(data)) {
+      return sanitizeArray(data)
+    }
+
+    if (data && typeof data === 'object') {
+      const sanitized = {}
+      for (const [key, value] of Object.entries(data)) {
+        const sanitizedValue = this.sanitizeDataForJSON(value)
+        // Only include non-null, non-undefined values
+        if (sanitizedValue !== null && sanitizedValue !== undefined) {
+          sanitized[key] = sanitizedValue
+        }
+      }
+      return sanitized
+    }
+
+    return data
+  }
+
   validateEncodingBeforeSerialization(data) {
     const issues = []
 
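To make the sanitizer's effect concrete: control characters are stripped, the escapable whitespace controls (\n, \r, \t) survive, null/undefined object fields are dropped, and arrays additionally lose empty strings. Assuming serializer is an existing Serializer instance and the record is hypothetical:

    const dirty = {
      name: 'report\x00v2',          // NUL would corrupt the stored line
      body: 'line1\nline2\tend',     // kept: JSON.stringify escapes these
      stale: undefined,              // dropped from the object
      tags: ['ok', '', null, 'x']    // '' and null are filtered from arrays
    }

    const clean = serializer.sanitizeDataForJSON(dirty)
    // -> { name: 'reportv2', body: 'line1\nline2\tend', tags: ['ok', 'x'] }
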
@@ -347,12 +398,15 @@ export default class Serializer {
    * Standard serialization (fallback)
    */
   serializeStandard(data, addLinebreak) {
+    // CRITICAL FIX: Sanitize data to remove problematic characters before serialization
+    const sanitizedData = this.sanitizeDataForJSON(data)
+
     // Validate encoding before serialization
-    this.validateEncodingBeforeSerialization(data)
+    this.validateEncodingBeforeSerialization(sanitizedData)
 
     // NOTE: Buffer pool removed - using direct Buffer creation for simplicity and reliability
     // CRITICAL: Normalize encoding for all string fields before stringify
-    const normalizedData = this.deepNormalizeEncoding(data)
+    const normalizedData = this.deepNormalizeEncoding(sanitizedData)
     const json = JSON.stringify(normalizedData)
 
     // CRITICAL FIX: Normalize encoding before creating buffer
@@ -575,11 +629,14 @@ export default class Serializer {
    * Batch serialization for multiple records
    */
   serializeBatch(dataArray, opts = {}) {
+    // CRITICAL FIX: Sanitize data to remove problematic characters before serialization
+    const sanitizedDataArray = dataArray.map(data => this.sanitizeDataForJSON(data))
+
     // Validate encoding before serialization
-    this.validateEncodingBeforeSerialization(dataArray)
-
+    this.validateEncodingBeforeSerialization(sanitizedDataArray)
+
     // Convert all objects to array format for optimization
-    const convertedData = dataArray.map(data => this.convertToArrayFormat(data))
+    const convertedData = sanitizedDataArray.map(data => this.convertToArrayFormat(data))
 
     // Track conversion statistics
     this.serializationStats.arraySerializations += convertedData.filter((item, index) =>