jexidb 2.1.3 → 2.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +339 -191
- package/dist/Database.cjs +713 -137
- package/package.json +4 -7
- package/src/Database.mjs +435 -75
- package/src/FileHandler.mjs +235 -33
- package/src/SchemaManager.mjs +3 -31
- package/src/Serializer.mjs +65 -8
- package/src/managers/IndexManager.mjs +15 -4
- package/src/managers/QueryManager.mjs +3 -3
package/src/FileHandler.mjs
CHANGED
|
@@ -232,37 +232,192 @@ export default class FileHandler {
|
|
|
232
232
|
return groupedRanges
|
|
233
233
|
}
|
|
234
234
|
|
|
235
|
+
/**
|
|
236
|
+
* Ensure a line is complete by reading until newline if JSON appears truncated
|
|
237
|
+
* @param {string} line - The potentially incomplete line
|
|
238
|
+
* @param {number} fd - File descriptor
|
|
239
|
+
* @param {number} currentOffset - Current read offset
|
|
240
|
+
* @returns {string} Complete line
|
|
241
|
+
*/
|
|
242
|
+
async ensureCompleteLine(line, fd, currentOffset) {
|
|
243
|
+
// Fast check: if line already ends with newline, it's likely complete
|
|
244
|
+
if (line.endsWith('\n')) {
|
|
245
|
+
return line
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
// Check if the line contains valid JSON by trying to parse it
|
|
249
|
+
const trimmedLine = line.trim()
|
|
250
|
+
if (trimmedLine.length === 0) {
|
|
251
|
+
return line
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// Try to parse as JSON to see if it's complete
|
|
255
|
+
try {
|
|
256
|
+
JSON.parse(trimmedLine)
|
|
257
|
+
// If parsing succeeds, the line is complete (but missing newline)
|
|
258
|
+
// This is unusual but possible, return as-is
|
|
259
|
+
return line
|
|
260
|
+
} catch (jsonError) {
|
|
261
|
+
// JSON is incomplete, try to read more until we find a newline
|
|
262
|
+
const bufferSize = 2048 // Read in 2KB chunks for better performance
|
|
263
|
+
const additionalBuffer = Buffer.allocUnsafe(bufferSize)
|
|
264
|
+
let additionalOffset = currentOffset
|
|
265
|
+
let additionalContent = line
|
|
266
|
+
|
|
267
|
+
// Try reading up to 20KB more to find the newline (increased for safety)
|
|
268
|
+
const maxAdditionalRead = 20480
|
|
269
|
+
let totalAdditionalRead = 0
|
|
270
|
+
|
|
271
|
+
while (totalAdditionalRead < maxAdditionalRead) {
|
|
272
|
+
const { bytesRead } = await fd.read(additionalBuffer, 0, bufferSize, additionalOffset)
|
|
273
|
+
|
|
274
|
+
if (bytesRead === 0) {
|
|
275
|
+
// EOF reached, check if the accumulated content is now valid JSON
|
|
276
|
+
const finalTrimmed = additionalContent.trim()
|
|
277
|
+
try {
|
|
278
|
+
JSON.parse(finalTrimmed)
|
|
279
|
+
// If parsing succeeds now, return the content
|
|
280
|
+
return additionalContent
|
|
281
|
+
} catch {
|
|
282
|
+
// Still invalid, return original line to avoid data loss
|
|
283
|
+
return line
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
const chunk = additionalBuffer.toString('utf8', 0, bytesRead)
|
|
288
|
+
additionalContent += chunk
|
|
289
|
+
totalAdditionalRead += bytesRead
|
|
290
|
+
|
|
291
|
+
// Check if we found a newline in the entire accumulated content
|
|
292
|
+
const newlineIndex = additionalContent.indexOf('\n', line.length)
|
|
293
|
+
if (newlineIndex !== -1) {
|
|
294
|
+
// Found newline, return content up to and including the newline
|
|
295
|
+
const completeLine = additionalContent.substring(0, newlineIndex + 1)
|
|
296
|
+
|
|
297
|
+
// Validate that the complete line contains valid JSON
|
|
298
|
+
const trimmedComplete = completeLine.trim()
|
|
299
|
+
try {
|
|
300
|
+
JSON.parse(trimmedComplete)
|
|
301
|
+
return completeLine
|
|
302
|
+
} catch {
|
|
303
|
+
// Even with newline, JSON is invalid - this suggests data corruption
|
|
304
|
+
// Return original line to trigger normal error handling
|
|
305
|
+
return line
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
additionalOffset += bytesRead
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
// If we couldn't find a newline within the limit, return the original line
|
|
313
|
+
// This prevents infinite reading and excessive memory usage
|
|
314
|
+
return line
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
/**
|
|
319
|
+
* Split content into complete JSON lines, handling special characters and escaped quotes
|
|
320
|
+
* CRITICAL FIX: Prevents "Expected ',' or ']'" and "Unterminated string" errors by ensuring
|
|
321
|
+
* each line is a complete, valid JSON object/array, even when containing special characters
|
|
322
|
+
* @param {string} content - Raw content containing multiple JSON lines
|
|
323
|
+
* @returns {string[]} Array of complete JSON lines
|
|
324
|
+
*/
|
|
325
|
+
splitJsonLines(content) {
|
|
326
|
+
const lines = []
|
|
327
|
+
let currentLine = ''
|
|
328
|
+
let inString = false
|
|
329
|
+
let escapeNext = false
|
|
330
|
+
let braceCount = 0
|
|
331
|
+
let bracketCount = 0
|
|
332
|
+
|
|
333
|
+
for (let i = 0; i < content.length; i++) {
|
|
334
|
+
const char = content[i]
|
|
335
|
+
const prevChar = i > 0 ? content[i - 1] : null
|
|
336
|
+
|
|
337
|
+
currentLine += char
|
|
338
|
+
|
|
339
|
+
if (escapeNext) {
|
|
340
|
+
escapeNext = false
|
|
341
|
+
continue
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
if (char === '\\') {
|
|
345
|
+
escapeNext = true
|
|
346
|
+
continue
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
if (char === '"' && !escapeNext) {
|
|
350
|
+
inString = !inString
|
|
351
|
+
continue
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
if (!inString) {
|
|
355
|
+
if (char === '{') braceCount++
|
|
356
|
+
else if (char === '}') braceCount--
|
|
357
|
+
else if (char === '[') bracketCount++
|
|
358
|
+
else if (char === ']') bracketCount--
|
|
359
|
+
else if (char === '\n' && braceCount === 0 && bracketCount === 0) {
|
|
360
|
+
// Found complete JSON object/array at newline
|
|
361
|
+
const trimmedLine = currentLine.trim()
|
|
362
|
+
if (trimmedLine.length > 0) {
|
|
363
|
+
lines.push(trimmedLine.replace(/\n$/, '')) // Remove trailing newline
|
|
364
|
+
}
|
|
365
|
+
currentLine = ''
|
|
366
|
+
braceCount = 0
|
|
367
|
+
bracketCount = 0
|
|
368
|
+
inString = false
|
|
369
|
+
escapeNext = false
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
// Add remaining content if it's a complete JSON object/array
|
|
375
|
+
const trimmedLine = currentLine.trim()
|
|
376
|
+
if (trimmedLine.length > 0 && braceCount === 0 && bracketCount === 0) {
|
|
377
|
+
lines.push(trimmedLine)
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
return lines.filter(line => line.trim().length > 0)
|
|
381
|
+
}
|
|
382
|
+
|
|
235
383
|
async *readGroupedRange(groupedRange, fd) {
|
|
236
384
|
if (groupedRange.length === 0) return
|
|
237
|
-
|
|
385
|
+
|
|
238
386
|
// OPTIMIZATION: For single range, use direct approach
|
|
239
387
|
if (groupedRange.length === 1) {
|
|
240
388
|
const range = groupedRange[0]
|
|
241
389
|
const bufferSize = range.end - range.start
|
|
242
|
-
|
|
390
|
+
|
|
243
391
|
if (bufferSize <= 0 || bufferSize > this.maxBufferSize) {
|
|
244
392
|
throw new Error(`Invalid buffer size: ${bufferSize}. Start: ${range.start}, End: ${range.end}. Max allowed: ${this.maxBufferSize}`)
|
|
245
393
|
}
|
|
246
|
-
|
|
394
|
+
|
|
247
395
|
const buffer = Buffer.allocUnsafe(bufferSize)
|
|
248
396
|
const { bytesRead } = await fd.read(buffer, 0, bufferSize, range.start)
|
|
249
397
|
const actualBuffer = bytesRead < bufferSize ? buffer.subarray(0, bytesRead) : buffer
|
|
250
|
-
|
|
398
|
+
|
|
251
399
|
if (actualBuffer.length === 0) return
|
|
252
|
-
|
|
400
|
+
|
|
253
401
|
let lineString
|
|
254
402
|
try {
|
|
255
403
|
lineString = actualBuffer.toString('utf8')
|
|
256
404
|
} catch (error) {
|
|
257
405
|
lineString = actualBuffer.toString('utf8', { replacement: '?' })
|
|
258
406
|
}
|
|
259
|
-
|
|
260
|
-
// CRITICAL FIX:
|
|
261
|
-
//
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
407
|
+
|
|
408
|
+
// CRITICAL FIX: For single ranges, check if JSON appears truncated and try to complete it
|
|
409
|
+
// Only attempt completion if the line doesn't end with newline (indicating possible truncation)
|
|
410
|
+
if (!lineString.endsWith('\n')) {
|
|
411
|
+
const completeLine = await this.ensureCompleteLine(lineString, fd, range.start + actualBuffer.length)
|
|
412
|
+
if (completeLine !== lineString) {
|
|
413
|
+
lineString = completeLine.trimEnd()
|
|
414
|
+
}
|
|
415
|
+
} else {
|
|
416
|
+
lineString = lineString.trimEnd()
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
yield {
|
|
420
|
+
line: lineString,
|
|
266
421
|
start: range.start,
|
|
267
422
|
_: range.index !== undefined ? range.index : (range._ || null)
|
|
268
423
|
}
|
|
@@ -293,15 +448,34 @@ export default class FileHandler {
|
|
|
293
448
|
content = actualBuffer.toString('utf8', { replacement: '?' })
|
|
294
449
|
}
|
|
295
450
|
|
|
451
|
+
// CRITICAL FIX: Validate buffer completeness to prevent UTF-8 corruption
|
|
452
|
+
// When reading non-adjacent ranges, the buffer may be incomplete (last line cut mid-character)
|
|
453
|
+
const lastNewlineIndex = content.lastIndexOf('\n')
|
|
454
|
+
if (lastNewlineIndex === -1 || lastNewlineIndex < content.length - 2) {
|
|
455
|
+
// Buffer may be incomplete - truncate to last complete line
|
|
456
|
+
if (this.opts.debugMode) {
|
|
457
|
+
console.warn(`⚠️ Incomplete buffer detected at offset ${firstRange.start}, truncating to last complete line`)
|
|
458
|
+
}
|
|
459
|
+
if (lastNewlineIndex > 0) {
|
|
460
|
+
content = content.substring(0, lastNewlineIndex + 1)
|
|
461
|
+
} else {
|
|
462
|
+
// No complete lines found - may be a serious issue
|
|
463
|
+
if (this.opts.debugMode) {
|
|
464
|
+
console.warn(`⚠️ No complete lines found in buffer at offset ${firstRange.start}`)
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
|
|
296
469
|
// CRITICAL FIX: Handle ranges more carefully to prevent corruption
|
|
297
470
|
if (groupedRange.length === 2 && groupedRange[0].end === groupedRange[1].start) {
|
|
298
|
-
// Special case: Adjacent ranges - split by
|
|
299
|
-
|
|
300
|
-
|
|
471
|
+
// Special case: Adjacent ranges - split by COMPLETE JSON lines, not just newlines
|
|
472
|
+
// This prevents corruption when lines contain special characters or unescaped quotes
|
|
473
|
+
const lines = this.splitJsonLines(content)
|
|
474
|
+
|
|
301
475
|
for (let i = 0; i < Math.min(lines.length, groupedRange.length); i++) {
|
|
302
476
|
const range = groupedRange[i]
|
|
303
|
-
yield {
|
|
304
|
-
line: lines[i],
|
|
477
|
+
yield {
|
|
478
|
+
line: lines[i],
|
|
305
479
|
start: range.start,
|
|
306
480
|
_: range.index !== undefined ? range.index : (range._ || null)
|
|
307
481
|
}
|
|
@@ -333,6 +507,7 @@ export default class FileHandler {
|
|
|
333
507
|
|
|
334
508
|
// OPTIMIZATION 4: Direct character check instead of regex/trimEnd
|
|
335
509
|
// Remove trailing newlines and whitespace efficiently
|
|
510
|
+
// CRITICAL FIX: Prevents incomplete JSON line reading that caused "Expected ',' or ']'" parsing errors
|
|
336
511
|
// trimEnd() is actually optimized in V8, but we can check if there's anything to trim first
|
|
337
512
|
const len = rangeContent.length
|
|
338
513
|
if (len > 0) {
|
|
@@ -345,9 +520,26 @@ export default class FileHandler {
|
|
|
345
520
|
}
|
|
346
521
|
|
|
347
522
|
if (rangeContent.length === 0) continue
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
523
|
+
|
|
524
|
+
// CRITICAL FIX: For multiple ranges, we cannot safely expand reading
|
|
525
|
+
// because offsets are pre-calculated. Instead, validate JSON and let
|
|
526
|
+
// the deserializer handle incomplete lines (which will trigger recovery)
|
|
527
|
+
const trimmedContent = rangeContent.trim()
|
|
528
|
+
let finalContent = rangeContent
|
|
529
|
+
|
|
530
|
+
if (trimmedContent.length > 0) {
|
|
531
|
+
try {
|
|
532
|
+
JSON.parse(trimmedContent)
|
|
533
|
+
// JSON is valid, use as-is
|
|
534
|
+
} catch (jsonError) {
|
|
535
|
+
// JSON appears incomplete - this is expected for truncated ranges
|
|
536
|
+
// Let the deserializer handle it (will trigger streaming recovery if needed)
|
|
537
|
+
// We don't try to expand reading here because offsets are pre-calculated
|
|
538
|
+
}
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
yield {
|
|
542
|
+
line: finalContent,
|
|
351
543
|
start: range.start,
|
|
352
544
|
_: range.index !== undefined ? range.index : (range._ || null)
|
|
353
545
|
}
|
|
@@ -356,21 +548,27 @@ export default class FileHandler {
|
|
|
356
548
|
}
|
|
357
549
|
|
|
358
550
|
async *walk(ranges) {
|
|
359
|
-
//
|
|
360
|
-
|
|
361
|
-
return // Return empty generator if file doesn't exist
|
|
362
|
-
}
|
|
363
|
-
|
|
364
|
-
const fd = await fs.promises.open(this.file, 'r')
|
|
551
|
+
// CRITICAL FIX: Acquire file mutex to prevent race conditions with concurrent writes
|
|
552
|
+
const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
|
|
365
553
|
try {
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
554
|
+
// Check if file exists before trying to read it
|
|
555
|
+
if (!await this.exists()) {
|
|
556
|
+
return // Return empty generator if file doesn't exist
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
const fd = await fs.promises.open(this.file, 'r')
|
|
560
|
+
try {
|
|
561
|
+
const groupedRanges = await this.groupedRanges(ranges)
|
|
562
|
+
for(const groupedRange of groupedRanges) {
|
|
563
|
+
for await (const row of this.readGroupedRange(groupedRange, fd)) {
|
|
564
|
+
yield row
|
|
565
|
+
}
|
|
370
566
|
}
|
|
567
|
+
} finally {
|
|
568
|
+
await fd.close()
|
|
371
569
|
}
|
|
372
570
|
} finally {
|
|
373
|
-
|
|
571
|
+
release()
|
|
374
572
|
}
|
|
375
573
|
}
|
|
376
574
|
|
|
@@ -504,7 +702,9 @@ export default class FileHandler {
|
|
|
504
702
|
JSON.parse(lines[i]);
|
|
505
703
|
validLines.push(lines[i]);
|
|
506
704
|
} catch (error) {
|
|
507
|
-
|
|
705
|
+
if (this.opts.debugMode) {
|
|
706
|
+
console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
|
|
707
|
+
}
|
|
508
708
|
hasInvalidJson = true;
|
|
509
709
|
}
|
|
510
710
|
}
|
|
@@ -1094,7 +1294,9 @@ export default class FileHandler {
|
|
|
1094
1294
|
content = buffer.toString('utf8')
|
|
1095
1295
|
} catch (error) {
|
|
1096
1296
|
// If UTF-8 decoding fails, try to recover by finding valid UTF-8 boundaries
|
|
1097
|
-
|
|
1297
|
+
if (this.opts.debugMode) {
|
|
1298
|
+
console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`)
|
|
1299
|
+
}
|
|
1098
1300
|
|
|
1099
1301
|
// Find the last complete UTF-8 character
|
|
1100
1302
|
let validLength = buffer.length
|
package/src/SchemaManager.mjs
CHANGED
|
@@ -163,38 +163,10 @@ export default class SchemaManager {
|
|
|
163
163
|
const obj = {}
|
|
164
164
|
const idIndex = this.schema.indexOf('id')
|
|
165
165
|
|
|
166
|
-
//
|
|
167
|
-
//
|
|
168
|
-
//
|
|
169
|
-
// 1. 'id' is not in current schema
|
|
170
|
-
// 2. Array has significantly more elements than current schema (2+ extra elements)
|
|
171
|
-
// This suggests the old schema had more fields, and 'id' was likely the first
|
|
172
|
-
// 3. First element is a very short string (max 20 chars) that looks like a generated ID
|
|
173
|
-
// (typically alphanumeric, often starting with letters like 'mit...' or similar patterns)
|
|
174
|
-
// 4. First field in current schema is not 'id' (to avoid false positives)
|
|
175
|
-
// 5. First element is not an array (to avoid false positives with array fields)
|
|
166
|
+
// DISABLED: Schema migration detection was causing field mapping corruption
|
|
167
|
+
// The logic was incorrectly assuming ID was in first position when it's appended at the end
|
|
168
|
+
// This caused fields to be shifted incorrectly during object-to-array-to-object conversion
|
|
176
169
|
let arrayOffset = 0
|
|
177
|
-
if (idIndex === -1 && arr.length >= this.schema.length + 2 && this.schema.length > 0) {
|
|
178
|
-
// Only apply if array has at least 2 extra elements (suggests old schema had more fields)
|
|
179
|
-
const firstElement = arr[0]
|
|
180
|
-
const firstFieldName = this.schema[0]
|
|
181
|
-
|
|
182
|
-
// Only apply shift if:
|
|
183
|
-
// - First field is not 'id'
|
|
184
|
-
// - First element is a very short string (max 20 chars) that looks like a generated ID
|
|
185
|
-
// - First element is not an array (to avoid false positives)
|
|
186
|
-
// - Array has at least 2 extra elements (strong indicator of schema migration)
|
|
187
|
-
if (firstFieldName !== 'id' &&
|
|
188
|
-
typeof firstElement === 'string' &&
|
|
189
|
-
!Array.isArray(firstElement) &&
|
|
190
|
-
firstElement.length > 0 &&
|
|
191
|
-
firstElement.length <= 20 && // Very conservative: max 20 chars (typical ID length)
|
|
192
|
-
/^[a-zA-Z0-9_-]+$/.test(firstElement)) {
|
|
193
|
-
// First element is likely the ID from old schema
|
|
194
|
-
obj.id = firstElement
|
|
195
|
-
arrayOffset = 1
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
170
|
|
|
199
171
|
// Map array values to object properties
|
|
200
172
|
// Only include fields that are in the schema
|
package/src/Serializer.mjs
CHANGED
|
@@ -120,12 +120,15 @@ export default class Serializer {
|
|
|
120
120
|
* Advanced serialization with optimized JSON.stringify and buffer pooling
|
|
121
121
|
*/
|
|
122
122
|
serializeAdvanced(data, addLinebreak) {
|
|
123
|
+
// CRITICAL FIX: Sanitize data to remove problematic characters before serialization
|
|
124
|
+
const sanitizedData = this.sanitizeDataForJSON(data)
|
|
125
|
+
|
|
123
126
|
// Validate encoding before serialization
|
|
124
|
-
this.validateEncodingBeforeSerialization(
|
|
125
|
-
|
|
127
|
+
this.validateEncodingBeforeSerialization(sanitizedData)
|
|
128
|
+
|
|
126
129
|
// Use optimized JSON.stringify without buffer pooling
|
|
127
130
|
// NOTE: Buffer pool removed - using direct Buffer creation for simplicity and reliability
|
|
128
|
-
const json = this.optimizedStringify(
|
|
131
|
+
const json = this.optimizedStringify(sanitizedData)
|
|
129
132
|
|
|
130
133
|
// CRITICAL FIX: Normalize encoding before creating buffer
|
|
131
134
|
const normalizedJson = this.normalizeEncoding(json)
|
|
@@ -239,6 +242,54 @@ export default class Serializer {
|
|
|
239
242
|
/**
|
|
240
243
|
* Validate encoding before serialization
|
|
241
244
|
*/
|
|
245
|
+
/**
|
|
246
|
+
* Sanitize data to remove problematic characters that break JSON parsing
|
|
247
|
+
* CRITICAL FIX: Prevents "Expected ',' or ']'" and "Unterminated string" errors
|
|
248
|
+
* by removing control characters that cannot be safely represented in JSON
|
|
249
|
+
*/
|
|
250
|
+
sanitizeDataForJSON(data) {
|
|
251
|
+
const sanitizeString = (str) => {
|
|
252
|
+
if (typeof str !== 'string') return str
|
|
253
|
+
|
|
254
|
+
return str
|
|
255
|
+
// Remove control characters that break JSON parsing (but keep \n, \r, \t as they can be escaped)
|
|
256
|
+
// Remove: NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, VT, FF, SO, SI, DLE, DC1-DC4, NAK, SYN, ETB, CAN, EM, SUB, ESC, FS, GS, RS, US, DEL, C1 controls
|
|
257
|
+
.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/g, '')
|
|
258
|
+
// Limit string length to prevent performance issues
|
|
259
|
+
.substring(0, 10000)
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
const sanitizeArray = (arr) => {
|
|
263
|
+
if (!Array.isArray(arr)) return arr
|
|
264
|
+
|
|
265
|
+
return arr
|
|
266
|
+
.map(item => this.sanitizeDataForJSON(item))
|
|
267
|
+
.filter(item => item !== null && item !== undefined && item !== '')
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
if (typeof data === 'string') {
|
|
271
|
+
return sanitizeString(data)
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
if (Array.isArray(data)) {
|
|
275
|
+
return sanitizeArray(data)
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
if (data && typeof data === 'object') {
|
|
279
|
+
const sanitized = {}
|
|
280
|
+
for (const [key, value] of Object.entries(data)) {
|
|
281
|
+
const sanitizedValue = this.sanitizeDataForJSON(value)
|
|
282
|
+
// Only include non-null, non-undefined values
|
|
283
|
+
if (sanitizedValue !== null && sanitizedValue !== undefined) {
|
|
284
|
+
sanitized[key] = sanitizedValue
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
return sanitized
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
return data
|
|
291
|
+
}
|
|
292
|
+
|
|
242
293
|
validateEncodingBeforeSerialization(data) {
|
|
243
294
|
const issues = []
|
|
244
295
|
|
|
@@ -347,12 +398,15 @@ export default class Serializer {
|
|
|
347
398
|
* Standard serialization (fallback)
|
|
348
399
|
*/
|
|
349
400
|
serializeStandard(data, addLinebreak) {
|
|
401
|
+
// CRITICAL FIX: Sanitize data to remove problematic characters before serialization
|
|
402
|
+
const sanitizedData = this.sanitizeDataForJSON(data)
|
|
403
|
+
|
|
350
404
|
// Validate encoding before serialization
|
|
351
|
-
this.validateEncodingBeforeSerialization(
|
|
405
|
+
this.validateEncodingBeforeSerialization(sanitizedData)
|
|
352
406
|
|
|
353
407
|
// NOTE: Buffer pool removed - using direct Buffer creation for simplicity and reliability
|
|
354
408
|
// CRITICAL: Normalize encoding for all string fields before stringify
|
|
355
|
-
const normalizedData = this.deepNormalizeEncoding(
|
|
409
|
+
const normalizedData = this.deepNormalizeEncoding(sanitizedData)
|
|
356
410
|
const json = JSON.stringify(normalizedData)
|
|
357
411
|
|
|
358
412
|
// CRITICAL FIX: Normalize encoding before creating buffer
|
|
@@ -575,11 +629,14 @@ export default class Serializer {
|
|
|
575
629
|
* Batch serialization for multiple records
|
|
576
630
|
*/
|
|
577
631
|
serializeBatch(dataArray, opts = {}) {
|
|
632
|
+
// CRITICAL FIX: Sanitize data to remove problematic characters before serialization
|
|
633
|
+
const sanitizedDataArray = dataArray.map(data => this.sanitizeDataForJSON(data))
|
|
634
|
+
|
|
578
635
|
// Validate encoding before serialization
|
|
579
|
-
this.validateEncodingBeforeSerialization(
|
|
580
|
-
|
|
636
|
+
this.validateEncodingBeforeSerialization(sanitizedDataArray)
|
|
637
|
+
|
|
581
638
|
// Convert all objects to array format for optimization
|
|
582
|
-
const convertedData =
|
|
639
|
+
const convertedData = sanitizedDataArray.map(data => this.convertToArrayFormat(data))
|
|
583
640
|
|
|
584
641
|
// Track conversion statistics
|
|
585
642
|
this.serializationStats.arraySerializations += convertedData.filter((item, index) =>
|
|
@@ -1022,11 +1022,11 @@ export default class IndexManager {
|
|
|
1022
1022
|
continue;
|
|
1023
1023
|
}
|
|
1024
1024
|
|
|
1025
|
-
if (typeof criteriaValue === 'object' && !Array.isArray(criteriaValue)) {
|
|
1025
|
+
if (typeof criteriaValue === 'object' && !Array.isArray(criteriaValue) && criteriaValue !== null) {
|
|
1026
1026
|
const fieldIndex = data[field];
|
|
1027
|
-
|
|
1027
|
+
|
|
1028
1028
|
// Handle $in operator for array queries
|
|
1029
|
-
if (criteriaValue.$in !== undefined) {
|
|
1029
|
+
if (criteriaValue.$in !== undefined && criteriaValue.$in !== null) {
|
|
1030
1030
|
const inValues = Array.isArray(criteriaValue.$in) ? criteriaValue.$in : [criteriaValue.$in];
|
|
1031
1031
|
|
|
1032
1032
|
// PERFORMANCE: Cache term mapping field check once
|
|
@@ -1969,6 +1969,14 @@ export default class IndexManager {
|
|
|
1969
1969
|
return
|
|
1970
1970
|
}
|
|
1971
1971
|
|
|
1972
|
+
// Restore totalLines from saved data
|
|
1973
|
+
if (index.totalLines !== undefined) {
|
|
1974
|
+
this.totalLines = index.totalLines
|
|
1975
|
+
if (this.opts.debugMode) {
|
|
1976
|
+
console.log(`🔍 IndexManager.load: Restored totalLines=${this.totalLines}`)
|
|
1977
|
+
}
|
|
1978
|
+
}
|
|
1979
|
+
|
|
1972
1980
|
this.index = processedIndex
|
|
1973
1981
|
}
|
|
1974
1982
|
|
|
@@ -2008,7 +2016,10 @@ export default class IndexManager {
|
|
|
2008
2016
|
* This resolves the issue where Sets appear as empty objects in JSON.stringify
|
|
2009
2017
|
*/
|
|
2010
2018
|
toJSON() {
|
|
2011
|
-
const serializable = {
|
|
2019
|
+
const serializable = {
|
|
2020
|
+
data: {},
|
|
2021
|
+
totalLines: this.totalLines
|
|
2022
|
+
}
|
|
2012
2023
|
|
|
2013
2024
|
// Check if this is a term mapping field for conversion
|
|
2014
2025
|
const isTermMappingField = (field) => {
|
|
@@ -1215,7 +1215,7 @@ export class QueryManager {
|
|
|
1215
1215
|
return false;
|
|
1216
1216
|
}
|
|
1217
1217
|
|
|
1218
|
-
if (typeof condition === 'object' && !Array.isArray(condition)) {
|
|
1218
|
+
if (typeof condition === 'object' && !Array.isArray(condition) && condition !== null) {
|
|
1219
1219
|
const operators = Object.keys(condition).map(op => normalizeOperator(op));
|
|
1220
1220
|
if (this.opts.debugMode) {
|
|
1221
1221
|
console.log(`🔍 Field '${field}' has operators:`, operators)
|
|
@@ -1532,8 +1532,8 @@ export class QueryManager {
|
|
|
1532
1532
|
|
|
1533
1533
|
if (termMappingFields.includes(field)) {
|
|
1534
1534
|
// For term mapping fields, simple equality or $in queries work well
|
|
1535
|
-
if (typeof condition === 'string' ||
|
|
1536
|
-
(typeof condition === 'object' && condition.$in && Array.isArray(condition.$in))) {
|
|
1535
|
+
if (typeof condition === 'string' ||
|
|
1536
|
+
(typeof condition === 'object' && condition !== null && condition.$in && Array.isArray(condition.$in))) {
|
|
1537
1537
|
return true;
|
|
1538
1538
|
}
|
|
1539
1539
|
}
|