jexidb 2.1.8 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1439 +1,1439 @@
1
- import fs from 'fs'
2
- import path from 'path'
3
- import readline from 'readline'
4
- import pLimit from 'p-limit'
5
- import pRetry from 'p-retry'
6
-
7
- export default class FileHandler {
8
- constructor(file, fileMutex = null, opts = {}) {
9
- this.file = file
10
- this.indexFile = file ? file.replace(/\.jdb$/, '.idx.jdb') : null
11
- this.fileMutex = fileMutex
12
- this.opts = opts
13
- this.maxBufferSize = opts.maxBufferSize || 4 * 1024 * 1024 // 4MB default
14
- // Global I/O limiter to prevent file descriptor exhaustion in concurrent operations
15
- this.readLimiter = pLimit(opts.maxConcurrentReads || 4)
16
- }
17
-
18
- _getIoTimeoutMs(override) {
19
- if (typeof override === 'number') return override
20
- if (typeof this.opts.ioTimeoutMs === 'number') return this.opts.ioTimeoutMs
21
- return 0
22
- }
23
-
24
- async _withIoTimeout(fn, timeoutMs, onTimeout) {
25
- if (!timeoutMs || timeoutMs <= 0) {
26
- return fn()
27
- }
28
- let timeoutId
29
- const timeoutPromise = new Promise((_, reject) => {
30
- timeoutId = setTimeout(() => {
31
- if (onTimeout) {
32
- try { onTimeout() } catch {}
33
- }
34
- const err = new Error(`I/O timeout after ${timeoutMs}ms`)
35
- err.code = 'ETIMEDOUT'
36
- reject(err)
37
- }, timeoutMs)
38
- })
39
- try {
40
- return await Promise.race([fn(), timeoutPromise])
41
- } finally {
42
- if (timeoutId) clearTimeout(timeoutId)
43
- }
44
- }
45
-
46
- async _readWithTimeout(fd, buffer, offset, length, position, timeoutMs) {
47
- return this._withIoTimeout(
48
- () => fd.read(buffer, offset, length, position),
49
- timeoutMs,
50
- () => fd.close().catch(() => {})
51
- )
52
- }
53
-
54
- async truncate(offset) {
55
- try {
56
- await fs.promises.access(this.file, fs.constants.F_OK)
57
- await fs.promises.truncate(this.file, offset)
58
- } catch (err) {
59
- await fs.promises.writeFile(this.file, '')
60
- }
61
- }
62
-
63
- async writeOffsets(data) {
64
- // Write offsets to the index file (will be combined with index data)
65
- await fs.promises.writeFile(this.indexFile, data)
66
- }
67
-
68
- async readOffsets() {
69
- try {
70
- return await fs.promises.readFile(this.indexFile)
71
- } catch (err) {
72
- return null
73
- }
74
- }
75
-
76
- async writeIndex(data) {
77
- // Write index data to the index file (will be combined with offsets)
78
- // Use Windows-specific retry logic for file operations
79
- await this._writeFileWithRetry(this.indexFile, data)
80
- }
81
-
82
- async readIndex() {
83
- try {
84
- return await fs.promises.readFile(this.indexFile)
85
- } catch (err) {
86
- return null
87
- }
88
- }
89
-
90
- async exists() {
91
- try {
92
- await fs.promises.access(this.file, fs.constants.F_OK)
93
- return true
94
- } catch (err) {
95
- return false
96
- }
97
- }
98
-
99
-
100
- async indexExists() {
101
- try {
102
- await fs.promises.access(this.indexFile, fs.constants.F_OK)
103
- return true
104
- } catch (err) {
105
- return false
106
- }
107
- }
108
-
109
- async isLegacyFormat() {
110
- if (!await this.exists()) return false
111
- if (await this.indexExists()) return false
112
-
113
- // Check if main file contains offsets at the end (legacy format)
114
- try {
115
- const lastLine = await this.readLastLine()
116
- if (!lastLine || !lastLine.length) return false
117
-
118
- // Try to parse as offsets array
119
- const content = lastLine.toString('utf-8').trim()
120
- const parsed = JSON.parse(content)
121
- return Array.isArray(parsed)
122
- } catch (err) {
123
- return false
124
- }
125
- }
126
-
127
- async migrateLegacyFormat(serializer) {
128
- if (!await this.isLegacyFormat()) return false
129
-
130
- console.log('Migrating from legacy format to new 3-file format...')
131
-
132
- // Read the legacy file
133
- const lastLine = await this.readLastLine()
134
- const offsets = JSON.parse(lastLine.toString('utf-8').trim())
135
-
136
- // Get index offset and truncate offsets array
137
- const indexOffset = offsets[offsets.length - 2]
138
- const dataOffsets = offsets.slice(0, -2)
139
-
140
- // Read index data
141
- const indexStart = indexOffset
142
- const indexEnd = offsets[offsets.length - 1]
143
- const indexBuffer = await this.readRange(indexStart, indexEnd)
144
- const indexData = await serializer.deserialize(indexBuffer)
145
-
146
- // Write offsets to separate file
147
- const offsetsString = await serializer.serialize(dataOffsets, { linebreak: false })
148
- await this.writeOffsets(offsetsString)
149
-
150
- // Write index to separate file
151
- const indexString = await serializer.serialize(indexData, { linebreak: false })
152
- await this.writeIndex(indexString)
153
-
154
- // Truncate main file to remove index and offsets
155
- await this.truncate(indexOffset)
156
-
157
- console.log('Migration completed successfully!')
158
- return true
159
- }
160
-
161
- async readRange(start, end) {
162
- // Check if file exists before trying to read it
163
- if (!await this.exists()) {
164
- return Buffer.alloc(0) // Return empty buffer if file doesn't exist
165
- }
166
-
167
- const timeoutMs = this._getIoTimeoutMs()
168
- let fd = await fs.promises.open(this.file, 'r')
169
- try {
170
- // CRITICAL FIX: Check file size before attempting to read
171
- const stats = await fd.stat()
172
- const fileSize = stats.size
173
-
174
- // If start position is beyond file size, return empty buffer
175
- if (start >= fileSize) {
176
- await fd.close()
177
- return Buffer.alloc(0)
178
- }
179
-
180
- // Adjust end position if it's beyond file size
181
- const actualEnd = Math.min(end, fileSize)
182
- const length = actualEnd - start
183
-
184
- // If length is 0 or negative, return empty buffer
185
- if (length <= 0) {
186
- await fd.close()
187
- return Buffer.alloc(0)
188
- }
189
-
190
- let buffer = Buffer.alloc(length)
191
- const { bytesRead } = await this._readWithTimeout(fd, buffer, 0, length, start, timeoutMs)
192
- await fd.close()
193
-
194
- // CRITICAL FIX: Ensure we read the expected amount of data
195
- if (bytesRead !== length) {
196
- const errorMsg = `CRITICAL: Expected to read ${length} bytes, but read ${bytesRead} bytes at position ${start}`
197
- console.error(`⚠️ ${errorMsg}`)
198
-
199
- // This indicates a race condition or file corruption
200
- // Don't retry - the caller should handle synchronization properly
201
- if (bytesRead === 0) {
202
- throw new Error(`File corruption detected: ${errorMsg}`)
203
- }
204
-
205
- // Return partial data with warning - caller should handle this
206
- return buffer.subarray(0, bytesRead)
207
- }
208
-
209
- return buffer
210
- } catch (error) {
211
- await fd.close().catch(() => {})
212
- throw error
213
- }
214
- }
215
-
216
- async readRanges(ranges, mapper) {
217
- const lines = {}
218
-
219
- // Check if file exists before trying to read it
220
- if (!await this.exists()) {
221
- return lines // Return empty object if file doesn't exist
222
- }
223
-
224
- const fd = await fs.promises.open(this.file, 'r')
225
- const groupedRanges = await this.groupedRanges(ranges)
226
- try {
227
- await Promise.allSettled(groupedRanges.map(async (groupedRange) => {
228
- await this.readLimiter(async () => {
229
- for await (const row of this.readGroupedRange(groupedRange, fd)) {
230
- lines[row.start] = mapper ? (await mapper(row.line, { start: row.start, end: row.start + row.line.length })) : row.line
231
- }
232
- })
233
- }))
234
- } catch (e) {
235
- console.error('Error reading ranges:', e)
236
- } finally {
237
- await fd.close()
238
- }
239
- return lines
240
- }
241
-
242
- async groupedRanges(ranges) { // expects ordered ranges from Database.getRanges()
243
- const readSize = 512 * 1024 // 512KB
244
- const groupedRanges = []
245
- let currentGroup = []
246
- let currentSize = 0
247
-
248
- // each range is a {start: number, end: number} object
249
- for (let i = 0; i < ranges.length; i++) {
250
- const range = ranges[i]
251
- const rangeSize = range.end - range.start
252
-
253
- if (currentGroup.length > 0) {
254
- const lastRange = currentGroup[currentGroup.length - 1]
255
- if (lastRange.end !== range.start || currentSize + rangeSize > readSize) {
256
- groupedRanges.push(currentGroup)
257
- currentGroup = []
258
- currentSize = 0
259
- }
260
- }
261
-
262
- currentGroup.push(range)
263
- currentSize += rangeSize
264
- }
265
-
266
- if (currentGroup.length > 0) {
267
- groupedRanges.push(currentGroup)
268
- }
269
-
270
- return groupedRanges
271
- }
272
-
273
- /**
274
- * Ensure a line is complete by reading until newline if JSON appears truncated
275
- * @param {string} line - The potentially incomplete line
276
- * @param {number} fd - File descriptor
277
- * @param {number} currentOffset - Current read offset
278
- * @returns {string} Complete line
279
- */
280
- async ensureCompleteLine(line, fd, currentOffset) {
281
- // Fast check: if line already ends with newline, it's likely complete
282
- if (line.endsWith('\n')) {
283
- return line
284
- }
285
-
286
- // Check if the line contains valid JSON by trying to parse it
287
- const trimmedLine = line.trim()
288
- if (trimmedLine.length === 0) {
289
- return line
290
- }
291
-
292
- // Try to parse as JSON to see if it's complete
293
- try {
294
- JSON.parse(trimmedLine)
295
- // If parsing succeeds, the line is complete (but missing newline)
296
- // This is unusual but possible, return as-is
297
- return line
298
- } catch (jsonError) {
299
- // JSON is incomplete, try to read more until we find a newline
300
- const bufferSize = 2048 // Read in 2KB chunks for better performance
301
- const additionalBuffer = Buffer.allocUnsafe(bufferSize)
302
- let additionalOffset = currentOffset
303
- let additionalContent = line
304
-
305
- // Try reading up to 20KB more to find the newline (increased for safety)
306
- const maxAdditionalRead = 20480
307
- let totalAdditionalRead = 0
308
-
309
- while (totalAdditionalRead < maxAdditionalRead) {
310
- const { bytesRead } = await fd.read(additionalBuffer, 0, bufferSize, additionalOffset)
311
-
312
- if (bytesRead === 0) {
313
- // EOF reached, check if the accumulated content is now valid JSON
314
- const finalTrimmed = additionalContent.trim()
315
- try {
316
- JSON.parse(finalTrimmed)
317
- // If parsing succeeds now, return the content
318
- return additionalContent
319
- } catch {
320
- // Still invalid, return original line to avoid data loss
321
- return line
322
- }
323
- }
324
-
325
- const chunk = additionalBuffer.toString('utf8', 0, bytesRead)
326
- additionalContent += chunk
327
- totalAdditionalRead += bytesRead
328
-
329
- // Check if we found a newline in the entire accumulated content
330
- const newlineIndex = additionalContent.indexOf('\n', line.length)
331
- if (newlineIndex !== -1) {
332
- // Found newline, return content up to and including the newline
333
- const completeLine = additionalContent.substring(0, newlineIndex + 1)
334
-
335
- // Validate that the complete line contains valid JSON
336
- const trimmedComplete = completeLine.trim()
337
- try {
338
- JSON.parse(trimmedComplete)
339
- return completeLine
340
- } catch {
341
- // Even with newline, JSON is invalid - this suggests data corruption
342
- // Return original line to trigger normal error handling
343
- return line
344
- }
345
- }
346
-
347
- additionalOffset += bytesRead
348
- }
349
-
350
- // If we couldn't find a newline within the limit, return the original line
351
- // This prevents infinite reading and excessive memory usage
352
- return line
353
- }
354
- }
355
-
356
- /**
357
- * Split content into complete JSON lines, handling special characters and escaped quotes
358
- * CRITICAL FIX: Prevents "Expected ',' or ']'" and "Unterminated string" errors by ensuring
359
- * each line is a complete, valid JSON object/array, even when containing special characters
360
- * @param {string} content - Raw content containing multiple JSON lines
361
- * @returns {string[]} Array of complete JSON lines
362
- */
363
- splitJsonLines(content) {
364
- const lines = []
365
- let currentLine = ''
366
- let inString = false
367
- let escapeNext = false
368
- let braceCount = 0
369
- let bracketCount = 0
370
-
371
- for (let i = 0; i < content.length; i++) {
372
- const char = content[i]
373
- const prevChar = i > 0 ? content[i - 1] : null
374
-
375
- currentLine += char
376
-
377
- if (escapeNext) {
378
- escapeNext = false
379
- continue
380
- }
381
-
382
- if (char === '\\') {
383
- escapeNext = true
384
- continue
385
- }
386
-
387
- if (char === '"' && !escapeNext) {
388
- inString = !inString
389
- continue
390
- }
391
-
392
- if (!inString) {
393
- if (char === '{') braceCount++
394
- else if (char === '}') braceCount--
395
- else if (char === '[') bracketCount++
396
- else if (char === ']') bracketCount--
397
- else if (char === '\n' && braceCount === 0 && bracketCount === 0) {
398
- // Found complete JSON object/array at newline
399
- const trimmedLine = currentLine.trim()
400
- if (trimmedLine.length > 0) {
401
- lines.push(trimmedLine.replace(/\n$/, '')) // Remove trailing newline
402
- }
403
- currentLine = ''
404
- braceCount = 0
405
- bracketCount = 0
406
- inString = false
407
- escapeNext = false
408
- }
409
- }
410
- }
411
-
412
- // Add remaining content if it's a complete JSON object/array
413
- const trimmedLine = currentLine.trim()
414
- if (trimmedLine.length > 0 && braceCount === 0 && bracketCount === 0) {
415
- lines.push(trimmedLine)
416
- }
417
-
418
- return lines.filter(line => line.trim().length > 0)
419
- }
420
-
421
- async *readGroupedRange(groupedRange, fd) {
422
- if (groupedRange.length === 0) return
423
-
424
- // OPTIMIZATION: For single range, use direct approach
425
- if (groupedRange.length === 1) {
426
- const range = groupedRange[0]
427
- const bufferSize = range.end - range.start
428
-
429
- if (bufferSize <= 0 || bufferSize > this.maxBufferSize) {
430
- throw new Error(`Invalid buffer size: ${bufferSize}. Start: ${range.start}, End: ${range.end}. Max allowed: ${this.maxBufferSize}`)
431
- }
432
-
433
- const buffer = Buffer.allocUnsafe(bufferSize)
434
- const { bytesRead } = await fd.read(buffer, 0, bufferSize, range.start)
435
- const actualBuffer = bytesRead < bufferSize ? buffer.subarray(0, bytesRead) : buffer
436
-
437
- if (actualBuffer.length === 0) return
438
-
439
- let lineString
440
- try {
441
- lineString = actualBuffer.toString('utf8')
442
- } catch (error) {
443
- lineString = actualBuffer.toString('utf8', { replacement: '?' })
444
- }
445
-
446
- // CRITICAL FIX: For single ranges, check if JSON appears truncated and try to complete it
447
- // Only attempt completion if the line doesn't end with newline (indicating possible truncation)
448
- if (!lineString.endsWith('\n')) {
449
- const completeLine = await this.ensureCompleteLine(lineString, fd, range.start + actualBuffer.length)
450
- if (completeLine !== lineString) {
451
- lineString = completeLine.trimEnd()
452
- }
453
- } else {
454
- lineString = lineString.trimEnd()
455
- }
456
-
457
- yield {
458
- line: lineString,
459
- start: range.start,
460
- _: range.index !== undefined ? range.index : (range._ || null)
461
- }
462
- return
463
- }
464
-
465
- // OPTIMIZATION: For multiple ranges, read as single buffer and split by offsets
466
- const firstRange = groupedRange[0]
467
- const lastRange = groupedRange[groupedRange.length - 1]
468
- const totalSize = lastRange.end - firstRange.start
469
-
470
- if (totalSize <= 0 || totalSize > this.maxBufferSize) {
471
- throw new Error(`Invalid total buffer size: ${totalSize}. Start: ${firstRange.start}, End: ${lastRange.end}. Max allowed: ${this.maxBufferSize}`)
472
- }
473
-
474
- // Read entire grouped range as single buffer
475
- const buffer = Buffer.allocUnsafe(totalSize)
476
- const { bytesRead } = await fd.read(buffer, 0, totalSize, firstRange.start)
477
- const actualBuffer = bytesRead < totalSize ? buffer.subarray(0, bytesRead) : buffer
478
-
479
- if (actualBuffer.length === 0) return
480
-
481
- // Convert to string once
482
- let content
483
- try {
484
- content = actualBuffer.toString('utf8')
485
- } catch (error) {
486
- content = actualBuffer.toString('utf8', { replacement: '?' })
487
- }
488
-
489
- // CRITICAL FIX: Validate buffer completeness to prevent UTF-8 corruption
490
- // When reading non-adjacent ranges, the buffer may be incomplete (last line cut mid-character)
491
- const lastNewlineIndex = content.lastIndexOf('\n')
492
- if (lastNewlineIndex === -1 || lastNewlineIndex < content.length - 2) {
493
- // Buffer may be incomplete - truncate to last complete line
494
- if (this.opts.debugMode) {
495
- console.warn(`⚠️ Incomplete buffer detected at offset ${firstRange.start}, truncating to last complete line`)
496
- }
497
- if (lastNewlineIndex > 0) {
498
- content = content.substring(0, lastNewlineIndex + 1)
499
- } else {
500
- // No complete lines found - may be a serious issue
501
- if (this.opts.debugMode) {
502
- console.warn(`⚠️ No complete lines found in buffer at offset ${firstRange.start}`)
503
- }
504
- }
505
- }
506
-
507
- // CRITICAL FIX: Handle ranges more carefully to prevent corruption
508
- if (groupedRange.length === 2 && groupedRange[0].end === groupedRange[1].start) {
509
- // Special case: Adjacent ranges - split by COMPLETE JSON lines, not just newlines
510
- // This prevents corruption when lines contain special characters or unescaped quotes
511
- const lines = this.splitJsonLines(content)
512
-
513
- for (let i = 0; i < Math.min(lines.length, groupedRange.length); i++) {
514
- const range = groupedRange[i]
515
- yield {
516
- line: lines[i],
517
- start: range.start,
518
- _: range.index !== undefined ? range.index : (range._ || null)
519
- }
520
- }
521
- } else {
522
- // CRITICAL FIX: For non-adjacent ranges, use the range.end directly
523
- // because range.end already excludes the newline (calculated as offsets[n+1] - 1)
524
- // We just need to find the line start (beginning of the line in the buffer)
525
- for (let i = 0; i < groupedRange.length; i++) {
526
- const range = groupedRange[i]
527
- const relativeStart = range.start - firstRange.start
528
- const relativeEnd = range.end - firstRange.start
529
-
530
- // OPTIMIZATION 2: Find line start only if necessary
531
- // Check if we're already at a line boundary to avoid unnecessary backwards search
532
- let lineStart = relativeStart
533
- if (relativeStart > 0 && content[relativeStart - 1] !== '\n') {
534
- // Only search backwards if we're not already at a line boundary
535
- while (lineStart > 0 && content[lineStart - 1] !== '\n') {
536
- lineStart--
537
- }
538
- }
539
-
540
- // OPTIMIZATION 3: Use slice() instead of substring() for better performance
541
- // CRITICAL FIX: range.end = offsets[n+1] - 1 points to the newline character
542
- // slice(start, end) includes characters from start to end-1 (end is exclusive)
543
- // So if relativeEnd points to the newline, slice will include it
544
- let rangeContent = content.slice(lineStart, relativeEnd)
545
-
546
- // OPTIMIZATION 4: Direct character check instead of regex/trimEnd
547
- // Remove trailing newlines and whitespace efficiently
548
- // CRITICAL FIX: Prevents incomplete JSON line reading that caused "Expected ',' or ']'" parsing errors
549
- // trimEnd() is actually optimized in V8, but we can check if there's anything to trim first
550
- const len = rangeContent.length
551
- if (len > 0) {
552
- // Quick check: if last char is not whitespace, skip trimEnd
553
- const lastChar = rangeContent[len - 1]
554
- if (lastChar === '\n' || lastChar === '\r' || lastChar === ' ' || lastChar === '\t') {
555
- // Only call trimEnd if we detected trailing whitespace
556
- rangeContent = rangeContent.trimEnd()
557
- }
558
- }
559
-
560
- if (rangeContent.length === 0) continue
561
-
562
- // CRITICAL FIX: For multiple ranges, we cannot safely expand reading
563
- // because offsets are pre-calculated. Instead, validate JSON and let
564
- // the deserializer handle incomplete lines (which will trigger recovery)
565
- const trimmedContent = rangeContent.trim()
566
- let finalContent = rangeContent
567
-
568
- if (trimmedContent.length > 0) {
569
- try {
570
- JSON.parse(trimmedContent)
571
- // JSON is valid, use as-is
572
- } catch (jsonError) {
573
- // JSON appears incomplete - this is expected for truncated ranges
574
- // Let the deserializer handle it (will trigger streaming recovery if needed)
575
- // We don't try to expand reading here because offsets are pre-calculated
576
- }
577
- }
578
-
579
- yield {
580
- line: finalContent,
581
- start: range.start,
582
- _: range.index !== undefined ? range.index : (range._ || null)
583
- }
584
- }
585
- }
586
- }
587
-
588
- async *walk(ranges) {
589
- // CRITICAL FIX: Acquire file mutex to prevent race conditions with concurrent writes
590
- const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
591
- try {
592
- // Check if file exists before trying to read it
593
- if (!await this.exists()) {
594
- return // Return empty generator if file doesn't exist
595
- }
596
-
597
- const fd = await fs.promises.open(this.file, 'r')
598
- try {
599
- const groupedRanges = await this.groupedRanges(ranges)
600
- for(const groupedRange of groupedRanges) {
601
- for await (const row of this.readGroupedRange(groupedRange, fd)) {
602
- yield row
603
- }
604
- }
605
- } finally {
606
- await fd.close()
607
- }
608
- } finally {
609
- release()
610
- }
611
- }
612
-
613
- async replaceLines(ranges, lines) {
614
- // CRITICAL: Always use file mutex to prevent concurrent file operations
615
- if (this.fileMutex) {
616
- return this.fileMutex.runExclusive(async () => {
617
- // Add a small delay to ensure any pending operations complete
618
- await new Promise(resolve => setTimeout(resolve, 10));
619
- return this._replaceLinesInternal(ranges, lines);
620
- });
621
- } else {
622
- return this._replaceLinesInternal(ranges, lines);
623
- }
624
- }
625
-
626
- async _replaceLinesInternal(ranges, lines) {
627
- const tmpFile = this.file + '.tmp';
628
- let writer, reader;
629
-
630
- try {
631
- writer = await fs.promises.open(tmpFile, 'w+');
632
-
633
- // Check if the main file exists before trying to read it
634
- if (await this.exists()) {
635
- reader = await fs.promises.open(this.file, 'r');
636
- } else {
637
- // If file doesn't exist, we'll just write the new lines
638
- reader = null;
639
- }
640
-
641
- // Sort ranges by start position to ensure correct order
642
- const sortedRanges = [...ranges].sort((a, b) => a.start - b.start);
643
-
644
- let position = 0;
645
- let lineIndex = 0;
646
-
647
- for (const range of sortedRanges) {
648
- // Write existing content before the range (only if file exists)
649
- if (reader && position < range.start) {
650
- const buffer = await this.readRange(position, range.start);
651
- await writer.write(buffer);
652
- }
653
-
654
- // Write new line if provided, otherwise skip the range (for delete operations)
655
- if (lineIndex < lines.length && lines[lineIndex]) {
656
- const line = lines[lineIndex];
657
- // Ensure line ends with newline
658
- let formattedBuffer;
659
- if (Buffer.isBuffer(line)) {
660
- const needsNewline = line.length === 0 || line[line.length - 1] !== 0x0A;
661
- formattedBuffer = needsNewline ? Buffer.concat([line, Buffer.from('\n')]) : line;
662
- } else {
663
- const withNewline = line.endsWith('\n') ? line : line + '\n';
664
- formattedBuffer = Buffer.from(withNewline, 'utf8');
665
- }
666
- await writer.write(formattedBuffer);
667
- }
668
-
669
- // Update position to range.end to avoid overlapping writes
670
- position = range.end;
671
- lineIndex++;
672
- }
673
-
674
- // Write remaining content after the last range (only if file exists)
675
- if (reader) {
676
- const { size } = await reader.stat();
677
- if (position < size) {
678
- const buffer = await this.readRange(position, size);
679
- await writer.write(buffer);
680
- }
681
- }
682
-
683
- // Ensure all data is written to disk
684
- await writer.sync();
685
- if (reader) await reader.close();
686
- await writer.close();
687
-
688
- // Validate the temp file before renaming
689
- await this._validateTempFile(tmpFile);
690
-
691
- // CRITICAL: Retry logic for Windows EPERM errors
692
- await this._safeRename(tmpFile, this.file);
693
-
694
- } catch (e) {
695
- console.error('Erro ao substituir linhas:', e);
696
- throw e;
697
- } finally {
698
- if (reader) await reader.close().catch(() => { });
699
- if (writer) await writer.close().catch(() => { });
700
- await fs.promises.unlink(tmpFile).catch(() => { });
701
- }
702
- }
703
-
704
- async _safeRename(tmpFile, targetFile, maxRetries = 3) {
705
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
706
- try {
707
- await fs.promises.rename(tmpFile, targetFile);
708
- return; // Success
709
- } catch (error) {
710
- if (error.code === 'EPERM' && attempt < maxRetries) {
711
- // Quick delay: 50ms, 100ms, 200ms
712
- const delay = 50 * attempt;
713
- console.log(`🔄 EPERM retry ${attempt}/${maxRetries}, waiting ${delay}ms...`);
714
- await new Promise(resolve => setTimeout(resolve, delay));
715
- continue;
716
- }
717
-
718
- // If all retries failed, try Windows fallback approach
719
- if (error.code === 'EPERM' && attempt === maxRetries) {
720
- console.log(`⚠️ All EPERM retries failed, trying Windows fallback...`);
721
- return this._windowsFallbackRename(tmpFile, targetFile);
722
- }
723
-
724
- throw error; // Re-throw if not EPERM or max retries reached
725
- }
726
- }
727
- }
728
-
729
- async _validateTempFile(tmpFile) {
730
- try {
731
- // Read the temp file and validate JSON structure
732
- const content = await fs.promises.readFile(tmpFile, 'utf8');
733
- const lines = content.split('\n').filter(line => line.trim());
734
-
735
- let hasInvalidJson = false;
736
- const validLines = [];
737
-
738
- for (let i = 0; i < lines.length; i++) {
739
- try {
740
- JSON.parse(lines[i]);
741
- validLines.push(lines[i]);
742
- } catch (error) {
743
- if (this.opts.debugMode) {
744
- console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
745
- }
746
- hasInvalidJson = true;
747
- }
748
- }
749
-
750
- // If we found invalid JSON, rewrite the file with only valid lines
751
- if (hasInvalidJson && validLines.length > 0) {
752
- console.log(`🔧 Rewriting temp file with ${validLines.length} valid lines`);
753
- const correctedContent = validLines.join('\n') + '\n';
754
- await fs.promises.writeFile(tmpFile, correctedContent, 'utf8');
755
- }
756
-
757
- console.log(`✅ Temp file validation passed: ${validLines.length} valid JSON lines`);
758
- } catch (error) {
759
- console.error(`❌ Temp file validation failed:`, error.message);
760
- throw error;
761
- }
762
- }
763
-
764
- async _windowsFallbackRename(tmpFile, targetFile) {
765
- try {
766
- // Windows fallback: copy content instead of rename
767
- console.log(`🔄 Using Windows fallback: copy + delete approach`);
768
-
769
- // Validate temp file before copying
770
- await this._validateTempFile(tmpFile);
771
-
772
- // Read the temp file content
773
- const content = await fs.promises.readFile(tmpFile, 'utf8');
774
-
775
- // Write to target file directly
776
- await fs.promises.writeFile(targetFile, content, 'utf8');
777
-
778
- // Delete temp file
779
- await fs.promises.unlink(tmpFile);
780
-
781
- console.log(`✅ Windows fallback successful`);
782
- return;
783
- } catch (fallbackError) {
784
- console.error(`❌ Windows fallback also failed:`, fallbackError);
785
- throw fallbackError;
786
- }
787
- }
788
-
789
- async writeData(data, immediate, fd) {
790
- await fd.write(data)
791
- }
792
-
793
- async writeDataAsync(data) {
794
- // CRITICAL FIX: Ensure directory exists before writing
795
- const dir = path.dirname(this.file)
796
- await fs.promises.mkdir(dir, { recursive: true })
797
-
798
- await fs.promises.appendFile(this.file, data)
799
- }
800
-
801
- /**
802
- * Check if data appears to be binary (always false since we only use JSON now)
803
- */
804
- isBinaryData(data) {
805
- // All data is now JSON format
806
- return false
807
- }
808
-
809
- /**
810
- * Check if file is binary (always false since we only use JSON now)
811
- */
812
- async isBinaryFile() {
813
- // All files are now JSON format
814
- return false
815
- }
816
-
817
- async readLastLine() {
818
- // Use global read limiter to prevent file descriptor exhaustion
819
- return this.readLimiter(async () => {
820
- // Check if file exists before trying to read it
821
- if (!await this.exists()) {
822
- return null // Return null if file doesn't exist
823
- }
824
-
825
- const reader = await fs.promises.open(this.file, 'r')
826
- try {
827
- const { size } = await reader.stat()
828
- if (size < 1) throw 'empty file'
829
- this.size = size
830
- const bufferSize = 16384
831
- let buffer, isFirstRead = true, lastReadSize, readPosition = Math.max(size - bufferSize, 0)
832
- while (readPosition >= 0) {
833
- const readSize = Math.min(bufferSize, size - readPosition)
834
- if (readSize !== lastReadSize) {
835
- lastReadSize = readSize
836
- buffer = Buffer.alloc(readSize)
837
- }
838
- const { bytesRead } = await reader.read(buffer, 0, isFirstRead ? (readSize - 1) : readSize, readPosition)
839
- if (isFirstRead) isFirstRead = false
840
- if (bytesRead === 0) break
841
- const newlineIndex = buffer.lastIndexOf(10)
842
- const start = readPosition + newlineIndex + 1
843
- if (newlineIndex !== -1) {
844
- const lastLine = Buffer.alloc(size - start)
845
- await reader.read(lastLine, 0, size - start, start)
846
- if (!lastLine || !lastLine.length) {
847
- throw 'no metadata or empty file'
848
- }
849
- return lastLine
850
- } else {
851
- readPosition -= bufferSize
852
- }
853
- }
854
- } catch (e) {
855
- String(e).includes('empty file') || console.error('Error reading last line:', e)
856
- } finally {
857
- reader.close()
858
- }
859
- })
860
- }
861
-
862
- /**
863
- * Read records with streaming using readline
864
- * @param {Object} criteria - Filter criteria
865
- * @param {Object} options - Options (limit, skip)
866
- * @param {Function} matchesCriteria - Function to check if record matches criteria
867
- * @returns {Promise<Array>} - Array of records
868
- */
869
- async readWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
870
- // CRITICAL: Always use file mutex to prevent concurrent file operations
871
- if (this.fileMutex) {
872
- return this.fileMutex.runExclusive(async () => {
873
- // Add a small delay to ensure any pending operations complete
874
- await new Promise(resolve => setTimeout(resolve, 5));
875
- // Use global read limiter to prevent file descriptor exhaustion
876
- return this.readLimiter(() => this._readWithStreamingRetry(criteria, options, matchesCriteria, serializer));
877
- });
878
- } else {
879
- // Use global read limiter to prevent file descriptor exhaustion
880
- return this.readLimiter(() => this._readWithStreamingRetry(criteria, options, matchesCriteria, serializer));
881
- }
882
- }
883
-
884
- async _readWithStreamingRetry(criteria, options = {}, matchesCriteria, serializer = null) {
885
- // If no timeout configured, use original implementation without retry
886
- if (!options.ioTimeoutMs) {
887
- return this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer);
888
- }
889
-
890
- const timeoutMs = options.ioTimeoutMs || 5000; // Default 5s timeout per attempt
891
- const maxRetries = options.maxRetries || 3;
892
-
893
- return pRetry(async (attempt) => {
894
- const controller = new AbortController();
895
- const timeout = setTimeout(() => controller.abort(), timeoutMs);
896
-
897
- try {
898
- const results = await this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer, controller.signal);
899
- return results;
900
- } catch (error) {
901
- if (error.name === 'AbortError' || error.code === 'ETIMEDOUT') {
902
- if (this.opts.debugMode) {
903
- console.log(`⚠️ Streaming read attempt ${attempt} timed out, retrying...`);
904
- }
905
- throw error; // p-retry will retry
906
- }
907
- // For other errors, don't retry
908
- throw new pRetry.AbortError(error);
909
- } finally {
910
- clearTimeout(timeout);
911
- }
912
- }, {
913
- retries: maxRetries,
914
- minTimeout: 100,
915
- maxTimeout: 1000,
916
- onFailedAttempt: (error) => {
917
- if (this.opts.debugMode) {
918
- console.log(`Streaming read failed (attempt ${error.attemptNumber}), ${error.retriesLeft} retries left`);
919
- }
920
- }
921
- });
922
- }
923
-
924
- async _readWithStreamingInternal(criteria, options = {}, matchesCriteria, serializer = null, signal = null) {
925
- const { limit, skip = 0 } = options; // No default limit
926
- const results = [];
927
- let lineNumber = 0;
928
- let processed = 0;
929
- let skipped = 0;
930
- let matched = 0;
931
-
932
- try {
933
- // Check if file exists before trying to read it
934
- if (!await this.exists()) {
935
- return results; // Return empty results if file doesn't exist
936
- }
937
-
938
- // All files are now JSONL format - use line-by-line reading
939
- // Create optimized read stream
940
- const stream = fs.createReadStream(this.file, {
941
- highWaterMark: 64 * 1024, // 64KB chunks
942
- encoding: 'utf8'
943
- });
944
-
945
- // Create readline interface
946
- const rl = readline.createInterface({
947
- input: stream,
948
- crlfDelay: Infinity // Better performance
949
- });
950
-
951
- // Handle abort signal
952
- if (signal) {
953
- signal.addEventListener('abort', () => {
954
- stream.destroy();
955
- rl.close();
956
- });
957
- }
958
-
959
- // Process line by line
960
- for await (const line of rl) {
961
- if (signal && signal.aborted) {
962
- break; // Stop if aborted
963
- }
964
-
965
- lineNumber++;
966
-
967
- // Skip lines that were already processed in previous attempts
968
- if (lineNumber <= skip) {
969
- skipped++;
970
- continue;
971
- }
972
-
973
- try {
974
- let record;
975
- if (serializer && typeof serializer.deserialize === 'function') {
976
- // Use serializer for deserialization
977
- record = serializer.deserialize(line);
978
- } else {
979
- // Fallback to JSON.parse for backward compatibility
980
- record = JSON.parse(line);
981
- }
982
-
983
- if (record && matchesCriteria(record, criteria)) {
984
- // Return raw data - term mapping will be handled by Database layer
985
- results.push({ ...record, _: lineNumber });
986
- matched++;
987
-
988
- // Check if we've reached the limit
989
- if (results.length >= limit) {
990
- break;
991
- }
992
- }
993
- } catch (error) {
994
- // CRITICAL FIX: Only log errors if they're not expected during concurrent operations
995
- // Don't log JSON parsing errors that occur during file writes
996
- if (this.opts && this.opts.debugMode && !error.message.includes('Unexpected')) {
997
- console.log(`Error reading line ${lineNumber}:`, error.message);
998
- }
999
- // Ignore invalid lines - they may be partial writes
1000
- }
1001
-
1002
- processed++;
1003
- }
1004
-
1005
- if (this.opts && this.opts.debugMode) {
1006
- console.log(`📊 Streaming read completed: ${results.length} results, ${processed} processed, ${skipped} skipped, ${matched} matched`);
1007
- }
1008
-
1009
- return results;
1010
-
1011
- } catch (error) {
1012
- if (error.message === 'AbortError') {
1013
- // Return partial results if aborted
1014
- return results;
1015
- }
1016
- console.error('Error in readWithStreaming:', error);
1017
- throw error;
1018
- }
1019
- }
1020
-
1021
- /**
1022
- * Count records with streaming
1023
- * @param {Object} criteria - Filter criteria
1024
- * @param {Object} options - Options (limit)
1025
- * @param {Function} matchesCriteria - Function to check if record matches criteria
1026
- * @returns {Promise<number>} - Number of records
1027
- */
1028
- async countWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
1029
- const { limit } = options;
1030
- let count = 0;
1031
- let processed = 0;
1032
-
1033
- try {
1034
- const stream = fs.createReadStream(this.file, {
1035
- highWaterMark: 64 * 1024,
1036
- encoding: 'utf8'
1037
- });
1038
-
1039
- const rl = readline.createInterface({
1040
- input: stream,
1041
- crlfDelay: Infinity
1042
- });
1043
-
1044
- for await (const line of rl) {
1045
- if (limit && count >= limit) {
1046
- break;
1047
- }
1048
-
1049
- try {
1050
- let record;
1051
- if (serializer) {
1052
- // Use serializer for deserialization
1053
- record = await serializer.deserialize(line);
1054
- } else {
1055
- // Fallback to JSON.parse for backward compatibility
1056
- record = JSON.parse(line);
1057
- }
1058
-
1059
- if (record && matchesCriteria(record, criteria)) {
1060
- count++;
1061
- }
1062
- } catch (error) {
1063
- // Ignore invalid lines
1064
- }
1065
-
1066
- processed++;
1067
- }
1068
-
1069
- return count;
1070
-
1071
- } catch (error) {
1072
- throw error;
1073
- }
1074
- }
1075
-
1076
- /**
1077
- * Get file statistics
1078
- * @returns {Promise<Object>} - File statistics
1079
- */
1080
- async getFileStats() {
1081
- try {
1082
- const stats = await fs.promises.stat(this.file);
1083
- const lineCount = await this.countLines();
1084
-
1085
- return {
1086
- filePath: this.file,
1087
- size: stats.size,
1088
- lineCount,
1089
- lastModified: stats.mtime
1090
- };
1091
- } catch (error) {
1092
- throw error;
1093
- }
1094
- }
1095
-
1096
- /**
1097
- * Count lines in file
1098
- * @returns {Promise<number>} - Number of lines
1099
- */
1100
- async countLines() {
1101
- let lineCount = 0;
1102
-
1103
- try {
1104
- const stream = fs.createReadStream(this.file, {
1105
- highWaterMark: 64 * 1024,
1106
- encoding: 'utf8'
1107
- });
1108
-
1109
- const rl = readline.createInterface({
1110
- input: stream,
1111
- crlfDelay: Infinity
1112
- });
1113
-
1114
- for await (const line of rl) {
1115
- lineCount++;
1116
- }
1117
-
1118
- return lineCount;
1119
- } catch (error) {
1120
- throw error;
1121
- }
1122
- }
1123
-
1124
- async destroy() {
1125
- // CRITICAL FIX: Close all file handles to prevent resource leaks
1126
- try {
1127
- // Close any open file descriptors
1128
- if (this.fd) {
1129
- await this.fd.close().catch(() => {})
1130
- this.fd = null
1131
- }
1132
-
1133
- // Close any open readers/writers
1134
- if (this.reader) {
1135
- await this.reader.close().catch(() => {})
1136
- this.reader = null
1137
- }
1138
-
1139
- if (this.writer) {
1140
- await this.writer.close().catch(() => {})
1141
- this.writer = null
1142
- }
1143
-
1144
- // Clear any cached file handles
1145
- this.cachedFd = null
1146
-
1147
- } catch (error) {
1148
- // Ignore errors during cleanup
1149
- }
1150
- }
1151
-
1152
- async delete() {
1153
- try {
1154
- // Delete main file
1155
- await fs.promises.unlink(this.file).catch(() => {})
1156
-
1157
- // Delete index file (which now contains both index and offsets data)
1158
- await fs.promises.unlink(this.indexFile).catch(() => {})
1159
- } catch (error) {
1160
- // Ignore errors if files don't exist
1161
- }
1162
- }
1163
-
1164
- async writeAll(data) {
1165
- const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1166
- try {
1167
- // Use Windows-specific retry logic for file operations
1168
- await this._writeWithRetry(data)
1169
- } finally {
1170
- release()
1171
- }
1172
- }
1173
-
1174
- /**
1175
- * Optimized batch write operation (OPTIMIZATION)
1176
- * @param {Array} dataChunks - Array of data chunks to write
1177
- * @param {boolean} append - Whether to append or overwrite
1178
- */
1179
- async writeBatch(dataChunks, append = false) {
1180
- if (!dataChunks || !dataChunks.length) return
1181
-
1182
- const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1183
- try {
1184
- // OPTIMIZATION: Use streaming write for better performance
1185
- if (dataChunks.length === 1 && Buffer.isBuffer(dataChunks[0])) {
1186
- // Single buffer - use direct write
1187
- if (append) {
1188
- await fs.promises.appendFile(this.file, dataChunks[0])
1189
- } else {
1190
- await this._writeFileWithRetry(this.file, dataChunks[0])
1191
- }
1192
- } else {
1193
- // Multiple chunks - use streaming approach
1194
- await this._writeBatchStreaming(dataChunks, append)
1195
- }
1196
- } finally {
1197
- release()
1198
- }
1199
- }
1200
-
1201
- /**
1202
- * OPTIMIZATION: Streaming write for multiple chunks
1203
- * @param {Array} dataChunks - Array of data chunks to write
1204
- * @param {boolean} append - Whether to append or overwrite
1205
- */
1206
- async _writeBatchStreaming(dataChunks, append = false) {
1207
- // OPTIMIZATION: Use createWriteStream for better performance
1208
- const writeStream = fs.createWriteStream(this.file, {
1209
- flags: append ? 'a' : 'w',
1210
- highWaterMark: 64 * 1024 // 64KB buffer
1211
- })
1212
-
1213
- return new Promise((resolve, reject) => {
1214
- writeStream.on('error', reject)
1215
- writeStream.on('finish', resolve)
1216
-
1217
- // Write chunks sequentially
1218
- let index = 0
1219
- const writeNext = () => {
1220
- if (index >= dataChunks.length) {
1221
- writeStream.end()
1222
- return
1223
- }
1224
-
1225
- const chunk = dataChunks[index++]
1226
- const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, 'utf8')
1227
-
1228
- if (!writeStream.write(buffer)) {
1229
- writeStream.once('drain', writeNext)
1230
- } else {
1231
- writeNext()
1232
- }
1233
- }
1234
-
1235
- writeNext()
1236
- })
1237
- }
1238
-
1239
- /**
1240
- * Optimized append operation for single data chunk (OPTIMIZATION)
1241
- * @param {string|Buffer} data - Data to append
1242
- */
1243
- async appendOptimized(data) {
1244
- const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1245
- try {
1246
- // OPTIMIZATION: Direct append without retry logic for better performance
1247
- await fs.promises.appendFile(this.file, data)
1248
- } finally {
1249
- release()
1250
- }
1251
- }
1252
-
1253
- /**
1254
- * Windows-specific retry logic for fs.promises.writeFile operations
1255
- * Based on node-graceful-fs workarounds for EPERM issues
1256
- */
1257
- async _writeFileWithRetry(filePath, data, maxRetries = 3) {
1258
- const isWindows = process.platform === 'win32'
1259
-
1260
- for (let attempt = 0; attempt < maxRetries; attempt++) {
1261
- try {
1262
- // Ensure data is properly formatted as string or buffer
1263
- if (Buffer.isBuffer(data)) {
1264
- await fs.promises.writeFile(filePath, data)
1265
- } else {
1266
- await fs.promises.writeFile(filePath, data.toString())
1267
- }
1268
-
1269
- // Windows: add small delay after write operation
1270
- // This helps prevent EPERM issues caused by file handle not being released immediately
1271
- if (isWindows) {
1272
- await new Promise(resolve => setTimeout(resolve, 10))
1273
- }
1274
-
1275
- // Success - return immediately
1276
- return
1277
-
1278
- } catch (err) {
1279
- // Only retry on EPERM errors on Windows
1280
- if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
1281
- // Exponential backoff: 10ms, 50ms, 250ms
1282
- const delay = Math.pow(10, attempt + 1)
1283
- await new Promise(resolve => setTimeout(resolve, delay))
1284
- continue
1285
- }
1286
-
1287
- // Re-throw if not a retryable error or max retries reached
1288
- throw err
1289
- }
1290
- }
1291
- }
1292
-
1293
- /**
1294
- * Windows-specific retry logic for file operations
1295
- * Based on node-graceful-fs workarounds for EPERM issues
1296
- */
1297
- async _writeWithRetry(data, maxRetries = 3) {
1298
- const isWindows = process.platform === 'win32'
1299
-
1300
- for (let attempt = 0; attempt < maxRetries; attempt++) {
1301
- try {
1302
- // CRITICAL FIX: Ensure directory exists before writing file
1303
- const dir = path.dirname(this.file)
1304
- await fs.promises.mkdir(dir, { recursive: true })
1305
-
1306
- const fd = await fs.promises.open(this.file, 'w')
1307
- try {
1308
- // Ensure data is properly formatted as string or buffer
1309
- if (Buffer.isBuffer(data)) {
1310
- await fd.write(data)
1311
- } else {
1312
- await fd.write(data.toString())
1313
- }
1314
- } finally {
1315
- await fd.close()
1316
-
1317
- // Windows: add small delay after closing file handle
1318
- // This helps prevent EPERM issues caused by file handle not being released immediately
1319
- if (isWindows) {
1320
- await new Promise(resolve => setTimeout(resolve, 10))
1321
- }
1322
- }
1323
-
1324
- // Success - return immediately
1325
- return
1326
-
1327
- } catch (err) {
1328
- // Only retry on EPERM errors on Windows
1329
- if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
1330
- // Exponential backoff: 10ms, 50ms, 250ms
1331
- const delay = Math.pow(10, attempt + 1)
1332
- await new Promise(resolve => setTimeout(resolve, delay))
1333
- continue
1334
- }
1335
-
1336
- // Re-throw if not a retryable error or max retries reached
1337
- throw err
1338
- }
1339
- }
1340
- }
1341
-
1342
- async readAll() {
1343
- const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1344
- try {
1345
- // Check if file exists before trying to read it
1346
- if (!await this.exists()) {
1347
- return '' // Return empty string if file doesn't exist
1348
- }
1349
-
1350
- const fd = await fs.promises.open(this.file, 'r')
1351
- try {
1352
- const stats = await fd.stat()
1353
- const buffer = Buffer.allocUnsafe(stats.size)
1354
- await fd.read(buffer, 0, stats.size, 0)
1355
- return buffer.toString('utf8')
1356
- } finally {
1357
- await fd.close()
1358
- }
1359
- } finally {
1360
- release()
1361
- }
1362
- }
1363
-
1364
- /**
1365
- * Read specific lines from the file using line numbers
1366
- * This is optimized for partial reads when using indexed queries
1367
- * @param {number[]} lineNumbers - Array of line numbers to read (1-based)
1368
- * @returns {Promise<string>} - Content of the specified lines
1369
- */
1370
- async readSpecificLines(lineNumbers) {
1371
- if (!lineNumbers || lineNumbers.length === 0) {
1372
- return ''
1373
- }
1374
-
1375
- const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1376
- try {
1377
- // Check if file exists before trying to read it
1378
- if (!await this.exists()) {
1379
- return '' // Return empty string if file doesn't exist
1380
- }
1381
-
1382
- const fd = await fs.promises.open(this.file, 'r')
1383
- try {
1384
- const stats = await fd.stat()
1385
- const buffer = Buffer.allocUnsafe(stats.size)
1386
- await fd.read(buffer, 0, stats.size, 0)
1387
-
1388
- // CRITICAL FIX: Ensure proper UTF-8 decoding for multi-byte characters
1389
- let content
1390
- try {
1391
- content = buffer.toString('utf8')
1392
- } catch (error) {
1393
- // If UTF-8 decoding fails, try to recover by finding valid UTF-8 boundaries
1394
- if (this.opts.debugMode) {
1395
- console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`)
1396
- }
1397
-
1398
- // Find the last complete UTF-8 character
1399
- let validLength = buffer.length
1400
- for (let i = buffer.length - 1; i >= 0; i--) {
1401
- const byte = buffer[i]
1402
- // CRITICAL FIX: Correct UTF-8 start character detection
1403
- // Check if this is the start of a UTF-8 character (not a continuation byte)
1404
- if ((byte & 0x80) === 0 || // ASCII (1 byte) - 0xxxxxxx
1405
- (byte & 0xE0) === 0xC0 || // 2-byte UTF-8 start - 110xxxxx
1406
- (byte & 0xF0) === 0xE0 || // 3-byte UTF-8 start - 1110xxxx
1407
- (byte & 0xF8) === 0xF0) { // 4-byte UTF-8 start - 11110xxx
1408
- validLength = i + 1
1409
- break
1410
- }
1411
- }
1412
-
1413
- const validBuffer = buffer.subarray(0, validLength)
1414
- content = validBuffer.toString('utf8')
1415
- }
1416
-
1417
- // Split content into lines and extract only the requested lines
1418
- const lines = content.split('\n')
1419
- const result = []
1420
-
1421
- for (const lineNum of lineNumbers) {
1422
- // Convert to 0-based index and check bounds
1423
- const index = lineNum - 1
1424
- if (index >= 0 && index < lines.length) {
1425
- result.push(lines[index])
1426
- }
1427
- }
1428
-
1429
- return result.join('\n')
1430
- } finally {
1431
- await fd.close()
1432
- }
1433
- } finally {
1434
- release()
1435
- }
1436
- }
1437
-
1438
- }
1439
-
1
+ import fs from 'fs'
2
+ import path from 'path'
3
+ import readline from 'readline'
4
+ import pLimit from 'p-limit'
5
+ import pRetry from 'p-retry'
6
+
7
+ export default class FileHandler {
8
+ constructor(file, fileMutex = null, opts = {}) {
9
+ this.file = file
10
+ this.indexFile = file ? file.replace(/\.jdb$/, '.idx.jdb') : null
11
+ this.fileMutex = fileMutex
12
+ this.opts = opts
13
+ this.maxBufferSize = opts.maxBufferSize || 4 * 1024 * 1024 // 4MB default
14
+ // Global I/O limiter to prevent file descriptor exhaustion in concurrent operations
15
+ this.readLimiter = pLimit(opts.maxConcurrentReads || 4)
16
+ }
17
+
18
+ _getIoTimeoutMs(override) {
19
+ if (typeof override === 'number') return override
20
+ if (typeof this.opts.ioTimeoutMs === 'number') return this.opts.ioTimeoutMs
21
+ return 0
22
+ }
23
+
24
+ async _withIoTimeout(fn, timeoutMs, onTimeout) {
25
+ if (!timeoutMs || timeoutMs <= 0) {
26
+ return fn()
27
+ }
28
+ let timeoutId
29
+ const timeoutPromise = new Promise((_, reject) => {
30
+ timeoutId = setTimeout(() => {
31
+ if (onTimeout) {
32
+ try { onTimeout() } catch {}
33
+ }
34
+ const err = new Error(`I/O timeout after ${timeoutMs}ms`)
35
+ err.code = 'ETIMEDOUT'
36
+ reject(err)
37
+ }, timeoutMs)
38
+ })
39
+ try {
40
+ return await Promise.race([fn(), timeoutPromise])
41
+ } finally {
42
+ if (timeoutId) clearTimeout(timeoutId)
43
+ }
44
+ }
45
+
46
+ async _readWithTimeout(fd, buffer, offset, length, position, timeoutMs) {
47
+ return this._withIoTimeout(
48
+ () => fd.read(buffer, offset, length, position),
49
+ timeoutMs,
50
+ () => fd.close().catch(() => {})
51
+ )
52
+ }
53
+
54
+ async truncate(offset) {
55
+ try {
56
+ await fs.promises.access(this.file, fs.constants.F_OK)
57
+ await fs.promises.truncate(this.file, offset)
58
+ } catch (err) {
59
+ await fs.promises.writeFile(this.file, '')
60
+ }
61
+ }
62
+
63
+ async writeOffsets(data) {
64
+ // Write offsets to the index file (will be combined with index data)
65
+ await fs.promises.writeFile(this.indexFile, data)
66
+ }
67
+
68
+ async readOffsets() {
69
+ try {
70
+ return await fs.promises.readFile(this.indexFile)
71
+ } catch (err) {
72
+ return null
73
+ }
74
+ }
75
+
76
+ async writeIndex(data) {
77
+ // Write index data to the index file (will be combined with offsets)
78
+ // Use Windows-specific retry logic for file operations
79
+ await this._writeFileWithRetry(this.indexFile, data)
80
+ }
81
+
82
+ async readIndex() {
83
+ try {
84
+ return await fs.promises.readFile(this.indexFile)
85
+ } catch (err) {
86
+ return null
87
+ }
88
+ }
89
+
90
+ async exists() {
91
+ try {
92
+ await fs.promises.access(this.file, fs.constants.F_OK)
93
+ return true
94
+ } catch (err) {
95
+ return false
96
+ }
97
+ }
98
+
99
+
100
+ async indexExists() {
101
+ try {
102
+ await fs.promises.access(this.indexFile, fs.constants.F_OK)
103
+ return true
104
+ } catch (err) {
105
+ return false
106
+ }
107
+ }
108
+
109
+ async isLegacyFormat() {
110
+ if (!await this.exists()) return false
111
+ if (await this.indexExists()) return false
112
+
113
+ // Check if main file contains offsets at the end (legacy format)
114
+ try {
115
+ const lastLine = await this.readLastLine()
116
+ if (!lastLine || !lastLine.length) return false
117
+
118
+ // Try to parse as offsets array
119
+ const content = lastLine.toString('utf-8').trim()
120
+ const parsed = JSON.parse(content)
121
+ return Array.isArray(parsed)
122
+ } catch (err) {
123
+ return false
124
+ }
125
+ }
126
+
127
+ async migrateLegacyFormat(serializer) {
128
+ if (!await this.isLegacyFormat()) return false
129
+
130
+ console.log('Migrating from legacy format to new 3-file format...')
131
+
132
+ // Read the legacy file
133
+ const lastLine = await this.readLastLine()
134
+ const offsets = JSON.parse(lastLine.toString('utf-8').trim())
135
+
136
+ // Get index offset and truncate offsets array
137
+ const indexOffset = offsets[offsets.length - 2]
138
+ const dataOffsets = offsets.slice(0, -2)
139
+
140
+ // Read index data
141
+ const indexStart = indexOffset
142
+ const indexEnd = offsets[offsets.length - 1]
143
+ const indexBuffer = await this.readRange(indexStart, indexEnd)
144
+ const indexData = await serializer.deserialize(indexBuffer)
145
+
146
+ // Write offsets to separate file
147
+ const offsetsString = await serializer.serialize(dataOffsets, { linebreak: false })
148
+ await this.writeOffsets(offsetsString)
149
+
150
+ // Write index to separate file
151
+ const indexString = await serializer.serialize(indexData, { linebreak: false })
152
+ await this.writeIndex(indexString)
153
+
154
+ // Truncate main file to remove index and offsets
155
+ await this.truncate(indexOffset)
156
+
157
+ console.log('Migration completed successfully!')
158
+ return true
159
+ }
160
+
161
+ async readRange(start, end) {
162
+ // Check if file exists before trying to read it
163
+ if (!await this.exists()) {
164
+ return Buffer.alloc(0) // Return empty buffer if file doesn't exist
165
+ }
166
+
167
+ const timeoutMs = this._getIoTimeoutMs()
168
+ let fd = await fs.promises.open(this.file, 'r')
169
+ try {
170
+ // CRITICAL FIX: Check file size before attempting to read
171
+ const stats = await fd.stat()
172
+ const fileSize = stats.size
173
+
174
+ // If start position is beyond file size, return empty buffer
175
+ if (start >= fileSize) {
176
+ await fd.close()
177
+ return Buffer.alloc(0)
178
+ }
179
+
180
+ // Adjust end position if it's beyond file size
181
+ const actualEnd = Math.min(end, fileSize)
182
+ const length = actualEnd - start
183
+
184
+ // If length is 0 or negative, return empty buffer
185
+ if (length <= 0) {
186
+ await fd.close()
187
+ return Buffer.alloc(0)
188
+ }
189
+
190
+ let buffer = Buffer.alloc(length)
191
+ const { bytesRead } = await this._readWithTimeout(fd, buffer, 0, length, start, timeoutMs)
192
+ await fd.close()
193
+
194
+ // CRITICAL FIX: Ensure we read the expected amount of data
195
+ if (bytesRead !== length) {
196
+ const errorMsg = `CRITICAL: Expected to read ${length} bytes, but read ${bytesRead} bytes at position ${start}`
197
+ console.error(`⚠️ ${errorMsg}`)
198
+
199
+ // This indicates a race condition or file corruption
200
+ // Don't retry - the caller should handle synchronization properly
201
+ if (bytesRead === 0) {
202
+ throw new Error(`File corruption detected: ${errorMsg}`)
203
+ }
204
+
205
+ // Return partial data with warning - caller should handle this
206
+ return buffer.subarray(0, bytesRead)
207
+ }
208
+
209
+ return buffer
210
+ } catch (error) {
211
+ await fd.close().catch(() => {})
212
+ throw error
213
+ }
214
+ }
215
+
216
+ async readRanges(ranges, mapper) {
217
+ const lines = {}
218
+
219
+ // Check if file exists before trying to read it
220
+ if (!await this.exists()) {
221
+ return lines // Return empty object if file doesn't exist
222
+ }
223
+
224
+ const fd = await fs.promises.open(this.file, 'r')
225
+ const groupedRanges = await this.groupedRanges(ranges)
226
+ try {
227
+ await Promise.allSettled(groupedRanges.map(async (groupedRange) => {
228
+ await this.readLimiter(async () => {
229
+ for await (const row of this.readGroupedRange(groupedRange, fd)) {
230
+ lines[row.start] = mapper ? (await mapper(row.line, { start: row.start, end: row.start + row.line.length })) : row.line
231
+ }
232
+ })
233
+ }))
234
+ } catch (e) {
235
+ console.error('Error reading ranges:', e)
236
+ } finally {
237
+ await fd.close()
238
+ }
239
+ return lines
240
+ }
241
+
242
+ async groupedRanges(ranges) { // expects ordered ranges from Database.getRanges()
243
+ const readSize = 512 * 1024 // 512KB
244
+ const groupedRanges = []
245
+ let currentGroup = []
246
+ let currentSize = 0
247
+
248
+ // each range is a {start: number, end: number} object
249
+ for (let i = 0; i < ranges.length; i++) {
250
+ const range = ranges[i]
251
+ const rangeSize = range.end - range.start
252
+
253
+ if (currentGroup.length > 0) {
254
+ const lastRange = currentGroup[currentGroup.length - 1]
255
+ if (lastRange.end !== range.start || currentSize + rangeSize > readSize) {
256
+ groupedRanges.push(currentGroup)
257
+ currentGroup = []
258
+ currentSize = 0
259
+ }
260
+ }
261
+
262
+ currentGroup.push(range)
263
+ currentSize += rangeSize
264
+ }
265
+
266
+ if (currentGroup.length > 0) {
267
+ groupedRanges.push(currentGroup)
268
+ }
269
+
270
+ return groupedRanges
271
+ }
272
+
273
+ /**
274
+ * Ensure a line is complete by reading until newline if JSON appears truncated
275
+ * @param {string} line - The potentially incomplete line
276
+ * @param {number} fd - File descriptor
277
+ * @param {number} currentOffset - Current read offset
278
+ * @returns {string} Complete line
279
+ */
280
+ async ensureCompleteLine(line, fd, currentOffset) {
281
+ // Fast check: if line already ends with newline, it's likely complete
282
+ if (line.endsWith('\n')) {
283
+ return line
284
+ }
285
+
286
+ // Check if the line contains valid JSON by trying to parse it
287
+ const trimmedLine = line.trim()
288
+ if (trimmedLine.length === 0) {
289
+ return line
290
+ }
291
+
292
+ // Try to parse as JSON to see if it's complete
293
+ try {
294
+ JSON.parse(trimmedLine)
295
+ // If parsing succeeds, the line is complete (but missing newline)
296
+ // This is unusual but possible, return as-is
297
+ return line
298
+ } catch (jsonError) {
299
+ // JSON is incomplete, try to read more until we find a newline
300
+ const bufferSize = 2048 // Read in 2KB chunks for better performance
301
+ const additionalBuffer = Buffer.allocUnsafe(bufferSize)
302
+ let additionalOffset = currentOffset
303
+ let additionalContent = line
304
+
305
+ // Try reading up to 20KB more to find the newline (increased for safety)
306
+ const maxAdditionalRead = 20480
307
+ let totalAdditionalRead = 0
308
+
309
+ while (totalAdditionalRead < maxAdditionalRead) {
310
+ const { bytesRead } = await fd.read(additionalBuffer, 0, bufferSize, additionalOffset)
311
+
312
+ if (bytesRead === 0) {
313
+ // EOF reached, check if the accumulated content is now valid JSON
314
+ const finalTrimmed = additionalContent.trim()
315
+ try {
316
+ JSON.parse(finalTrimmed)
317
+ // If parsing succeeds now, return the content
318
+ return additionalContent
319
+ } catch {
320
+ // Still invalid, return original line to avoid data loss
321
+ return line
322
+ }
323
+ }
324
+
325
+ const chunk = additionalBuffer.toString('utf8', 0, bytesRead)
326
+ additionalContent += chunk
327
+ totalAdditionalRead += bytesRead
328
+
329
+ // Check if we found a newline in the entire accumulated content
330
+ const newlineIndex = additionalContent.indexOf('\n', line.length)
331
+ if (newlineIndex !== -1) {
332
+ // Found newline, return content up to and including the newline
333
+ const completeLine = additionalContent.substring(0, newlineIndex + 1)
334
+
335
+ // Validate that the complete line contains valid JSON
336
+ const trimmedComplete = completeLine.trim()
337
+ try {
338
+ JSON.parse(trimmedComplete)
339
+ return completeLine
340
+ } catch {
341
+ // Even with newline, JSON is invalid - this suggests data corruption
342
+ // Return original line to trigger normal error handling
343
+ return line
344
+ }
345
+ }
346
+
347
+ additionalOffset += bytesRead
348
+ }
349
+
350
+ // If we couldn't find a newline within the limit, return the original line
351
+ // This prevents infinite reading and excessive memory usage
352
+ return line
353
+ }
354
+ }
355
+
356
+ /**
357
+ * Split content into complete JSON lines, handling special characters and escaped quotes
358
+ * CRITICAL FIX: Prevents "Expected ',' or ']'" and "Unterminated string" errors by ensuring
359
+ * each line is a complete, valid JSON object/array, even when containing special characters
360
+ * @param {string} content - Raw content containing multiple JSON lines
361
+ * @returns {string[]} Array of complete JSON lines
362
+ */
363
+ splitJsonLines(content) {
364
+ const lines = []
365
+ let currentLine = ''
366
+ let inString = false
367
+ let escapeNext = false
368
+ let braceCount = 0
369
+ let bracketCount = 0
370
+
371
+ for (let i = 0; i < content.length; i++) {
372
+ const char = content[i]
373
+ const prevChar = i > 0 ? content[i - 1] : null
374
+
375
+ currentLine += char
376
+
377
+ if (escapeNext) {
378
+ escapeNext = false
379
+ continue
380
+ }
381
+
382
+ if (char === '\\') {
383
+ escapeNext = true
384
+ continue
385
+ }
386
+
387
+ if (char === '"' && !escapeNext) {
388
+ inString = !inString
389
+ continue
390
+ }
391
+
392
+ if (!inString) {
393
+ if (char === '{') braceCount++
394
+ else if (char === '}') braceCount--
395
+ else if (char === '[') bracketCount++
396
+ else if (char === ']') bracketCount--
397
+ else if (char === '\n' && braceCount === 0 && bracketCount === 0) {
398
+ // Found complete JSON object/array at newline
399
+ const trimmedLine = currentLine.trim()
400
+ if (trimmedLine.length > 0) {
401
+ lines.push(trimmedLine.replace(/\n$/, '')) // Remove trailing newline
402
+ }
403
+ currentLine = ''
404
+ braceCount = 0
405
+ bracketCount = 0
406
+ inString = false
407
+ escapeNext = false
408
+ }
409
+ }
410
+ }
411
+
412
+ // Add remaining content if it's a complete JSON object/array
413
+ const trimmedLine = currentLine.trim()
414
+ if (trimmedLine.length > 0 && braceCount === 0 && bracketCount === 0) {
415
+ lines.push(trimmedLine)
416
+ }
417
+
418
+ return lines.filter(line => line.trim().length > 0)
419
+ }
420
+
421
+ async *readGroupedRange(groupedRange, fd) {
422
+ if (groupedRange.length === 0) return
423
+
424
+ // OPTIMIZATION: For single range, use direct approach
425
+ if (groupedRange.length === 1) {
426
+ const range = groupedRange[0]
427
+ const bufferSize = range.end - range.start
428
+
429
+ if (bufferSize <= 0 || bufferSize > this.maxBufferSize) {
430
+ throw new Error(`Invalid buffer size: ${bufferSize}. Start: ${range.start}, End: ${range.end}. Max allowed: ${this.maxBufferSize}`)
431
+ }
432
+
433
+ const buffer = Buffer.allocUnsafe(bufferSize)
434
+ const { bytesRead } = await fd.read(buffer, 0, bufferSize, range.start)
435
+ const actualBuffer = bytesRead < bufferSize ? buffer.subarray(0, bytesRead) : buffer
436
+
437
+ if (actualBuffer.length === 0) return
438
+
439
+ let lineString
440
+ try {
441
+ lineString = actualBuffer.toString('utf8')
442
+ } catch (error) {
443
+ lineString = actualBuffer.toString('utf8', { replacement: '?' })
444
+ }
445
+
446
+ // CRITICAL FIX: For single ranges, check if JSON appears truncated and try to complete it
447
+ // Only attempt completion if the line doesn't end with newline (indicating possible truncation)
448
+ if (!lineString.endsWith('\n')) {
449
+ const completeLine = await this.ensureCompleteLine(lineString, fd, range.start + actualBuffer.length)
450
+ if (completeLine !== lineString) {
451
+ lineString = completeLine.trimEnd()
452
+ }
453
+ } else {
454
+ lineString = lineString.trimEnd()
455
+ }
456
+
457
+ yield {
458
+ line: lineString,
459
+ start: range.start,
460
+ _: range.index !== undefined ? range.index : (range._ || null)
461
+ }
462
+ return
463
+ }
464
+
465
+ // OPTIMIZATION: For multiple ranges, read as single buffer and split by offsets
466
+ const firstRange = groupedRange[0]
467
+ const lastRange = groupedRange[groupedRange.length - 1]
468
+ const totalSize = lastRange.end - firstRange.start
469
+
470
+ if (totalSize <= 0 || totalSize > this.maxBufferSize) {
471
+ throw new Error(`Invalid total buffer size: ${totalSize}. Start: ${firstRange.start}, End: ${lastRange.end}. Max allowed: ${this.maxBufferSize}`)
472
+ }
473
+
474
+ // Read entire grouped range as single buffer
475
+ const buffer = Buffer.allocUnsafe(totalSize)
476
+ const { bytesRead } = await fd.read(buffer, 0, totalSize, firstRange.start)
477
+ const actualBuffer = bytesRead < totalSize ? buffer.subarray(0, bytesRead) : buffer
478
+
479
+ if (actualBuffer.length === 0) return
480
+
481
+ // Convert to string once
482
+ let content
483
+ try {
484
+ content = actualBuffer.toString('utf8')
485
+ } catch (error) {
486
+ content = actualBuffer.toString('utf8', { replacement: '?' })
487
+ }
488
+
489
+ // CRITICAL FIX: Validate buffer completeness to prevent UTF-8 corruption
490
+ // When reading non-adjacent ranges, the buffer may be incomplete (last line cut mid-character)
491
+ const lastNewlineIndex = content.lastIndexOf('\n')
492
+ if (lastNewlineIndex === -1 || lastNewlineIndex < content.length - 2) {
493
+ // Buffer may be incomplete - truncate to last complete line
494
+ if (this.opts.debugMode) {
495
+ console.warn(`⚠️ Incomplete buffer detected at offset ${firstRange.start}, truncating to last complete line`)
496
+ }
497
+ if (lastNewlineIndex > 0) {
498
+ content = content.substring(0, lastNewlineIndex + 1)
499
+ } else {
500
+ // No complete lines found - may be a serious issue
501
+ if (this.opts.debugMode) {
502
+ console.warn(`⚠️ No complete lines found in buffer at offset ${firstRange.start}`)
503
+ }
504
+ }
505
+ }
506
+
507
+ // CRITICAL FIX: Handle ranges more carefully to prevent corruption
508
+ if (groupedRange.length === 2 && groupedRange[0].end === groupedRange[1].start) {
509
+ // Special case: Adjacent ranges - split by COMPLETE JSON lines, not just newlines
510
+ // This prevents corruption when lines contain special characters or unescaped quotes
511
+ const lines = this.splitJsonLines(content)
512
+
513
+ for (let i = 0; i < Math.min(lines.length, groupedRange.length); i++) {
514
+ const range = groupedRange[i]
515
+ yield {
516
+ line: lines[i],
517
+ start: range.start,
518
+ _: range.index !== undefined ? range.index : (range._ || null)
519
+ }
520
+ }
521
+ } else {
522
+ // CRITICAL FIX: For non-adjacent ranges, use the range.end directly
523
+ // because range.end already excludes the newline (calculated as offsets[n+1] - 1)
524
+ // We just need to find the line start (beginning of the line in the buffer)
525
+ for (let i = 0; i < groupedRange.length; i++) {
526
+ const range = groupedRange[i]
527
+ const relativeStart = range.start - firstRange.start
528
+ const relativeEnd = range.end - firstRange.start
529
+
530
+ // OPTIMIZATION 2: Find line start only if necessary
531
+ // Check if we're already at a line boundary to avoid unnecessary backwards search
532
+ let lineStart = relativeStart
533
+ if (relativeStart > 0 && content[relativeStart - 1] !== '\n') {
534
+ // Only search backwards if we're not already at a line boundary
535
+ while (lineStart > 0 && content[lineStart - 1] !== '\n') {
536
+ lineStart--
537
+ }
538
+ }
539
+
540
+ // OPTIMIZATION 3: Use slice() instead of substring() for better performance
541
+ // CRITICAL FIX: range.end = offsets[n+1] - 1 points to the newline character
542
+ // slice(start, end) includes characters from start to end-1 (end is exclusive)
543
+ // So if relativeEnd points to the newline, slice will include it
544
+ let rangeContent = content.slice(lineStart, relativeEnd)
545
+
546
+ // OPTIMIZATION 4: Direct character check instead of regex/trimEnd
547
+ // Remove trailing newlines and whitespace efficiently
548
+ // CRITICAL FIX: Prevents incomplete JSON line reading that caused "Expected ',' or ']'" parsing errors
549
+ // trimEnd() is actually optimized in V8, but we can check if there's anything to trim first
550
+ const len = rangeContent.length
551
+ if (len > 0) {
552
+ // Quick check: if last char is not whitespace, skip trimEnd
553
+ const lastChar = rangeContent[len - 1]
554
+ if (lastChar === '\n' || lastChar === '\r' || lastChar === ' ' || lastChar === '\t') {
555
+ // Only call trimEnd if we detected trailing whitespace
556
+ rangeContent = rangeContent.trimEnd()
557
+ }
558
+ }
559
+
560
+ if (rangeContent.length === 0) continue
561
+
562
+ // CRITICAL FIX: For multiple ranges, we cannot safely expand reading
563
+ // because offsets are pre-calculated. Instead, validate JSON and let
564
+ // the deserializer handle incomplete lines (which will trigger recovery)
565
+ const trimmedContent = rangeContent.trim()
566
+ let finalContent = rangeContent
567
+
568
+ if (trimmedContent.length > 0) {
569
+ try {
570
+ JSON.parse(trimmedContent)
571
+ // JSON is valid, use as-is
572
+ } catch (jsonError) {
573
+ // JSON appears incomplete - this is expected for truncated ranges
574
+ // Let the deserializer handle it (will trigger streaming recovery if needed)
575
+ // We don't try to expand reading here because offsets are pre-calculated
576
+ }
577
+ }
578
+
579
+ yield {
580
+ line: finalContent,
581
+ start: range.start,
582
+ _: range.index !== undefined ? range.index : (range._ || null)
583
+ }
584
+ }
585
+ }
586
+ }
587
+
588
+ async *walk(ranges) {
589
+ // CRITICAL FIX: Acquire file mutex to prevent race conditions with concurrent writes
590
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
591
+ try {
592
+ // Check if file exists before trying to read it
593
+ if (!await this.exists()) {
594
+ return // Return empty generator if file doesn't exist
595
+ }
596
+
597
+ const fd = await fs.promises.open(this.file, 'r')
598
+ try {
599
+ const groupedRanges = await this.groupedRanges(ranges)
600
+ for(const groupedRange of groupedRanges) {
601
+ for await (const row of this.readGroupedRange(groupedRange, fd)) {
602
+ yield row
603
+ }
604
+ }
605
+ } finally {
606
+ await fd.close()
607
+ }
608
+ } finally {
609
+ release()
610
+ }
611
+ }
612
+
613
+ async replaceLines(ranges, lines) {
614
+ // CRITICAL: Always use file mutex to prevent concurrent file operations
615
+ if (this.fileMutex) {
616
+ return this.fileMutex.runExclusive(async () => {
617
+ // Add a small delay to ensure any pending operations complete
618
+ await new Promise(resolve => setTimeout(resolve, 10));
619
+ return this._replaceLinesInternal(ranges, lines);
620
+ });
621
+ } else {
622
+ return this._replaceLinesInternal(ranges, lines);
623
+ }
624
+ }
625
+
626
+ async _replaceLinesInternal(ranges, lines) {
627
+ const tmpFile = this.file + '.tmp';
628
+ let writer, reader;
629
+
630
+ try {
631
+ writer = await fs.promises.open(tmpFile, 'w+');
632
+
633
+ // Check if the main file exists before trying to read it
634
+ if (await this.exists()) {
635
+ reader = await fs.promises.open(this.file, 'r');
636
+ } else {
637
+ // If file doesn't exist, we'll just write the new lines
638
+ reader = null;
639
+ }
640
+
641
+ // Sort ranges by start position to ensure correct order
642
+ const sortedRanges = [...ranges].sort((a, b) => a.start - b.start);
643
+
644
+ let position = 0;
645
+ let lineIndex = 0;
646
+
647
+ for (const range of sortedRanges) {
648
+ // Write existing content before the range (only if file exists)
649
+ if (reader && position < range.start) {
650
+ const buffer = await this.readRange(position, range.start);
651
+ await writer.write(buffer);
652
+ }
653
+
654
+ // Write new line if provided, otherwise skip the range (for delete operations)
655
+ if (lineIndex < lines.length && lines[lineIndex]) {
656
+ const line = lines[lineIndex];
657
+ // Ensure line ends with newline
658
+ let formattedBuffer;
659
+ if (Buffer.isBuffer(line)) {
660
+ const needsNewline = line.length === 0 || line[line.length - 1] !== 0x0A;
661
+ formattedBuffer = needsNewline ? Buffer.concat([line, Buffer.from('\n')]) : line;
662
+ } else {
663
+ const withNewline = line.endsWith('\n') ? line : line + '\n';
664
+ formattedBuffer = Buffer.from(withNewline, 'utf8');
665
+ }
666
+ await writer.write(formattedBuffer);
667
+ }
668
+
669
+ // Update position to range.end to avoid overlapping writes
670
+ position = range.end;
671
+ lineIndex++;
672
+ }
673
+
674
+ // Write remaining content after the last range (only if file exists)
675
+ if (reader) {
676
+ const { size } = await reader.stat();
677
+ if (position < size) {
678
+ const buffer = await this.readRange(position, size);
679
+ await writer.write(buffer);
680
+ }
681
+ }
682
+
683
+ // Ensure all data is written to disk
684
+ await writer.sync();
685
+ if (reader) await reader.close();
686
+ await writer.close();
687
+
688
+ // Validate the temp file before renaming
689
+ await this._validateTempFile(tmpFile);
690
+
691
+ // CRITICAL: Retry logic for Windows EPERM errors
692
+ await this._safeRename(tmpFile, this.file);
693
+
694
+ } catch (e) {
695
+ console.error('Erro ao substituir linhas:', e);
696
+ throw e;
697
+ } finally {
698
+ if (reader) await reader.close().catch(() => { });
699
+ if (writer) await writer.close().catch(() => { });
700
+ await fs.promises.unlink(tmpFile).catch(() => { });
701
+ }
702
+ }
703
+
704
+ async _safeRename(tmpFile, targetFile, maxRetries = 3) {
705
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
706
+ try {
707
+ await fs.promises.rename(tmpFile, targetFile);
708
+ return; // Success
709
+ } catch (error) {
710
+ if (error.code === 'EPERM' && attempt < maxRetries) {
711
+ // Quick delay: 50ms, 100ms, 200ms
712
+ const delay = 50 * attempt;
713
+ console.log(`🔄 EPERM retry ${attempt}/${maxRetries}, waiting ${delay}ms...`);
714
+ await new Promise(resolve => setTimeout(resolve, delay));
715
+ continue;
716
+ }
717
+
718
+ // If all retries failed, try Windows fallback approach
719
+ if (error.code === 'EPERM' && attempt === maxRetries) {
720
+ console.log(`⚠️ All EPERM retries failed, trying Windows fallback...`);
721
+ return this._windowsFallbackRename(tmpFile, targetFile);
722
+ }
723
+
724
+ throw error; // Re-throw if not EPERM or max retries reached
725
+ }
726
+ }
727
+ }
728
+
729
+ async _validateTempFile(tmpFile) {
730
+ try {
731
+ // Read the temp file and validate JSON structure
732
+ const content = await fs.promises.readFile(tmpFile, 'utf8');
733
+ const lines = content.split('\n').filter(line => line.trim());
734
+
735
+ let hasInvalidJson = false;
736
+ const validLines = [];
737
+
738
+ for (let i = 0; i < lines.length; i++) {
739
+ try {
740
+ JSON.parse(lines[i]);
741
+ validLines.push(lines[i]);
742
+ } catch (error) {
743
+ if (this.opts.debugMode) {
744
+ console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
745
+ }
746
+ hasInvalidJson = true;
747
+ }
748
+ }
749
+
750
+ // If we found invalid JSON, rewrite the file with only valid lines
751
+ if (hasInvalidJson && validLines.length > 0) {
752
+ console.log(`🔧 Rewriting temp file with ${validLines.length} valid lines`);
753
+ const correctedContent = validLines.join('\n') + '\n';
754
+ await fs.promises.writeFile(tmpFile, correctedContent, 'utf8');
755
+ }
756
+
757
+ console.log(`✅ Temp file validation passed: ${validLines.length} valid JSON lines`);
758
+ } catch (error) {
759
+ console.error(`❌ Temp file validation failed:`, error.message);
760
+ throw error;
761
+ }
762
+ }
763
+
764
+ async _windowsFallbackRename(tmpFile, targetFile) {
765
+ try {
766
+ // Windows fallback: copy content instead of rename
767
+ console.log(`🔄 Using Windows fallback: copy + delete approach`);
768
+
769
+ // Validate temp file before copying
770
+ await this._validateTempFile(tmpFile);
771
+
772
+ // Read the temp file content
773
+ const content = await fs.promises.readFile(tmpFile, 'utf8');
774
+
775
+ // Write to target file directly
776
+ await fs.promises.writeFile(targetFile, content, 'utf8');
777
+
778
+ // Delete temp file
779
+ await fs.promises.unlink(tmpFile);
780
+
781
+ console.log(`✅ Windows fallback successful`);
782
+ return;
783
+ } catch (fallbackError) {
784
+ console.error(`❌ Windows fallback also failed:`, fallbackError);
785
+ throw fallbackError;
786
+ }
787
+ }
788
+
789
+ async writeData(data, immediate, fd) {
790
+ await fd.write(data)
791
+ }
792
+
793
+ async writeDataAsync(data) {
794
+ // CRITICAL FIX: Ensure directory exists before writing
795
+ const dir = path.dirname(this.file)
796
+ await fs.promises.mkdir(dir, { recursive: true })
797
+
798
+ await fs.promises.appendFile(this.file, data)
799
+ }
800
+
801
+ /**
802
+ * Check if data appears to be binary (always false since we only use JSON now)
803
+ */
804
+ isBinaryData(data) {
805
+ // All data is now JSON format
806
+ return false
807
+ }
808
+
809
+ /**
810
+ * Check if file is binary (always false since we only use JSON now)
811
+ */
812
+ async isBinaryFile() {
813
+ // All files are now JSON format
814
+ return false
815
+ }
816
+
817
+ async readLastLine() {
818
+ // Use global read limiter to prevent file descriptor exhaustion
819
+ return this.readLimiter(async () => {
820
+ // Check if file exists before trying to read it
821
+ if (!await this.exists()) {
822
+ return null // Return null if file doesn't exist
823
+ }
824
+
825
+ const reader = await fs.promises.open(this.file, 'r')
826
+ try {
827
+ const { size } = await reader.stat()
828
+ if (size < 1) throw 'empty file'
829
+ this.size = size
830
+ const bufferSize = 16384
831
+ let buffer, isFirstRead = true, lastReadSize, readPosition = Math.max(size - bufferSize, 0)
832
+ while (readPosition >= 0) {
833
+ const readSize = Math.min(bufferSize, size - readPosition)
834
+ if (readSize !== lastReadSize) {
835
+ lastReadSize = readSize
836
+ buffer = Buffer.alloc(readSize)
837
+ }
838
+ const { bytesRead } = await reader.read(buffer, 0, isFirstRead ? (readSize - 1) : readSize, readPosition)
839
+ if (isFirstRead) isFirstRead = false
840
+ if (bytesRead === 0) break
841
+ const newlineIndex = buffer.lastIndexOf(10)
842
+ const start = readPosition + newlineIndex + 1
843
+ if (newlineIndex !== -1) {
844
+ const lastLine = Buffer.alloc(size - start)
845
+ await reader.read(lastLine, 0, size - start, start)
846
+ if (!lastLine || !lastLine.length) {
847
+ throw 'no metadata or empty file'
848
+ }
849
+ return lastLine
850
+ } else {
851
+ readPosition -= bufferSize
852
+ }
853
+ }
854
+ } catch (e) {
855
+ String(e).includes('empty file') || console.error('Error reading last line:', e)
856
+ } finally {
857
+ reader.close()
858
+ }
859
+ })
860
+ }
861
+
862
+ /**
863
+ * Read records with streaming using readline
864
+ * @param {Object} criteria - Filter criteria
865
+ * @param {Object} options - Options (limit, skip)
866
+ * @param {Function} matchesCriteria - Function to check if record matches criteria
867
+ * @returns {Promise<Array>} - Array of records
868
+ */
869
+ async readWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
870
+ // CRITICAL: Always use file mutex to prevent concurrent file operations
871
+ if (this.fileMutex) {
872
+ return this.fileMutex.runExclusive(async () => {
873
+ // Add a small delay to ensure any pending operations complete
874
+ await new Promise(resolve => setTimeout(resolve, 5));
875
+ // Use global read limiter to prevent file descriptor exhaustion
876
+ return this.readLimiter(() => this._readWithStreamingRetry(criteria, options, matchesCriteria, serializer));
877
+ });
878
+ } else {
879
+ // Use global read limiter to prevent file descriptor exhaustion
880
+ return this.readLimiter(() => this._readWithStreamingRetry(criteria, options, matchesCriteria, serializer));
881
+ }
882
+ }
883
+
884
+ async _readWithStreamingRetry(criteria, options = {}, matchesCriteria, serializer = null) {
885
+ // If no timeout configured, use original implementation without retry
886
+ if (!options.ioTimeoutMs) {
887
+ return this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer);
888
+ }
889
+
890
+ const timeoutMs = options.ioTimeoutMs || 5000; // Default 5s timeout per attempt
891
+ const maxRetries = options.maxRetries || 3;
892
+
893
+ return pRetry(async (attempt) => {
894
+ const controller = new AbortController();
895
+ const timeout = setTimeout(() => controller.abort(), timeoutMs);
896
+
897
+ try {
898
+ const results = await this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer, controller.signal);
899
+ return results;
900
+ } catch (error) {
901
+ if (error.name === 'AbortError' || error.code === 'ETIMEDOUT') {
902
+ if (this.opts.debugMode) {
903
+ console.log(`⚠️ Streaming read attempt ${attempt} timed out, retrying...`);
904
+ }
905
+ throw error; // p-retry will retry
906
+ }
907
+ // For other errors, don't retry
908
+ throw new pRetry.AbortError(error);
909
+ } finally {
910
+ clearTimeout(timeout);
911
+ }
912
+ }, {
913
+ retries: maxRetries,
914
+ minTimeout: 100,
915
+ maxTimeout: 1000,
916
+ onFailedAttempt: (error) => {
917
+ if (this.opts.debugMode) {
918
+ console.log(`Streaming read failed (attempt ${error.attemptNumber}), ${error.retriesLeft} retries left`);
919
+ }
920
+ }
921
+ });
922
+ }
923
+
924
+ async _readWithStreamingInternal(criteria, options = {}, matchesCriteria, serializer = null, signal = null) {
925
+ const { limit, skip = 0 } = options; // No default limit
926
+ const results = [];
927
+ let lineNumber = 0;
928
+ let processed = 0;
929
+ let skipped = 0;
930
+ let matched = 0;
931
+
932
+ try {
933
+ // Check if file exists before trying to read it
934
+ if (!await this.exists()) {
935
+ return results; // Return empty results if file doesn't exist
936
+ }
937
+
938
+ // All files are now JSONL format - use line-by-line reading
939
+ // Create optimized read stream
940
+ const stream = fs.createReadStream(this.file, {
941
+ highWaterMark: 64 * 1024, // 64KB chunks
942
+ encoding: 'utf8'
943
+ });
944
+
945
+ // Create readline interface
946
+ const rl = readline.createInterface({
947
+ input: stream,
948
+ crlfDelay: Infinity // Better performance
949
+ });
950
+
951
+ // Handle abort signal
952
+ if (signal) {
953
+ signal.addEventListener('abort', () => {
954
+ stream.destroy();
955
+ rl.close();
956
+ });
957
+ }
958
+
959
+ // Process line by line
960
+ for await (const line of rl) {
961
+ if (signal && signal.aborted) {
962
+ break; // Stop if aborted
963
+ }
964
+
965
+ lineNumber++;
966
+
967
+ // Skip lines that were already processed in previous attempts
968
+ if (lineNumber <= skip) {
969
+ skipped++;
970
+ continue;
971
+ }
972
+
973
+ try {
974
+ let record;
975
+ if (serializer && typeof serializer.deserialize === 'function') {
976
+ // Use serializer for deserialization
977
+ record = serializer.deserialize(line);
978
+ } else {
979
+ // Fallback to JSON.parse for backward compatibility
980
+ record = JSON.parse(line);
981
+ }
982
+
983
+ if (record && matchesCriteria(record, criteria)) {
984
+ // Return raw data - term mapping will be handled by Database layer
985
+ results.push({ ...record, _: lineNumber });
986
+ matched++;
987
+
988
+ // Check if we've reached the limit
989
+ if (results.length >= limit) {
990
+ break;
991
+ }
992
+ }
993
+ } catch (error) {
994
+ // CRITICAL FIX: Only log errors if they're not expected during concurrent operations
995
+ // Don't log JSON parsing errors that occur during file writes
996
+ if (this.opts && this.opts.debugMode && !error.message.includes('Unexpected')) {
997
+ console.log(`Error reading line ${lineNumber}:`, error.message);
998
+ }
999
+ // Ignore invalid lines - they may be partial writes
1000
+ }
1001
+
1002
+ processed++;
1003
+ }
1004
+
1005
+ if (this.opts && this.opts.debugMode) {
1006
+ console.log(`📊 Streaming read completed: ${results.length} results, ${processed} processed, ${skipped} skipped, ${matched} matched`);
1007
+ }
1008
+
1009
+ return results;
1010
+
1011
+ } catch (error) {
1012
+ if (error.message === 'AbortError') {
1013
+ // Return partial results if aborted
1014
+ return results;
1015
+ }
1016
+ console.error('Error in readWithStreaming:', error);
1017
+ throw error;
1018
+ }
1019
+ }
1020
+
1021
+ /**
1022
+ * Count records with streaming
1023
+ * @param {Object} criteria - Filter criteria
1024
+ * @param {Object} options - Options (limit)
1025
+ * @param {Function} matchesCriteria - Function to check if record matches criteria
1026
+ * @returns {Promise<number>} - Number of records
1027
+ */
1028
+ async countWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
1029
+ const { limit } = options;
1030
+ let count = 0;
1031
+ let processed = 0;
1032
+
1033
+ try {
1034
+ const stream = fs.createReadStream(this.file, {
1035
+ highWaterMark: 64 * 1024,
1036
+ encoding: 'utf8'
1037
+ });
1038
+
1039
+ const rl = readline.createInterface({
1040
+ input: stream,
1041
+ crlfDelay: Infinity
1042
+ });
1043
+
1044
+ for await (const line of rl) {
1045
+ if (limit && count >= limit) {
1046
+ break;
1047
+ }
1048
+
1049
+ try {
1050
+ let record;
1051
+ if (serializer) {
1052
+ // Use serializer for deserialization
1053
+ record = await serializer.deserialize(line);
1054
+ } else {
1055
+ // Fallback to JSON.parse for backward compatibility
1056
+ record = JSON.parse(line);
1057
+ }
1058
+
1059
+ if (record && matchesCriteria(record, criteria)) {
1060
+ count++;
1061
+ }
1062
+ } catch (error) {
1063
+ // Ignore invalid lines
1064
+ }
1065
+
1066
+ processed++;
1067
+ }
1068
+
1069
+ return count;
1070
+
1071
+ } catch (error) {
1072
+ throw error;
1073
+ }
1074
+ }
1075
+
1076
+ /**
1077
+ * Get file statistics
1078
+ * @returns {Promise<Object>} - File statistics
1079
+ */
1080
+ async getFileStats() {
1081
+ try {
1082
+ const stats = await fs.promises.stat(this.file);
1083
+ const lineCount = await this.countLines();
1084
+
1085
+ return {
1086
+ filePath: this.file,
1087
+ size: stats.size,
1088
+ lineCount,
1089
+ lastModified: stats.mtime
1090
+ };
1091
+ } catch (error) {
1092
+ throw error;
1093
+ }
1094
+ }
1095
+
1096
+ /**
1097
+ * Count lines in file
1098
+ * @returns {Promise<number>} - Number of lines
1099
+ */
1100
+ async countLines() {
1101
+ let lineCount = 0;
1102
+
1103
+ try {
1104
+ const stream = fs.createReadStream(this.file, {
1105
+ highWaterMark: 64 * 1024,
1106
+ encoding: 'utf8'
1107
+ });
1108
+
1109
+ const rl = readline.createInterface({
1110
+ input: stream,
1111
+ crlfDelay: Infinity
1112
+ });
1113
+
1114
+ for await (const line of rl) {
1115
+ lineCount++;
1116
+ }
1117
+
1118
+ return lineCount;
1119
+ } catch (error) {
1120
+ throw error;
1121
+ }
1122
+ }
1123
+
1124
+ async destroy() {
1125
+ // CRITICAL FIX: Close all file handles to prevent resource leaks
1126
+ try {
1127
+ // Close any open file descriptors
1128
+ if (this.fd) {
1129
+ await this.fd.close().catch(() => {})
1130
+ this.fd = null
1131
+ }
1132
+
1133
+ // Close any open readers/writers
1134
+ if (this.reader) {
1135
+ await this.reader.close().catch(() => {})
1136
+ this.reader = null
1137
+ }
1138
+
1139
+ if (this.writer) {
1140
+ await this.writer.close().catch(() => {})
1141
+ this.writer = null
1142
+ }
1143
+
1144
+ // Clear any cached file handles
1145
+ this.cachedFd = null
1146
+
1147
+ } catch (error) {
1148
+ // Ignore errors during cleanup
1149
+ }
1150
+ }
1151
+
1152
+ async delete() {
1153
+ try {
1154
+ // Delete main file
1155
+ await fs.promises.unlink(this.file).catch(() => {})
1156
+
1157
+ // Delete index file (which now contains both index and offsets data)
1158
+ await fs.promises.unlink(this.indexFile).catch(() => {})
1159
+ } catch (error) {
1160
+ // Ignore errors if files don't exist
1161
+ }
1162
+ }
1163
+
1164
+ async writeAll(data) {
1165
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1166
+ try {
1167
+ // Use Windows-specific retry logic for file operations
1168
+ await this._writeWithRetry(data)
1169
+ } finally {
1170
+ release()
1171
+ }
1172
+ }
1173
+
1174
+ /**
1175
+ * Optimized batch write operation (OPTIMIZATION)
1176
+ * @param {Array} dataChunks - Array of data chunks to write
1177
+ * @param {boolean} append - Whether to append or overwrite
1178
+ */
1179
+ async writeBatch(dataChunks, append = false) {
1180
+ if (!dataChunks || !dataChunks.length) return
1181
+
1182
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1183
+ try {
1184
+ // OPTIMIZATION: Use streaming write for better performance
1185
+ if (dataChunks.length === 1 && Buffer.isBuffer(dataChunks[0])) {
1186
+ // Single buffer - use direct write
1187
+ if (append) {
1188
+ await fs.promises.appendFile(this.file, dataChunks[0])
1189
+ } else {
1190
+ await this._writeFileWithRetry(this.file, dataChunks[0])
1191
+ }
1192
+ } else {
1193
+ // Multiple chunks - use streaming approach
1194
+ await this._writeBatchStreaming(dataChunks, append)
1195
+ }
1196
+ } finally {
1197
+ release()
1198
+ }
1199
+ }
1200
+
1201
+ /**
1202
+ * OPTIMIZATION: Streaming write for multiple chunks
1203
+ * @param {Array} dataChunks - Array of data chunks to write
1204
+ * @param {boolean} append - Whether to append or overwrite
1205
+ */
1206
+ async _writeBatchStreaming(dataChunks, append = false) {
1207
+ // OPTIMIZATION: Use createWriteStream for better performance
1208
+ const writeStream = fs.createWriteStream(this.file, {
1209
+ flags: append ? 'a' : 'w',
1210
+ highWaterMark: 64 * 1024 // 64KB buffer
1211
+ })
1212
+
1213
+ return new Promise((resolve, reject) => {
1214
+ writeStream.on('error', reject)
1215
+ writeStream.on('finish', resolve)
1216
+
1217
+ // Write chunks sequentially
1218
+ let index = 0
1219
+ const writeNext = () => {
1220
+ if (index >= dataChunks.length) {
1221
+ writeStream.end()
1222
+ return
1223
+ }
1224
+
1225
+ const chunk = dataChunks[index++]
1226
+ const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, 'utf8')
1227
+
1228
+ if (!writeStream.write(buffer)) {
1229
+ writeStream.once('drain', writeNext)
1230
+ } else {
1231
+ writeNext()
1232
+ }
1233
+ }
1234
+
1235
+ writeNext()
1236
+ })
1237
+ }
1238
+
1239
+ /**
1240
+ * Optimized append operation for single data chunk (OPTIMIZATION)
1241
+ * @param {string|Buffer} data - Data to append
1242
+ */
1243
+ async appendOptimized(data) {
1244
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1245
+ try {
1246
+ // OPTIMIZATION: Direct append without retry logic for better performance
1247
+ await fs.promises.appendFile(this.file, data)
1248
+ } finally {
1249
+ release()
1250
+ }
1251
+ }
1252
+
1253
+ /**
1254
+ * Windows-specific retry logic for fs.promises.writeFile operations
1255
+ * Based on node-graceful-fs workarounds for EPERM issues
1256
+ */
1257
+ async _writeFileWithRetry(filePath, data, maxRetries = 3) {
1258
+ const isWindows = process.platform === 'win32'
1259
+
1260
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
1261
+ try {
1262
+ // Ensure data is properly formatted as string or buffer
1263
+ if (Buffer.isBuffer(data)) {
1264
+ await fs.promises.writeFile(filePath, data)
1265
+ } else {
1266
+ await fs.promises.writeFile(filePath, data.toString())
1267
+ }
1268
+
1269
+ // Windows: add small delay after write operation
1270
+ // This helps prevent EPERM issues caused by file handle not being released immediately
1271
+ if (isWindows) {
1272
+ await new Promise(resolve => setTimeout(resolve, 10))
1273
+ }
1274
+
1275
+ // Success - return immediately
1276
+ return
1277
+
1278
+ } catch (err) {
1279
+ // Only retry on EPERM errors on Windows
1280
+ if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
1281
+ // Exponential backoff: 10ms, 50ms, 250ms
1282
+ const delay = Math.pow(10, attempt + 1)
1283
+ await new Promise(resolve => setTimeout(resolve, delay))
1284
+ continue
1285
+ }
1286
+
1287
+ // Re-throw if not a retryable error or max retries reached
1288
+ throw err
1289
+ }
1290
+ }
1291
+ }
1292
+
1293
+ /**
1294
+ * Windows-specific retry logic for file operations
1295
+ * Based on node-graceful-fs workarounds for EPERM issues
1296
+ */
1297
+ async _writeWithRetry(data, maxRetries = 3) {
1298
+ const isWindows = process.platform === 'win32'
1299
+
1300
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
1301
+ try {
1302
+ // CRITICAL FIX: Ensure directory exists before writing file
1303
+ const dir = path.dirname(this.file)
1304
+ await fs.promises.mkdir(dir, { recursive: true })
1305
+
1306
+ const fd = await fs.promises.open(this.file, 'w')
1307
+ try {
1308
+ // Ensure data is properly formatted as string or buffer
1309
+ if (Buffer.isBuffer(data)) {
1310
+ await fd.write(data)
1311
+ } else {
1312
+ await fd.write(data.toString())
1313
+ }
1314
+ } finally {
1315
+ await fd.close()
1316
+
1317
+ // Windows: add small delay after closing file handle
1318
+ // This helps prevent EPERM issues caused by file handle not being released immediately
1319
+ if (isWindows) {
1320
+ await new Promise(resolve => setTimeout(resolve, 10))
1321
+ }
1322
+ }
1323
+
1324
+ // Success - return immediately
1325
+ return
1326
+
1327
+ } catch (err) {
1328
+ // Only retry on EPERM errors on Windows
1329
+ if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
1330
+ // Exponential backoff: 10ms, 50ms, 250ms
1331
+ const delay = Math.pow(10, attempt + 1)
1332
+ await new Promise(resolve => setTimeout(resolve, delay))
1333
+ continue
1334
+ }
1335
+
1336
+ // Re-throw if not a retryable error or max retries reached
1337
+ throw err
1338
+ }
1339
+ }
1340
+ }
1341
+
1342
+ async readAll() {
1343
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1344
+ try {
1345
+ // Check if file exists before trying to read it
1346
+ if (!await this.exists()) {
1347
+ return '' // Return empty string if file doesn't exist
1348
+ }
1349
+
1350
+ const fd = await fs.promises.open(this.file, 'r')
1351
+ try {
1352
+ const stats = await fd.stat()
1353
+ const buffer = Buffer.allocUnsafe(stats.size)
1354
+ await fd.read(buffer, 0, stats.size, 0)
1355
+ return buffer.toString('utf8')
1356
+ } finally {
1357
+ await fd.close()
1358
+ }
1359
+ } finally {
1360
+ release()
1361
+ }
1362
+ }
1363
+
1364
+ /**
1365
+ * Read specific lines from the file using line numbers
1366
+ * This is optimized for partial reads when using indexed queries
1367
+ * @param {number[]} lineNumbers - Array of line numbers to read (1-based)
1368
+ * @returns {Promise<string>} - Content of the specified lines
1369
+ */
1370
+ async readSpecificLines(lineNumbers) {
1371
+ if (!lineNumbers || lineNumbers.length === 0) {
1372
+ return ''
1373
+ }
1374
+
1375
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1376
+ try {
1377
+ // Check if file exists before trying to read it
1378
+ if (!await this.exists()) {
1379
+ return '' // Return empty string if file doesn't exist
1380
+ }
1381
+
1382
+ const fd = await fs.promises.open(this.file, 'r')
1383
+ try {
1384
+ const stats = await fd.stat()
1385
+ const buffer = Buffer.allocUnsafe(stats.size)
1386
+ await fd.read(buffer, 0, stats.size, 0)
1387
+
1388
+ // CRITICAL FIX: Ensure proper UTF-8 decoding for multi-byte characters
1389
+ let content
1390
+ try {
1391
+ content = buffer.toString('utf8')
1392
+ } catch (error) {
1393
+ // If UTF-8 decoding fails, try to recover by finding valid UTF-8 boundaries
1394
+ if (this.opts.debugMode) {
1395
+ console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`)
1396
+ }
1397
+
1398
+ // Find the last complete UTF-8 character
1399
+ let validLength = buffer.length
1400
+ for (let i = buffer.length - 1; i >= 0; i--) {
1401
+ const byte = buffer[i]
1402
+ // CRITICAL FIX: Correct UTF-8 start character detection
1403
+ // Check if this is the start of a UTF-8 character (not a continuation byte)
1404
+ if ((byte & 0x80) === 0 || // ASCII (1 byte) - 0xxxxxxx
1405
+ (byte & 0xE0) === 0xC0 || // 2-byte UTF-8 start - 110xxxxx
1406
+ (byte & 0xF0) === 0xE0 || // 3-byte UTF-8 start - 1110xxxx
1407
+ (byte & 0xF8) === 0xF0) { // 4-byte UTF-8 start - 11110xxx
1408
+ validLength = i + 1
1409
+ break
1410
+ }
1411
+ }
1412
+
1413
+ const validBuffer = buffer.subarray(0, validLength)
1414
+ content = validBuffer.toString('utf8')
1415
+ }
1416
+
1417
+ // Split content into lines and extract only the requested lines
1418
+ const lines = content.split('\n')
1419
+ const result = []
1420
+
1421
+ for (const lineNum of lineNumbers) {
1422
+ // Convert to 0-based index and check bounds
1423
+ const index = lineNum - 1
1424
+ if (index >= 0 && index < lines.length) {
1425
+ result.push(lines[index])
1426
+ }
1427
+ }
1428
+
1429
+ return result.join('\n')
1430
+ } finally {
1431
+ await fd.close()
1432
+ }
1433
+ } finally {
1434
+ release()
1435
+ }
1436
+ }
1437
+
1438
+ }
1439
+