jexidb 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/.babelrc +13 -0
  2. package/.gitattributes +2 -0
  3. package/CHANGELOG.md +140 -0
  4. package/LICENSE +21 -21
  5. package/README.md +301 -527
  6. package/babel.config.json +5 -0
  7. package/dist/Database.cjs +3896 -0
  8. package/docs/API.md +1051 -0
  9. package/docs/EXAMPLES.md +701 -0
  10. package/docs/README.md +194 -0
  11. package/examples/iterate-usage-example.js +157 -0
  12. package/examples/simple-iterate-example.js +115 -0
  13. package/jest.config.js +24 -0
  14. package/package.json +63 -51
  15. package/scripts/README.md +47 -0
  16. package/scripts/clean-test-files.js +75 -0
  17. package/scripts/prepare.js +31 -0
  18. package/scripts/run-tests.js +80 -0
  19. package/src/Database.mjs +4130 -0
  20. package/src/FileHandler.mjs +1101 -0
  21. package/src/OperationQueue.mjs +279 -0
  22. package/src/SchemaManager.mjs +268 -0
  23. package/src/Serializer.mjs +511 -0
  24. package/src/managers/ConcurrencyManager.mjs +257 -0
  25. package/src/managers/IndexManager.mjs +1403 -0
  26. package/src/managers/QueryManager.mjs +1273 -0
  27. package/src/managers/StatisticsManager.mjs +262 -0
  28. package/src/managers/StreamingProcessor.mjs +429 -0
  29. package/src/managers/TermManager.mjs +278 -0
  30. package/test/$not-operator-with-and.test.js +282 -0
  31. package/test/README.md +8 -0
  32. package/test/close-init-cycle.test.js +256 -0
  33. package/test/critical-bugs-fixes.test.js +1069 -0
  34. package/test/index-persistence.test.js +306 -0
  35. package/test/index-serialization.test.js +314 -0
  36. package/test/indexed-query-mode.test.js +360 -0
  37. package/test/iterate-method.test.js +272 -0
  38. package/test/query-operators.test.js +238 -0
  39. package/test/regex-array-fields.test.js +129 -0
  40. package/test/score-method.test.js +238 -0
  41. package/test/setup.js +17 -0
  42. package/test/term-mapping-minimal.test.js +154 -0
  43. package/test/term-mapping-simple.test.js +257 -0
  44. package/test/term-mapping.test.js +514 -0
  45. package/test/writebuffer-flush-resilience.test.js +204 -0
  46. package/dist/FileHandler.js +0 -688
  47. package/dist/IndexManager.js +0 -353
  48. package/dist/IntegrityChecker.js +0 -364
  49. package/dist/JSONLDatabase.js +0 -1194
  50. package/dist/index.js +0 -617
  51. package/src/FileHandler.js +0 -674
  52. package/src/IndexManager.js +0 -363
  53. package/src/IntegrityChecker.js +0 -379
  54. package/src/JSONLDatabase.js +0 -1248
  55. package/src/index.js +0 -608
@@ -0,0 +1,1101 @@
1
+ import fs from 'fs'
2
+ import path from 'path'
3
+ import readline from 'readline'
4
+ import pLimit from 'p-limit'
5
+
6
+ export default class FileHandler {
7
+ constructor(file, fileMutex = null, opts = {}) {
8
+ this.file = file
9
+ this.indexFile = file.replace(/\.jdb$/, '.idx.jdb')
10
+ this.fileMutex = fileMutex
11
+ this.opts = opts
12
+ this.maxBufferSize = opts.maxBufferSize || 4 * 1024 * 1024 // 4MB default
13
+ }
14
+
15
+ async truncate(offset) {
16
+ try {
17
+ await fs.promises.access(this.file, fs.constants.F_OK)
18
+ await fs.promises.truncate(this.file, offset)
19
+ } catch (err) {
20
+ await fs.promises.writeFile(this.file, '')
21
+ }
22
+ }
23
+
24
+ async writeOffsets(data) {
25
+ // Write offsets to the index file (will be combined with index data)
26
+ await fs.promises.writeFile(this.indexFile, data)
27
+ }
28
+
29
+ async readOffsets() {
30
+ try {
31
+ return await fs.promises.readFile(this.indexFile)
32
+ } catch (err) {
33
+ return null
34
+ }
35
+ }
36
+
37
+ async writeIndex(data) {
38
+ // Write index data to the index file (will be combined with offsets)
39
+ // Use Windows-specific retry logic for file operations
40
+ await this._writeFileWithRetry(this.indexFile, data)
41
+ }
42
+
43
+ async readIndex() {
44
+ try {
45
+ return await fs.promises.readFile(this.indexFile)
46
+ } catch (err) {
47
+ return null
48
+ }
49
+ }
50
+
51
+ async exists() {
52
+ try {
53
+ await fs.promises.access(this.file, fs.constants.F_OK)
54
+ return true
55
+ } catch (err) {
56
+ return false
57
+ }
58
+ }
59
+
60
+
61
+ async indexExists() {
62
+ try {
63
+ await fs.promises.access(this.indexFile, fs.constants.F_OK)
64
+ return true
65
+ } catch (err) {
66
+ return false
67
+ }
68
+ }
69
+
70
+ async isLegacyFormat() {
71
+ if (!await this.exists()) return false
72
+ if (await this.indexExists()) return false
73
+
74
+ // Check if main file contains offsets at the end (legacy format)
75
+ try {
76
+ const lastLine = await this.readLastLine()
77
+ if (!lastLine || !lastLine.length) return false
78
+
79
+ // Try to parse as offsets array
80
+ const content = lastLine.toString('utf-8').trim()
81
+ const parsed = JSON.parse(content)
82
+ return Array.isArray(parsed)
83
+ } catch (err) {
84
+ return false
85
+ }
86
+ }
87
+
88
  /**
   * One-time migration from the legacy single-file layout (data with index
   * and offsets appended) to the split data/sidecar layout.
   * @param {Object} serializer - Object exposing serialize()/deserialize().
   * @returns {Promise<boolean>} true when a migration was performed.
   */
  async migrateLegacyFormat(serializer) {
    if (!await this.isLegacyFormat()) return false

    console.log('Migrating from legacy format to new 3-file format...')

    // The legacy file's last line is a JSON array of offsets; its final two
    // entries delimit the embedded serialized index block.
    const lastLine = await this.readLastLine()
    const offsets = JSON.parse(lastLine.toString('utf-8').trim())

    // Second-to-last entry marks where the index block starts in the file.
    const indexOffset = offsets[offsets.length - 2]
    const dataOffsets = offsets.slice(0, -2)

    // Extract and deserialize the embedded index block.
    const indexStart = indexOffset
    const indexEnd = offsets[offsets.length - 1]
    const indexBuffer = await this.readRange(indexStart, indexEnd)
    const indexData = await serializer.deserialize(indexBuffer)

    // Persist the data offsets to the sidecar file.
    const offsetsString = await serializer.serialize(dataOffsets, { linebreak: false })
    await this.writeOffsets(offsetsString)

    // Persist the index to the sidecar file.
    // NOTE(review): writeOffsets and writeIndex both target this.indexFile,
    // so this write replaces the offsets written just above — confirm the
    // combined sidecar format is reconciled by the caller.
    const indexString = await serializer.serialize(indexData, { linebreak: false })
    await this.writeIndex(indexString)

    // Drop the embedded index/offsets tail from the main data file.
    await this.truncate(indexOffset)

    console.log('Migration completed successfully!')
    return true
  }
121
+
122
+ async readRange(start, end) {
123
+ // Check if file exists before trying to read it
124
+ if (!await this.exists()) {
125
+ return Buffer.alloc(0) // Return empty buffer if file doesn't exist
126
+ }
127
+
128
+ let fd = await fs.promises.open(this.file, 'r')
129
+ try {
130
+ // CRITICAL FIX: Check file size before attempting to read
131
+ const stats = await fd.stat()
132
+ const fileSize = stats.size
133
+
134
+ // If start position is beyond file size, return empty buffer
135
+ if (start >= fileSize) {
136
+ await fd.close()
137
+ return Buffer.alloc(0)
138
+ }
139
+
140
+ // Adjust end position if it's beyond file size
141
+ const actualEnd = Math.min(end, fileSize)
142
+ const length = actualEnd - start
143
+
144
+ // If length is 0 or negative, return empty buffer
145
+ if (length <= 0) {
146
+ await fd.close()
147
+ return Buffer.alloc(0)
148
+ }
149
+
150
+ let buffer = Buffer.alloc(length)
151
+ const { bytesRead } = await fd.read(buffer, 0, length, start)
152
+ await fd.close()
153
+
154
+ // CRITICAL FIX: Ensure we read the expected amount of data
155
+ if (bytesRead !== length) {
156
+ const errorMsg = `CRITICAL: Expected to read ${length} bytes, but read ${bytesRead} bytes at position ${start}`
157
+ console.error(`⚠️ ${errorMsg}`)
158
+
159
+ // This indicates a race condition or file corruption
160
+ // Don't retry - the caller should handle synchronization properly
161
+ if (bytesRead === 0) {
162
+ throw new Error(`File corruption detected: ${errorMsg}`)
163
+ }
164
+
165
+ // Return partial data with warning - caller should handle this
166
+ return buffer.subarray(0, bytesRead)
167
+ }
168
+
169
+ return buffer
170
+ } catch (error) {
171
+ await fd.close().catch(() => {})
172
+ throw error
173
+ }
174
+ }
175
+
176
  /**
   * Read many byte ranges with bounded concurrency (4 parallel group reads)
   * and collect each line keyed by its starting byte offset.
   * @param {Array<{start:number,end:number}>} ranges - Ordered ranges.
   * @param {Function} [mapper] - Optional async mapper(line, {start, end}).
   * @returns {Promise<Object>} Map of start offset -> line (or mapped value).
   */
  async readRanges(ranges, mapper) {
    const lines = {}, limit = pLimit(4)

    // Bail out early when there is no file to read.
    if (!await this.exists()) {
      return lines
    }

    const fd = await fs.promises.open(this.file, 'r')
    const groupedRanges = await this.groupedRanges(ranges)
    try {
      // allSettled: one failing group must not abort the other reads.
      await Promise.allSettled(groupedRanges.map(async (groupedRange) => {
        await limit(async () => {
          for await (const row of this.readGroupedRange(groupedRange, fd)) {
            lines[row.start] = mapper ? (await mapper(row.line, { start: row.start, end: row.start + row.line.length })) : row.line
          }
        })
      }))
    } catch (e) {
      console.error('Error reading ranges:', e)
    } finally {
      await fd.close()
    }
    return lines
  }
201
+
202
+ async groupedRanges(ranges) { // expects ordered ranges from Database.getRanges()
203
+ const readSize = 512 * 1024 // 512KB
204
+ const groupedRanges = []
205
+ let currentGroup = []
206
+ let currentSize = 0
207
+
208
+ // each range is a {start: number, end: number} object
209
+ for (let i = 0; i < ranges.length; i++) {
210
+ const range = ranges[i]
211
+ const rangeSize = range.end - range.start
212
+
213
+ if (currentGroup.length > 0) {
214
+ const lastRange = currentGroup[currentGroup.length - 1]
215
+ if (lastRange.end !== range.start || currentSize + rangeSize > readSize) {
216
+ groupedRanges.push(currentGroup)
217
+ currentGroup = []
218
+ currentSize = 0
219
+ }
220
+ }
221
+
222
+ currentGroup.push(range)
223
+ currentSize += rangeSize
224
+ }
225
+
226
+ if (currentGroup.length > 0) {
227
+ groupedRanges.push(currentGroup)
228
+ }
229
+
230
+ return groupedRanges
231
+ }
232
+
233
  /**
   * Async generator yielding { line, start, _ } for each range in a
   * contiguous group, using at most one read syscall for the whole group.
   * @param {Array<{start:number,end:number}>} groupedRange - Contiguous ranges.
   * @param {FileHandle} fd - Open read handle for the main file.
   */
  async *readGroupedRange(groupedRange, fd) {
    if (groupedRange.length === 0) return

    // Single range: read directly into one buffer.
    if (groupedRange.length === 1) {
      const range = groupedRange[0]
      const bufferSize = range.end - range.start

      if (bufferSize <= 0 || bufferSize > this.maxBufferSize) {
        throw new Error(`Invalid buffer size: ${bufferSize}. Start: ${range.start}, End: ${range.end}. Max allowed: ${this.maxBufferSize}`)
      }

      const buffer = Buffer.allocUnsafe(bufferSize)
      const { bytesRead } = await fd.read(buffer, 0, bufferSize, range.start)
      // Trim to what was actually read (file may have shrunk).
      const actualBuffer = bytesRead < bufferSize ? buffer.subarray(0, bytesRead) : buffer

      if (actualBuffer.length === 0) return

      let lineString
      try {
        lineString = actualBuffer.toString('utf8')
      } catch (error) {
        // NOTE(review): Buffer#toString does not accept an options object
        // and does not throw for 'utf8' — this fallback looks unreachable;
        // confirm before relying on it.
        lineString = actualBuffer.toString('utf8', { replacement: '?' })
      }

      yield {
        line: lineString,
        start: range.start,
        // `_` carries the record's index when the caller provided one.
        _: range.index !== undefined ? range.index : (range._ || null)
      }
      return
    }

    // Multiple ranges: read the whole span once, then slice per range.
    const firstRange = groupedRange[0]
    const lastRange = groupedRange[groupedRange.length - 1]
    const totalSize = lastRange.end - firstRange.start

    if (totalSize <= 0 || totalSize > this.maxBufferSize) {
      throw new Error(`Invalid total buffer size: ${totalSize}. Start: ${firstRange.start}, End: ${lastRange.end}. Max allowed: ${this.maxBufferSize}`)
    }

    const buffer = Buffer.allocUnsafe(totalSize)
    const { bytesRead } = await fd.read(buffer, 0, totalSize, firstRange.start)
    const actualBuffer = bytesRead < totalSize ? buffer.subarray(0, bytesRead) : buffer

    if (actualBuffer.length === 0) return

    // Decode the full span once instead of once per range.
    let content
    try {
      content = actualBuffer.toString('utf8')
    } catch (error) {
      content = actualBuffer.toString('utf8', { replacement: '?' })
    }

    if (groupedRange.length === 2 && groupedRange[0].end === groupedRange[1].start) {
      // Exactly two adjacent ranges: split on newlines instead of byte
      // offsets so a line straddling the boundary is not corrupted.
      const lines = content.split('\n').filter(line => line.trim().length > 0)

      for (let i = 0; i < Math.min(lines.length, groupedRange.length); i++) {
        const range = groupedRange[i]
        yield {
          line: lines[i],
          start: range.start,
          _: range.index !== undefined ? range.index : (range._ || null)
        }
      }
    } else {
      // General case: slice each range out of the decoded span.
      // NOTE(review): substring indexes count UTF-16 code units while the
      // range offsets are byte offsets — multi-byte characters would skew
      // the slices; confirm stored data is ASCII-safe.
      for (let i = 0; i < groupedRange.length; i++) {
        const range = groupedRange[i]
        const relativeStart = range.start - firstRange.start
        const relativeEnd = range.end - firstRange.start

        const rangeContent = content.substring(relativeStart, relativeEnd)

        if (rangeContent.length === 0) continue

        yield {
          line: rangeContent,
          start: range.start,
          _: range.index !== undefined ? range.index : (range._ || null)
        }
      }
    }
  }
323
+
324
+ async *walk(ranges) {
325
+ // Check if file exists before trying to read it
326
+ if (!await this.exists()) {
327
+ return // Return empty generator if file doesn't exist
328
+ }
329
+
330
+ const fd = await fs.promises.open(this.file, 'r')
331
+ try {
332
+ const groupedRanges = await this.groupedRanges(ranges)
333
+ for(const groupedRange of groupedRanges) {
334
+ for await (const row of this.readGroupedRange(groupedRange, fd)) {
335
+ yield row
336
+ }
337
+ }
338
+ } finally {
339
+ await fd.close()
340
+ }
341
+ }
342
+
343
+ async replaceLines(ranges, lines) {
344
+ // CRITICAL: Always use file mutex to prevent concurrent file operations
345
+ if (this.fileMutex) {
346
+ return this.fileMutex.runExclusive(async () => {
347
+ // Add a small delay to ensure any pending operations complete
348
+ await new Promise(resolve => setTimeout(resolve, 10));
349
+ return this._replaceLinesInternal(ranges, lines);
350
+ });
351
+ } else {
352
+ return this._replaceLinesInternal(ranges, lines);
353
+ }
354
+ }
355
+
356
  /**
   * Rewrite the data file by replacing each byte range with its paired
   * line (or dropping the range when no line is supplied — a delete).
   * Content is written to a temp file, validated, then renamed into place.
   * @param {Array<{start:number,end:number}>} ranges - Byte ranges to replace.
   * @param {Array<string|Buffer>} lines - Replacements, index-aligned with ranges.
   */
  async _replaceLinesInternal(ranges, lines) {
    const tmpFile = this.file + '.tmp';
    let writer, reader;

    try {
      writer = await fs.promises.open(tmpFile, 'w+');

      // Only open a reader when there is existing content to carry over.
      if (await this.exists()) {
        reader = await fs.promises.open(this.file, 'r');
      } else {
        reader = null;
      }

      // Process ranges in ascending byte order regardless of input order.
      const sortedRanges = [...ranges].sort((a, b) => a.start - b.start);

      let position = 0;
      let lineIndex = 0;

      for (const range of sortedRanges) {
        // Copy untouched content between the previous range and this one.
        if (reader && position < range.start) {
          const buffer = await this.readRange(position, range.start);
          await writer.write(buffer);
        }

        // Write the replacement line; a missing/falsy entry deletes the range.
        if (lineIndex < lines.length && lines[lineIndex]) {
          const line = lines[lineIndex];
          // Normalize so every stored line ends with a '\n'.
          let formattedBuffer;
          if (Buffer.isBuffer(line)) {
            const needsNewline = line.length === 0 || line[line.length - 1] !== 0x0A;
            formattedBuffer = needsNewline ? Buffer.concat([line, Buffer.from('\n')]) : line;
          } else {
            const withNewline = line.endsWith('\n') ? line : line + '\n';
            formattedBuffer = Buffer.from(withNewline, 'utf8');
          }
          await writer.write(formattedBuffer);
        }

        // Skip past the replaced bytes so they are not copied again.
        position = range.end;
        lineIndex++;
      }

      // Copy any remaining tail after the last range.
      if (reader) {
        const { size } = await reader.stat();
        if (position < size) {
          const buffer = await this.readRange(position, size);
          await writer.write(buffer);
        }
      }

      // Flush to disk before swapping files.
      await writer.sync();
      if (reader) await reader.close();
      await writer.close();

      // Sanity-check the temp file's JSONL content before it replaces the data file.
      await this._validateTempFile(tmpFile);

      // Rename with EPERM retry handling (Windows).
      await this._safeRename(tmpFile, this.file);

    } catch (e) {
      console.error('Erro ao substituir linhas:', e);
      throw e;
    } finally {
      // Best-effort cleanup; handles may already be closed above, and the
      // temp file is gone after a successful rename — errors are ignored.
      if (reader) await reader.close().catch(() => { });
      if (writer) await writer.close().catch(() => { });
      await fs.promises.unlink(tmpFile).catch(() => { });
    }
  }
433
+
434
  /**
   * Rename tmpFile over targetFile, retrying Windows EPERM errors and
   * falling back to a copy+delete approach when all retries fail.
   * @param {string} tmpFile - Source path.
   * @param {string} targetFile - Destination path.
   * @param {number} [maxRetries=3] - Total rename attempts.
   */
  async _safeRename(tmpFile, targetFile, maxRetries = 3) {
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        await fs.promises.rename(tmpFile, targetFile);
        return; // Success
      } catch (error) {
        if (error.code === 'EPERM' && attempt < maxRetries) {
          // Linear backoff: delay = 50ms * attempt (50ms, then 100ms).
          const delay = 50 * attempt;
          console.log(`🔄 EPERM retry ${attempt}/${maxRetries}, waiting ${delay}ms...`);
          await new Promise(resolve => setTimeout(resolve, delay));
          continue;
        }

        // Final EPERM failure: copy instead of renaming (Windows quirk).
        if (error.code === 'EPERM' && attempt === maxRetries) {
          console.log(`⚠️ All EPERM retries failed, trying Windows fallback...`);
          return this._windowsFallbackRename(tmpFile, targetFile);
        }

        // Non-EPERM errors propagate immediately.
        throw error;
      }
    }
  }
458
+
459
  /**
   * Validate that every non-empty line of the temp file parses as JSON;
   * invalid lines are dropped and the file rewritten with the valid ones.
   * NOTE(review): when every line is invalid (validLines.length === 0) the
   * file is left untouched yet validation still "passes" — confirm intended.
   * @param {string} tmpFile - Path to the temp file to check.
   * @throws When the temp file cannot be read or rewritten.
   */
  async _validateTempFile(tmpFile) {
    try {
      const content = await fs.promises.readFile(tmpFile, 'utf8');
      const lines = content.split('\n').filter(line => line.trim());

      let hasInvalidJson = false;
      const validLines = [];

      // Parse each line individually; collect the ones that survive.
      for (let i = 0; i < lines.length; i++) {
        try {
          JSON.parse(lines[i]);
          validLines.push(lines[i]);
        } catch (error) {
          console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
          hasInvalidJson = true;
        }
      }

      // Rewrite with only the lines that parsed cleanly.
      if (hasInvalidJson && validLines.length > 0) {
        console.log(`🔧 Rewriting temp file with ${validLines.length} valid lines`);
        const correctedContent = validLines.join('\n') + '\n';
        await fs.promises.writeFile(tmpFile, correctedContent, 'utf8');
      }

      console.log(`✅ Temp file validation passed: ${validLines.length} valid JSON lines`);
    } catch (error) {
      console.error(`❌ Temp file validation failed:`, error.message);
      throw error;
    }
  }
491
+
492
+ async _windowsFallbackRename(tmpFile, targetFile) {
493
+ try {
494
+ // Windows fallback: copy content instead of rename
495
+ console.log(`🔄 Using Windows fallback: copy + delete approach`);
496
+
497
+ // Validate temp file before copying
498
+ await this._validateTempFile(tmpFile);
499
+
500
+ // Read the temp file content
501
+ const content = await fs.promises.readFile(tmpFile, 'utf8');
502
+
503
+ // Write to target file directly
504
+ await fs.promises.writeFile(targetFile, content, 'utf8');
505
+
506
+ // Delete temp file
507
+ await fs.promises.unlink(tmpFile);
508
+
509
+ console.log(`✅ Windows fallback successful`);
510
+ return;
511
+ } catch (fallbackError) {
512
+ console.error(`❌ Windows fallback also failed:`, fallbackError);
513
+ throw fallbackError;
514
+ }
515
+ }
516
+
517
  /**
   * Write raw data through an already-open FileHandle.
   * @param {string|Buffer} data - Data to write.
   * @param {boolean} immediate - Unused here; kept for interface parity.
   * @param {FileHandle} fd - Open handle to write through.
   */
  async writeData(data, immediate, fd) {
    await fd.write(data)
  }
520
+
521
+ async writeDataAsync(data) {
522
+ // CRITICAL FIX: Ensure directory exists before writing
523
+ const dir = path.dirname(this.file)
524
+ await fs.promises.mkdir(dir, { recursive: true })
525
+
526
+ await fs.promises.appendFile(this.file, data)
527
+ }
528
+
529
+ /**
530
+ * Check if data appears to be binary (always false since we only use JSON now)
531
+ */
532
+ isBinaryData(data) {
533
+ // All data is now JSON format
534
+ return false
535
+ }
536
+
537
+ /**
538
+ * Check if file is binary (always false since we only use JSON now)
539
+ */
540
+ async isBinaryFile() {
541
+ // All files are now JSON format
542
+ return false
543
+ }
544
+
545
+ async readLastLine() {
546
+ // Check if file exists before trying to read it
547
+ if (!await this.exists()) {
548
+ return null // Return null if file doesn't exist
549
+ }
550
+
551
+ const reader = await fs.promises.open(this.file, 'r')
552
+ try {
553
+ const { size } = await reader.stat()
554
+ if (size < 1) throw 'empty file'
555
+ this.size = size
556
+ const bufferSize = 16384
557
+ let buffer, isFirstRead = true, lastReadSize, readPosition = Math.max(size - bufferSize, 0)
558
+ while (readPosition >= 0) {
559
+ const readSize = Math.min(bufferSize, size - readPosition)
560
+ if (readSize !== lastReadSize) {
561
+ lastReadSize = readSize
562
+ buffer = Buffer.alloc(readSize)
563
+ }
564
+ const { bytesRead } = await reader.read(buffer, 0, isFirstRead ? (readSize - 1) : readSize, readPosition)
565
+ if (isFirstRead) isFirstRead = false
566
+ if (bytesRead === 0) break
567
+ const newlineIndex = buffer.lastIndexOf(10)
568
+ const start = readPosition + newlineIndex + 1
569
+ if (newlineIndex !== -1) {
570
+ const lastLine = Buffer.alloc(size - start)
571
+ await reader.read(lastLine, 0, size - start, start)
572
+ if (!lastLine || !lastLine.length) {
573
+ throw 'no metadata or empty file'
574
+ }
575
+ return lastLine
576
+ } else {
577
+ readPosition -= bufferSize
578
+ }
579
+ }
580
+ } catch (e) {
581
+ String(e).includes('empty file') || console.error('Error reading last line:', e)
582
+ } finally {
583
+ reader.close()
584
+ }
585
+ }
586
+
587
+ /**
588
+ * Read records with streaming using readline
589
+ * @param {Object} criteria - Filter criteria
590
+ * @param {Object} options - Options (limit, skip)
591
+ * @param {Function} matchesCriteria - Function to check if record matches criteria
592
+ * @returns {Promise<Array>} - Array of records
593
+ */
594
+ async readWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
595
+ // CRITICAL: Always use file mutex to prevent concurrent file operations
596
+ if (this.fileMutex) {
597
+ return this.fileMutex.runExclusive(async () => {
598
+ // Add a small delay to ensure any pending operations complete
599
+ await new Promise(resolve => setTimeout(resolve, 5));
600
+ return this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer);
601
+ });
602
+ } else {
603
+ return this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer);
604
+ }
605
+ }
606
+
607
+ async _readWithStreamingInternal(criteria, options = {}, matchesCriteria, serializer = null) {
608
+ const { limit, skip = 0 } = options; // No default limit
609
+ const results = [];
610
+ let lineNumber = 0;
611
+ let processed = 0;
612
+ let skipped = 0;
613
+ let matched = 0;
614
+
615
+ try {
616
+ // Check if file exists before trying to read it
617
+ if (!await this.exists()) {
618
+ return results; // Return empty results if file doesn't exist
619
+ }
620
+
621
+ // All files are now JSONL format - use line-by-line reading
622
+ // Create optimized read stream
623
+ const stream = fs.createReadStream(this.file, {
624
+ highWaterMark: 64 * 1024, // 64KB chunks
625
+ encoding: 'utf8'
626
+ });
627
+
628
+ // Create readline interface
629
+ const rl = readline.createInterface({
630
+ input: stream,
631
+ crlfDelay: Infinity // Better performance
632
+ });
633
+
634
+ // Process line by line
635
+ for await (const line of rl) {
636
+ if (lineNumber >= skip) {
637
+ try {
638
+ let record;
639
+ if (serializer && typeof serializer.deserialize === 'function') {
640
+ // Use serializer for deserialization
641
+ record = serializer.deserialize(line);
642
+ } else {
643
+ // Fallback to JSON.parse for backward compatibility
644
+ record = JSON.parse(line);
645
+ }
646
+
647
+ if (record && matchesCriteria(record, criteria)) {
648
+ // Return raw data - term mapping will be handled by Database layer
649
+ results.push({ ...record, _: lineNumber });
650
+ matched++;
651
+
652
+ // Check if we've reached the limit
653
+ if (results.length >= limit) {
654
+ break;
655
+ }
656
+ }
657
+ } catch (error) {
658
+ // CRITICAL FIX: Only log errors if they're not expected during concurrent operations
659
+ // Don't log JSON parsing errors that occur during file writes
660
+ if (this.opts && this.opts.debugMode && !error.message.includes('Unexpected')) {
661
+ console.log(`Error reading line ${lineNumber}:`, error.message);
662
+ }
663
+ // Ignore invalid lines - they may be partial writes
664
+ }
665
+ } else {
666
+ skipped++;
667
+ }
668
+
669
+ lineNumber++;
670
+ processed++;
671
+ }
672
+
673
+ if (this.opts && this.opts.debugMode) {
674
+ console.log(`📊 Streaming read completed: ${results.length} results, ${processed} processed, ${skipped} skipped, ${matched} matched`);
675
+ }
676
+
677
+ return results;
678
+
679
+ } catch (error) {
680
+ console.error('Error in readWithStreaming:', error);
681
+ throw error;
682
+ }
683
+ }
684
+
685
+ /**
686
+ * Count records with streaming
687
+ * @param {Object} criteria - Filter criteria
688
+ * @param {Object} options - Options (limit)
689
+ * @param {Function} matchesCriteria - Function to check if record matches criteria
690
+ * @returns {Promise<number>} - Number of records
691
+ */
692
+ async countWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
693
+ const { limit } = options;
694
+ let count = 0;
695
+ let processed = 0;
696
+
697
+ try {
698
+ const stream = fs.createReadStream(this.file, {
699
+ highWaterMark: 64 * 1024,
700
+ encoding: 'utf8'
701
+ });
702
+
703
+ const rl = readline.createInterface({
704
+ input: stream,
705
+ crlfDelay: Infinity
706
+ });
707
+
708
+ for await (const line of rl) {
709
+ if (limit && count >= limit) {
710
+ break;
711
+ }
712
+
713
+ try {
714
+ let record;
715
+ if (serializer) {
716
+ // Use serializer for deserialization
717
+ record = await serializer.deserialize(line);
718
+ } else {
719
+ // Fallback to JSON.parse for backward compatibility
720
+ record = JSON.parse(line);
721
+ }
722
+
723
+ if (record && matchesCriteria(record, criteria)) {
724
+ count++;
725
+ }
726
+ } catch (error) {
727
+ // Ignore invalid lines
728
+ }
729
+
730
+ processed++;
731
+ }
732
+
733
+ return count;
734
+
735
+ } catch (error) {
736
+ throw error;
737
+ }
738
+ }
739
+
740
+ /**
741
+ * Get file statistics
742
+ * @returns {Promise<Object>} - File statistics
743
+ */
744
+ async getFileStats() {
745
+ try {
746
+ const stats = await fs.promises.stat(this.file);
747
+ const lineCount = await this.countLines();
748
+
749
+ return {
750
+ filePath: this.file,
751
+ size: stats.size,
752
+ lineCount,
753
+ lastModified: stats.mtime
754
+ };
755
+ } catch (error) {
756
+ throw error;
757
+ }
758
+ }
759
+
760
+ /**
761
+ * Count lines in file
762
+ * @returns {Promise<number>} - Number of lines
763
+ */
764
+ async countLines() {
765
+ let lineCount = 0;
766
+
767
+ try {
768
+ const stream = fs.createReadStream(this.file, {
769
+ highWaterMark: 64 * 1024,
770
+ encoding: 'utf8'
771
+ });
772
+
773
+ const rl = readline.createInterface({
774
+ input: stream,
775
+ crlfDelay: Infinity
776
+ });
777
+
778
+ for await (const line of rl) {
779
+ lineCount++;
780
+ }
781
+
782
+ return lineCount;
783
+ } catch (error) {
784
+ throw error;
785
+ }
786
+ }
787
+
788
+ async destroy() {
789
+ // CRITICAL FIX: Close all file handles to prevent resource leaks
790
+ try {
791
+ // Close any open file descriptors
792
+ if (this.fd) {
793
+ await this.fd.close().catch(() => {})
794
+ this.fd = null
795
+ }
796
+
797
+ // Close any open readers/writers
798
+ if (this.reader) {
799
+ await this.reader.close().catch(() => {})
800
+ this.reader = null
801
+ }
802
+
803
+ if (this.writer) {
804
+ await this.writer.close().catch(() => {})
805
+ this.writer = null
806
+ }
807
+
808
+ // Clear any cached file handles
809
+ this.cachedFd = null
810
+
811
+ } catch (error) {
812
+ // Ignore errors during cleanup
813
+ }
814
+ }
815
+
816
+ async delete() {
817
+ try {
818
+ // Delete main file
819
+ await fs.promises.unlink(this.file).catch(() => {})
820
+
821
+ // Delete index file (which now contains both index and offsets data)
822
+ await fs.promises.unlink(this.indexFile).catch(() => {})
823
+ } catch (error) {
824
+ // Ignore errors if files don't exist
825
+ }
826
+ }
827
+
828
+ async writeAll(data) {
829
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
830
+ try {
831
+ // Use Windows-specific retry logic for file operations
832
+ await this._writeWithRetry(data)
833
+ } finally {
834
+ release()
835
+ }
836
+ }
837
+
838
+ /**
839
+ * Optimized batch write operation (OPTIMIZATION)
840
+ * @param {Array} dataChunks - Array of data chunks to write
841
+ * @param {boolean} append - Whether to append or overwrite
842
+ */
843
+ async writeBatch(dataChunks, append = false) {
844
+ if (!dataChunks || !dataChunks.length) return
845
+
846
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
847
+ try {
848
+ // OPTIMIZATION: Use streaming write for better performance
849
+ if (dataChunks.length === 1 && Buffer.isBuffer(dataChunks[0])) {
850
+ // Single buffer - use direct write
851
+ if (append) {
852
+ await fs.promises.appendFile(this.file, dataChunks[0])
853
+ } else {
854
+ await this._writeFileWithRetry(this.file, dataChunks[0])
855
+ }
856
+ } else {
857
+ // Multiple chunks - use streaming approach
858
+ await this._writeBatchStreaming(dataChunks, append)
859
+ }
860
+ } finally {
861
+ release()
862
+ }
863
+ }
864
+
865
  /**
   * Write multiple chunks through a WriteStream while honoring
   * backpressure: when write() returns false, wait for 'drain' before
   * sending the next chunk.
   * @param {Array<string|Buffer>} dataChunks - Chunks to write in order.
   * @param {boolean} [append=false] - Open in append ('a') vs overwrite ('w') mode.
   * @returns {Promise<void>} Resolves on 'finish', rejects on stream error.
   */
  async _writeBatchStreaming(dataChunks, append = false) {
    const writeStream = fs.createWriteStream(this.file, {
      flags: append ? 'a' : 'w',
      highWaterMark: 64 * 1024 // 64KB buffer
    })

    return new Promise((resolve, reject) => {
      writeStream.on('error', reject)
      writeStream.on('finish', resolve)

      // Sequentially feed chunks; recursion unwinds via 'drain' callbacks,
      // so the stack stays shallow even for long chunk lists.
      let index = 0
      const writeNext = () => {
        if (index >= dataChunks.length) {
          writeStream.end()
          return
        }

        const chunk = dataChunks[index++]
        const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, 'utf8')

        if (!writeStream.write(buffer)) {
          // Backpressure: resume once the internal buffer drains.
          writeStream.once('drain', writeNext)
        } else {
          writeNext()
        }
      }

      writeNext()
    })
  }
902
+
903
+ /**
904
+ * Optimized append operation for single data chunk (OPTIMIZATION)
905
+ * @param {string|Buffer} data - Data to append
906
+ */
907
+ async appendOptimized(data) {
908
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
909
+ try {
910
+ // OPTIMIZATION: Direct append without retry logic for better performance
911
+ await fs.promises.appendFile(this.file, data)
912
+ } finally {
913
+ release()
914
+ }
915
+ }
916
+
917
  /**
   * fs.promises.writeFile with Windows EPERM retry handling, based on the
   * node-graceful-fs workarounds.
   * @param {string} filePath - Destination path.
   * @param {string|Buffer} data - Content to write (non-Buffers are stringified).
   * @param {number} [maxRetries=3] - Maximum write attempts.
   * @throws The last error when retries are exhausted or on non-EPERM errors.
   */
  async _writeFileWithRetry(filePath, data, maxRetries = 3) {
    const isWindows = process.platform === 'win32'

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        // Ensure data is passed as Buffer or string.
        if (Buffer.isBuffer(data)) {
          await fs.promises.writeFile(filePath, data)
        } else {
          await fs.promises.writeFile(filePath, data.toString())
        }

        // Windows: brief pause after the write so the handle is fully
        // released before any follow-up operation (EPERM mitigation).
        if (isWindows) {
          await new Promise(resolve => setTimeout(resolve, 10))
        }

        // Success — done.
        return

      } catch (err) {
        // Only EPERM on Windows is considered transient and retryable.
        if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
          // Backoff delays are 10^(attempt+1) ms: 10ms, then 100ms.
          const delay = Math.pow(10, attempt + 1)
          await new Promise(resolve => setTimeout(resolve, delay))
          continue
        }

        // Non-retryable error or retries exhausted.
        throw err
      }
    }
  }
956
+
957
+ /**
958
+ * Windows-specific retry logic for file operations
959
+ * Based on node-graceful-fs workarounds for EPERM issues
960
+ */
961
+ async _writeWithRetry(data, maxRetries = 3) {
962
+ const isWindows = process.platform === 'win32'
963
+
964
+ for (let attempt = 0; attempt < maxRetries; attempt++) {
965
+ try {
966
+ // CRITICAL FIX: Ensure directory exists before writing file
967
+ const dir = path.dirname(this.file)
968
+ await fs.promises.mkdir(dir, { recursive: true })
969
+
970
+ const fd = await fs.promises.open(this.file, 'w')
971
+ try {
972
+ // Ensure data is properly formatted as string or buffer
973
+ if (Buffer.isBuffer(data)) {
974
+ await fd.write(data)
975
+ } else {
976
+ await fd.write(data.toString())
977
+ }
978
+ } finally {
979
+ await fd.close()
980
+
981
+ // Windows: add small delay after closing file handle
982
+ // This helps prevent EPERM issues caused by file handle not being released immediately
983
+ if (isWindows) {
984
+ await new Promise(resolve => setTimeout(resolve, 10))
985
+ }
986
+ }
987
+
988
+ // Success - return immediately
989
+ return
990
+
991
+ } catch (err) {
992
+ // Only retry on EPERM errors on Windows
993
+ if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
994
+ // Exponential backoff: 10ms, 50ms, 250ms
995
+ const delay = Math.pow(10, attempt + 1)
996
+ await new Promise(resolve => setTimeout(resolve, delay))
997
+ continue
998
+ }
999
+
1000
+ // Re-throw if not a retryable error or max retries reached
1001
+ throw err
1002
+ }
1003
+ }
1004
+ }
1005
+
1006
+ async readAll() {
1007
+ const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
1008
+ try {
1009
+ // Check if file exists before trying to read it
1010
+ if (!await this.exists()) {
1011
+ return '' // Return empty string if file doesn't exist
1012
+ }
1013
+
1014
+ const fd = await fs.promises.open(this.file, 'r')
1015
+ try {
1016
+ const stats = await fd.stat()
1017
+ const buffer = Buffer.allocUnsafe(stats.size)
1018
+ await fd.read(buffer, 0, stats.size, 0)
1019
+ return buffer.toString('utf8')
1020
+ } finally {
1021
+ await fd.close()
1022
+ }
1023
+ } finally {
1024
+ release()
1025
+ }
1026
+ }
1027
+
1028
/**
 * Read specific lines from the file using line numbers.
 * This is optimized for partial reads when using indexed queries.
 *
 * NOTE(review): despite the name, the whole file is read into memory and
 * split on '\n'; the "partial" aspect is only in which lines are returned.
 * Out-of-range or non-positive line numbers are silently skipped, and the
 * requested lines are returned in the order given in lineNumbers.
 * @param {number[]} lineNumbers - Array of line numbers to read (1-based)
 * @returns {Promise<string>} - Content of the specified lines, joined with '\n'
 */
async readSpecificLines(lineNumbers) {
  // Empty or missing request: nothing to read
  if (!lineNumbers || lineNumbers.length === 0) {
    return ''
  }

  // Serialize against concurrent writers when a mutex is configured
  const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
  try {
    // Check if file exists before trying to read it
    if (!await this.exists()) {
      return '' // Return empty string if file doesn't exist
    }

    const fd = await fs.promises.open(this.file, 'r')
    try {
      const stats = await fd.stat()
      const buffer = Buffer.allocUnsafe(stats.size)
      await fd.read(buffer, 0, stats.size, 0)

      // CRITICAL FIX: Ensure proper UTF-8 decoding for multi-byte characters
      // NOTE(review): Buffer#toString('utf8') in Node substitutes U+FFFD for
      // invalid sequences rather than throwing, so this catch branch appears
      // unreachable in practice — confirm before relying on the recovery path.
      let content
      try {
        content = buffer.toString('utf8')
      } catch (error) {
        // If UTF-8 decoding fails, try to recover by finding valid UTF-8 boundaries
        console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`)

        // Scan backwards for the last byte that starts a UTF-8 character,
        // then truncate there so a trailing partial sequence is dropped.
        // NOTE(review): if that start byte opens a multi-byte sequence that
        // is itself incomplete, it is still included — the slice ends AFTER
        // the start byte, not before it.
        let validLength = buffer.length
        for (let i = buffer.length - 1; i >= 0; i--) {
          const byte = buffer[i]
          // CRITICAL FIX: Correct UTF-8 start character detection
          // Check if this is the start of a UTF-8 character (not a continuation byte)
          if ((byte & 0x80) === 0 || // ASCII (1 byte) - 0xxxxxxx
              (byte & 0xE0) === 0xC0 || // 2-byte UTF-8 start - 110xxxxx
              (byte & 0xF0) === 0xE0 || // 3-byte UTF-8 start - 1110xxxx
              (byte & 0xF8) === 0xF0) { // 4-byte UTF-8 start - 11110xxx
            validLength = i + 1
            break
          }
        }

        const validBuffer = buffer.subarray(0, validLength)
        content = validBuffer.toString('utf8')
      }

      // Split content into lines and extract only the requested lines
      const lines = content.split('\n')
      const result = []

      for (const lineNum of lineNumbers) {
        // Convert to 0-based index and check bounds
        const index = lineNum - 1
        if (index >= 0 && index < lines.length) {
          result.push(lines[index])
        }
      }

      return result.join('\n')
    } finally {
      await fd.close()
    }
  } finally {
    release()
  }
}
1099
+
1100
+ }
1101
+