jexidb 2.0.3 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/.babelrc +13 -0
  2. package/.gitattributes +2 -0
  3. package/CHANGELOG.md +132 -101
  4. package/LICENSE +21 -21
  5. package/README.md +301 -639
  6. package/babel.config.json +5 -0
  7. package/dist/Database.cjs +5204 -0
  8. package/docs/API.md +908 -241
  9. package/docs/EXAMPLES.md +701 -177
  10. package/docs/README.md +194 -184
  11. package/examples/iterate-usage-example.js +157 -0
  12. package/examples/simple-iterate-example.js +115 -0
  13. package/jest.config.js +24 -0
  14. package/package.json +63 -54
  15. package/scripts/README.md +47 -0
  16. package/scripts/benchmark-array-serialization.js +108 -0
  17. package/scripts/clean-test-files.js +75 -0
  18. package/scripts/prepare.js +31 -0
  19. package/scripts/run-tests.js +80 -0
  20. package/scripts/score-mode-demo.js +45 -0
  21. package/src/Database.mjs +5325 -0
  22. package/src/FileHandler.mjs +1140 -0
  23. package/src/OperationQueue.mjs +279 -0
  24. package/src/SchemaManager.mjs +268 -0
  25. package/src/Serializer.mjs +702 -0
  26. package/src/managers/ConcurrencyManager.mjs +257 -0
  27. package/src/managers/IndexManager.mjs +2094 -0
  28. package/src/managers/QueryManager.mjs +1490 -0
  29. package/src/managers/StatisticsManager.mjs +262 -0
  30. package/src/managers/StreamingProcessor.mjs +429 -0
  31. package/src/managers/TermManager.mjs +278 -0
  32. package/src/utils/operatorNormalizer.mjs +116 -0
  33. package/test/$not-operator-with-and.test.js +282 -0
  34. package/test/README.md +8 -0
  35. package/test/close-init-cycle.test.js +256 -0
  36. package/test/coverage-method.test.js +93 -0
  37. package/test/critical-bugs-fixes.test.js +1069 -0
  38. package/test/deserialize-corruption-fixes.test.js +296 -0
  39. package/test/exists-method.test.js +318 -0
  40. package/test/explicit-indexes-comparison.test.js +219 -0
  41. package/test/filehandler-non-adjacent-ranges-bug.test.js +175 -0
  42. package/test/index-line-number-regression.test.js +100 -0
  43. package/test/index-missing-index-data.test.js +91 -0
  44. package/test/index-persistence.test.js +491 -0
  45. package/test/index-serialization.test.js +314 -0
  46. package/test/indexed-query-mode.test.js +360 -0
  47. package/test/insert-session-auto-flush.test.js +353 -0
  48. package/test/iterate-method.test.js +272 -0
  49. package/test/legacy-operator-compat.test.js +154 -0
  50. package/test/query-operators.test.js +238 -0
  51. package/test/regex-array-fields.test.js +129 -0
  52. package/test/score-method.test.js +298 -0
  53. package/test/setup.js +17 -0
  54. package/test/term-mapping-minimal.test.js +154 -0
  55. package/test/term-mapping-simple.test.js +257 -0
  56. package/test/term-mapping.test.js +514 -0
  57. package/test/writebuffer-flush-resilience.test.js +204 -0
  58. package/dist/FileHandler.js +0 -688
  59. package/dist/IndexManager.js +0 -353
  60. package/dist/IntegrityChecker.js +0 -364
  61. package/dist/JSONLDatabase.js +0 -1333
  62. package/dist/index.js +0 -617
  63. package/docs/MIGRATION.md +0 -295
  64. package/examples/auto-save-example.js +0 -158
  65. package/examples/cjs-usage.cjs +0 -82
  66. package/examples/close-vs-delete-example.js +0 -71
  67. package/examples/esm-usage.js +0 -113
  68. package/examples/example-columns.idx.jdb +0 -0
  69. package/examples/example-columns.jdb +0 -9
  70. package/examples/example-options.idx.jdb +0 -0
  71. package/examples/example-options.jdb +0 -0
  72. package/examples/example-users.idx.jdb +0 -0
  73. package/examples/example-users.jdb +0 -5
  74. package/examples/simple-test.js +0 -55
  75. package/src/FileHandler.js +0 -674
  76. package/src/IndexManager.js +0 -363
  77. package/src/IntegrityChecker.js +0 -379
  78. package/src/JSONLDatabase.js +0 -1391
  79. package/src/index.js +0 -608
package/src/FileHandler.mjs (new file)
@@ -0,0 +1,1140 @@
+ import fs from 'fs'
+ import path from 'path'
+ import readline from 'readline'
+ import pLimit from 'p-limit'
+
+ export default class FileHandler {
+   constructor(file, fileMutex = null, opts = {}) {
+     this.file = file
+     this.indexFile = file ? file.replace(/\.jdb$/, '.idx.jdb') : null
+     this.fileMutex = fileMutex
+     this.opts = opts
+     this.maxBufferSize = opts.maxBufferSize || 4 * 1024 * 1024 // 4MB default
+     // Global I/O limiter to prevent file descriptor exhaustion in concurrent operations
+     this.readLimiter = pLimit(opts.maxConcurrentReads || 4)
+   }
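For orientation, a minimal construction sketch. The mutex is assumed to be any object exposing an async-mutex-style runExclusive()/acquire() interface (the implementation jexidb actually wires in is not shown in this hunk); the file name and option values are illustrative:

const handler = new FileHandler('users.jdb', someMutex, {
  maxBufferSize: 8 * 1024 * 1024, // raise the 4MB grouped-read cap
  maxConcurrentReads: 2           // tighten the p-limit read limiter
})
// handler.indexFile === 'users.idx.jdb'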
+
+   async truncate(offset) {
+     try {
+       await fs.promises.access(this.file, fs.constants.F_OK)
+       await fs.promises.truncate(this.file, offset)
+     } catch (err) {
+       await fs.promises.writeFile(this.file, '')
+     }
+   }
+
+   async writeOffsets(data) {
+     // Write offsets to the index file (will be combined with index data)
+     await fs.promises.writeFile(this.indexFile, data)
+   }
+
+   async readOffsets() {
+     try {
+       return await fs.promises.readFile(this.indexFile)
+     } catch (err) {
+       return null
+     }
+   }
+
+   async writeIndex(data) {
+     // Write index data to the index file (will be combined with offsets)
+     // Use Windows-specific retry logic for file operations
+     await this._writeFileWithRetry(this.indexFile, data)
+   }
+
+   async readIndex() {
+     try {
+       return await fs.promises.readFile(this.indexFile)
+     } catch (err) {
+       return null
+     }
+   }
+
+   async exists() {
+     try {
+       await fs.promises.access(this.file, fs.constants.F_OK)
+       return true
+     } catch (err) {
+       return false
+     }
+   }
+
+   async indexExists() {
+     try {
+       await fs.promises.access(this.indexFile, fs.constants.F_OK)
+       return true
+     } catch (err) {
+       return false
+     }
+   }
+
+   async isLegacyFormat() {
+     if (!await this.exists()) return false
+     if (await this.indexExists()) return false
+
+     // Check if main file contains offsets at the end (legacy format)
+     try {
+       const lastLine = await this.readLastLine()
+       if (!lastLine || !lastLine.length) return false
+
+       // Try to parse as offsets array
+       const content = lastLine.toString('utf-8').trim()
+       const parsed = JSON.parse(content)
+       return Array.isArray(parsed)
+     } catch (err) {
+       return false
+     }
+   }
+
+   async migrateLegacyFormat(serializer) {
+     if (!await this.isLegacyFormat()) return false
+
+     console.log('Migrating from legacy format to new 3-file format...')
+
+     // Read the legacy file
+     const lastLine = await this.readLastLine()
+     const offsets = JSON.parse(lastLine.toString('utf-8').trim())
+
+     // Get index offset and truncate offsets array
+     const indexOffset = offsets[offsets.length - 2]
+     const dataOffsets = offsets.slice(0, -2)
+
+     // Read index data
+     const indexStart = indexOffset
+     const indexEnd = offsets[offsets.length - 1]
+     const indexBuffer = await this.readRange(indexStart, indexEnd)
+     const indexData = await serializer.deserialize(indexBuffer)
+
+     // Write offsets to separate file
+     const offsetsString = await serializer.serialize(dataOffsets, { linebreak: false })
+     await this.writeOffsets(offsetsString)
+
+     // Write index to separate file
+     const indexString = await serializer.serialize(indexData, { linebreak: false })
+     await this.writeIndex(indexString)
+
+     // Truncate main file to remove index and offsets
+     await this.truncate(indexOffset)
+
+     console.log('Migration completed successfully!')
+     return true
+   }
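A sketch of how the migration path above might be driven; `serializer` stands in for the package's Serializer and only needs the serialize()/deserialize() methods used above:

if (await handler.isLegacyFormat()) {
  // Legacy .jdb files end with a JSON array whose last two entries are the
  // index offset and index end; migration splits that tail into the .idx.jdb file.
  await handler.migrateLegacyFormat(serializer)
}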
+
+   async readRange(start, end) {
+     // Check if file exists before trying to read it
+     if (!await this.exists()) {
+       return Buffer.alloc(0) // Return empty buffer if file doesn't exist
+     }
+
+     let fd = await fs.promises.open(this.file, 'r')
+     try {
+       // CRITICAL FIX: Check file size before attempting to read
+       const stats = await fd.stat()
+       const fileSize = stats.size
+
+       // If start position is beyond file size, return empty buffer
+       if (start >= fileSize) {
+         await fd.close()
+         return Buffer.alloc(0)
+       }
+
+       // Adjust end position if it's beyond file size
+       const actualEnd = Math.min(end, fileSize)
+       const length = actualEnd - start
+
+       // If length is 0 or negative, return empty buffer
+       if (length <= 0) {
+         await fd.close()
+         return Buffer.alloc(0)
+       }
+
+       let buffer = Buffer.alloc(length)
+       const { bytesRead } = await fd.read(buffer, 0, length, start)
+       await fd.close()
+
+       // CRITICAL FIX: Ensure we read the expected amount of data
+       if (bytesRead !== length) {
+         const errorMsg = `CRITICAL: Expected to read ${length} bytes, but read ${bytesRead} bytes at position ${start}`
+         console.error(`⚠️ ${errorMsg}`)
+
+         // This indicates a race condition or file corruption
+         // Don't retry - the caller should handle synchronization properly
+         if (bytesRead === 0) {
+           throw new Error(`File corruption detected: ${errorMsg}`)
+         }
+
+         // Return partial data with warning - caller should handle this
+         return buffer.subarray(0, bytesRead)
+       }
+
+       return buffer
+     } catch (error) {
+       await fd.close().catch(() => {})
+       throw error
+     }
+   }
+
+   async readRanges(ranges, mapper) {
+     const lines = {}
+
+     // Check if file exists before trying to read it
+     if (!await this.exists()) {
+       return lines // Return empty object if file doesn't exist
+     }
+
+     const fd = await fs.promises.open(this.file, 'r')
+     const groupedRanges = await this.groupedRanges(ranges)
+     try {
+       await Promise.allSettled(groupedRanges.map(async (groupedRange) => {
+         await this.readLimiter(async () => {
+           for await (const row of this.readGroupedRange(groupedRange, fd)) {
+             lines[row.start] = mapper ? (await mapper(row.line, { start: row.start, end: row.start + row.line.length })) : row.line
+           }
+         })
+       }))
+     } catch (e) {
+       console.error('Error reading ranges:', e)
+     } finally {
+       await fd.close()
+     }
+     return lines
+   }
+
+   async groupedRanges(ranges) { // expects ordered ranges from Database.getRanges()
+     const readSize = 512 * 1024 // 512KB
+     const groupedRanges = []
+     let currentGroup = []
+     let currentSize = 0
+
+     // each range is a {start: number, end: number} object
+     for (let i = 0; i < ranges.length; i++) {
+       const range = ranges[i]
+       const rangeSize = range.end - range.start
+
+       if (currentGroup.length > 0) {
+         const lastRange = currentGroup[currentGroup.length - 1]
+         if (lastRange.end !== range.start || currentSize + rangeSize > readSize) {
+           groupedRanges.push(currentGroup)
+           currentGroup = []
+           currentSize = 0
+         }
+       }
+
+       currentGroup.push(range)
+       currentSize += rangeSize
+     }
+
+     if (currentGroup.length > 0) {
+       groupedRanges.push(currentGroup)
+     }
+
+     return groupedRanges
+   }
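To make the grouping rule concrete (a group breaks whenever ranges stop being byte-adjacent or the accumulated size would exceed the 512KB read budget), a small illustration with made-up offsets:

const groups = await handler.groupedRanges([
  { start: 0,   end: 100 }, // adjacent to the next range,
  { start: 100, end: 250 }, // so both land in the first group
  { start: 400, end: 480 }  // gap at 250..400 starts a second group
])
// groups => [[{start:0,...}, {start:100,...}], [{start:400,...}]]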
+
+   async *readGroupedRange(groupedRange, fd) {
+     if (groupedRange.length === 0) return
+
+     // OPTIMIZATION: For single range, use direct approach
+     if (groupedRange.length === 1) {
+       const range = groupedRange[0]
+       const bufferSize = range.end - range.start
+
+       if (bufferSize <= 0 || bufferSize > this.maxBufferSize) {
+         throw new Error(`Invalid buffer size: ${bufferSize}. Start: ${range.start}, End: ${range.end}. Max allowed: ${this.maxBufferSize}`)
+       }
+
+       const buffer = Buffer.allocUnsafe(bufferSize)
+       const { bytesRead } = await fd.read(buffer, 0, bufferSize, range.start)
+       const actualBuffer = bytesRead < bufferSize ? buffer.subarray(0, bytesRead) : buffer
+
+       if (actualBuffer.length === 0) return
+
+       // Buffer#toString('utf8') never throws; invalid sequences decode to U+FFFD
+       let lineString = actualBuffer.toString('utf8')
+
+       // CRITICAL FIX: Remove trailing newlines and whitespace for single range too
+       // Optimized: trimEnd() efficiently removes all trailing whitespace
+       lineString = lineString.trimEnd()
+
+       yield {
+         line: lineString,
+         start: range.start,
+         _: range.index !== undefined ? range.index : (range._ || null)
+       }
+       return
+     }
+
+     // OPTIMIZATION: For multiple ranges, read as single buffer and split by offsets
+     const firstRange = groupedRange[0]
+     const lastRange = groupedRange[groupedRange.length - 1]
+     const totalSize = lastRange.end - firstRange.start
+
+     if (totalSize <= 0 || totalSize > this.maxBufferSize) {
+       throw new Error(`Invalid total buffer size: ${totalSize}. Start: ${firstRange.start}, End: ${lastRange.end}. Max allowed: ${this.maxBufferSize}`)
+     }
+
+     // Read entire grouped range as single buffer
+     const buffer = Buffer.allocUnsafe(totalSize)
+     const { bytesRead } = await fd.read(buffer, 0, totalSize, firstRange.start)
+     const actualBuffer = bytesRead < totalSize ? buffer.subarray(0, bytesRead) : buffer
+
+     if (actualBuffer.length === 0) return
+
+     // Convert to string once; invalid UTF-8 sequences decode to U+FFFD
+     const content = actualBuffer.toString('utf8')
+
+     // CRITICAL FIX: Handle ranges more carefully to prevent corruption
+     if (groupedRange.length === 2 && groupedRange[0].end === groupedRange[1].start) {
+       // Special case: Adjacent ranges - split by newlines to prevent corruption
+       const lines = content.split('\n').filter(line => line.trim().length > 0)
+
+       for (let i = 0; i < Math.min(lines.length, groupedRange.length); i++) {
+         const range = groupedRange[i]
+         yield {
+           line: lines[i],
+           start: range.start,
+           _: range.index !== undefined ? range.index : (range._ || null)
+         }
+       }
+     } else {
+       // CRITICAL FIX: For non-adjacent ranges, use the range.end directly
+       // because range.end already excludes the newline (calculated as offsets[n+1] - 1)
+       // We just need to find the line start (beginning of the line in the buffer)
+       for (let i = 0; i < groupedRange.length; i++) {
+         const range = groupedRange[i]
+         const relativeStart = range.start - firstRange.start
+         const relativeEnd = range.end - firstRange.start
+
+         // OPTIMIZATION 2: Find line start only if necessary
+         // Check if we're already at a line boundary to avoid unnecessary backwards search
+         let lineStart = relativeStart
+         if (relativeStart > 0 && content[relativeStart - 1] !== '\n') {
+           // Only search backwards if we're not already at a line boundary
+           while (lineStart > 0 && content[lineStart - 1] !== '\n') {
+             lineStart--
+           }
+         }
+
+         // OPTIMIZATION 3: Use slice() instead of substring() for better performance
+         // range.end = offsets[n+1] - 1 points to the newline character, and
+         // slice(start, end) is exclusive of end, so the newline at relativeEnd is left out
+         let rangeContent = content.slice(lineStart, relativeEnd)
+
+         // OPTIMIZATION 4: Direct character check instead of regex
+         // trimEnd() is optimized in V8, but only call it when the last
+         // character is actually trailing whitespace
+         const len = rangeContent.length
+         if (len > 0) {
+           const lastChar = rangeContent[len - 1]
+           if (lastChar === '\n' || lastChar === '\r' || lastChar === ' ' || lastChar === '\t') {
+             rangeContent = rangeContent.trimEnd()
+           }
+         }
+
+         if (rangeContent.length === 0) continue
+
+         yield {
+           line: rangeContent,
+           start: range.start,
+           _: range.index !== undefined ? range.index : (range._ || null)
+         }
+       }
+     }
+   }
+
+   async *walk(ranges) {
+     // Check if file exists before trying to read it
+     if (!await this.exists()) {
+       return // Empty generator if file doesn't exist
+     }
+
+     const fd = await fs.promises.open(this.file, 'r')
+     try {
+       const groupedRanges = await this.groupedRanges(ranges)
+       for (const groupedRange of groupedRanges) {
+         for await (const row of this.readGroupedRange(groupedRange, fd)) {
+           yield row
+         }
+       }
+     } finally {
+       await fd.close()
+     }
+   }
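A usage sketch for the two range readers; the byte offsets are illustrative and would normally come from the database's offset index:

// Batch read, keyed by each range's start offset
const lines = await handler.readRanges([{ start: 0, end: 21 }, { start: 44, end: 63 }])

// Lazy iteration over the same ranges
for await (const row of handler.walk([{ start: 0, end: 21 }, { start: 44, end: 63 }])) {
  console.log(row.start, row.line) // row._ carries the range's index, when present
}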
+
+   async replaceLines(ranges, lines) {
+     // CRITICAL: Always use file mutex to prevent concurrent file operations
+     if (this.fileMutex) {
+       return this.fileMutex.runExclusive(async () => {
+         // Add a small delay to ensure any pending operations complete
+         await new Promise(resolve => setTimeout(resolve, 10));
+         return this._replaceLinesInternal(ranges, lines);
+       });
+     } else {
+       return this._replaceLinesInternal(ranges, lines);
+     }
+   }
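A hypothetical call showing the two behaviors of replaceLines: a truthy entry rewrites its range, a null entry deletes it (ranges and records are illustrative):

await handler.replaceLines(
  [{ start: 22, end: 44 }, { start: 44, end: 62 }],
  [JSON.stringify({ id: 2, name: 'Lin' }), null] // rewrite the first range, drop the second
)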
+
+   async _replaceLinesInternal(ranges, lines) {
+     const tmpFile = this.file + '.tmp';
+     let writer, reader;
+
+     try {
+       writer = await fs.promises.open(tmpFile, 'w+');
+
+       // Check if the main file exists before trying to read it
+       if (await this.exists()) {
+         reader = await fs.promises.open(this.file, 'r');
+       } else {
+         // If file doesn't exist, we'll just write the new lines
+         reader = null;
+       }
+
+       // Sort ranges by start position to ensure correct order
+       const sortedRanges = [...ranges].sort((a, b) => a.start - b.start);
+
+       let position = 0;
+       let lineIndex = 0;
+
+       for (const range of sortedRanges) {
+         // Write existing content before the range (only if file exists)
+         if (reader && position < range.start) {
+           const buffer = await this.readRange(position, range.start);
+           await writer.write(buffer);
+         }
+
+         // Write new line if provided, otherwise skip the range (for delete operations)
+         if (lineIndex < lines.length && lines[lineIndex]) {
+           const line = lines[lineIndex];
+           // Ensure line ends with newline
+           let formattedBuffer;
+           if (Buffer.isBuffer(line)) {
+             const needsNewline = line.length === 0 || line[line.length - 1] !== 0x0A;
+             formattedBuffer = needsNewline ? Buffer.concat([line, Buffer.from('\n')]) : line;
+           } else {
+             const withNewline = line.endsWith('\n') ? line : line + '\n';
+             formattedBuffer = Buffer.from(withNewline, 'utf8');
+           }
+           await writer.write(formattedBuffer);
+         }
+
+         // Update position to range.end to avoid overlapping writes
+         position = range.end;
+         lineIndex++;
+       }
+
+       // Write remaining content after the last range (only if file exists)
+       if (reader) {
+         const { size } = await reader.stat();
+         if (position < size) {
+           const buffer = await this.readRange(position, size);
+           await writer.write(buffer);
+         }
+       }
+
+       // Ensure all data is written to disk
+       await writer.sync();
+       if (reader) await reader.close();
+       await writer.close();
+
+       // Validate the temp file before renaming
+       await this._validateTempFile(tmpFile);
+
+       // CRITICAL: Retry logic for Windows EPERM errors
+       await this._safeRename(tmpFile, this.file);
+
+     } catch (e) {
+       console.error('Error replacing lines:', e);
+       throw e;
+     } finally {
+       if (reader) await reader.close().catch(() => { });
+       if (writer) await writer.close().catch(() => { });
+       await fs.promises.unlink(tmpFile).catch(() => { });
+     }
+   }
+
+   async _safeRename(tmpFile, targetFile, maxRetries = 3) {
+     for (let attempt = 1; attempt <= maxRetries; attempt++) {
+       try {
+         await fs.promises.rename(tmpFile, targetFile);
+         return; // Success
+       } catch (error) {
+         if (error.code === 'EPERM' && attempt < maxRetries) {
+           // Linear backoff: 50ms, then 100ms
+           const delay = 50 * attempt;
+           console.log(`🔄 EPERM retry ${attempt}/${maxRetries}, waiting ${delay}ms...`);
+           await new Promise(resolve => setTimeout(resolve, delay));
+           continue;
+         }
+
+         // If all retries failed, try Windows fallback approach
+         if (error.code === 'EPERM' && attempt === maxRetries) {
+           console.log(`⚠️ All EPERM retries failed, trying Windows fallback...`);
+           return this._windowsFallbackRename(tmpFile, targetFile);
+         }
+
+         throw error; // Re-throw if not EPERM or max retries reached
+       }
+     }
+   }
+
+   async _validateTempFile(tmpFile) {
+     try {
+       // Read the temp file and validate JSON structure
+       const content = await fs.promises.readFile(tmpFile, 'utf8');
+       const lines = content.split('\n').filter(line => line.trim());
+
+       let hasInvalidJson = false;
+       const validLines = [];
+
+       for (let i = 0; i < lines.length; i++) {
+         try {
+           JSON.parse(lines[i]);
+           validLines.push(lines[i]);
+         } catch (error) {
+           console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
+           hasInvalidJson = true;
+         }
+       }
+
+       // If we found invalid JSON, rewrite the file with only valid lines
+       if (hasInvalidJson && validLines.length > 0) {
+         console.log(`🔧 Rewriting temp file with ${validLines.length} valid lines`);
+         const correctedContent = validLines.join('\n') + '\n';
+         await fs.promises.writeFile(tmpFile, correctedContent, 'utf8');
+       }
+
+       console.log(`✅ Temp file validation passed: ${validLines.length} valid JSON lines`);
+     } catch (error) {
+       console.error(`❌ Temp file validation failed:`, error.message);
+       throw error;
+     }
+   }
+
+   async _windowsFallbackRename(tmpFile, targetFile) {
+     try {
+       // Windows fallback: copy content instead of rename
+       console.log(`🔄 Using Windows fallback: copy + delete approach`);
+
+       // Validate temp file before copying
+       await this._validateTempFile(tmpFile);
+
+       // Read the temp file content
+       const content = await fs.promises.readFile(tmpFile, 'utf8');
+
+       // Write to target file directly
+       await fs.promises.writeFile(targetFile, content, 'utf8');
+
+       // Delete temp file
+       await fs.promises.unlink(tmpFile);
+
+       console.log(`✅ Windows fallback successful`);
+       return;
+     } catch (fallbackError) {
+       console.error(`❌ Windows fallback also failed:`, fallbackError);
+       throw fallbackError;
+     }
+   }
+
+   async writeData(data, immediate, fd) {
+     await fd.write(data)
+   }
+
+   async writeDataAsync(data) {
+     // CRITICAL FIX: Ensure directory exists before writing
+     const dir = path.dirname(this.file)
+     await fs.promises.mkdir(dir, { recursive: true })
+
+     await fs.promises.appendFile(this.file, data)
+   }
+
+   /**
+    * Check if data appears to be binary (always false since we only use JSON now)
+    */
+   isBinaryData(data) {
+     // All data is now JSON format
+     return false
+   }
+
+   /**
+    * Check if file is binary (always false since we only use JSON now)
+    */
+   async isBinaryFile() {
+     // All files are now JSON format
+     return false
+   }
+
+   async readLastLine() {
+     // Use global read limiter to prevent file descriptor exhaustion
+     return this.readLimiter(async () => {
+       // Check if file exists before trying to read it
+       if (!await this.exists()) {
+         return null // Return null if file doesn't exist
+       }
+
+       const reader = await fs.promises.open(this.file, 'r')
+       try {
+         const { size } = await reader.stat()
+         if (size < 1) throw new Error('empty file')
+         this.size = size
+         const bufferSize = 16384
+         let buffer, isFirstRead = true, lastReadSize, readPosition = Math.max(size - bufferSize, 0)
+         while (readPosition >= 0) {
+           const readSize = Math.min(bufferSize, size - readPosition)
+           if (readSize !== lastReadSize) {
+             lastReadSize = readSize
+             buffer = Buffer.alloc(readSize)
+           }
+           // On the first pass, skip the file's final byte so a trailing newline is ignored
+           const { bytesRead } = await reader.read(buffer, 0, isFirstRead ? (readSize - 1) : readSize, readPosition)
+           if (isFirstRead) isFirstRead = false
+           if (bytesRead === 0) break
+           const newlineIndex = buffer.lastIndexOf(10)
+           const start = readPosition + newlineIndex + 1
+           if (newlineIndex !== -1) {
+             const lastLine = Buffer.alloc(size - start)
+             await reader.read(lastLine, 0, size - start, start)
+             if (!lastLine || !lastLine.length) {
+               throw new Error('no metadata or empty file')
+             }
+             return lastLine
+           } else {
+             readPosition -= bufferSize
+           }
+         }
+       } catch (e) {
+         if (!String(e).includes('empty file')) {
+           console.error('Error reading last line:', e)
+         }
+       } finally {
+         await reader.close().catch(() => {})
+       }
+     })
+   }
+
+   /**
+    * Read records with streaming using readline
+    * @param {Object} criteria - Filter criteria
+    * @param {Object} options - Options (limit, skip)
+    * @param {Function} matchesCriteria - Function to check if record matches criteria
+    * @param {Object} serializer - Optional serializer with a deserialize(line) method
+    * @returns {Promise<Array>} - Array of records
+    */
+   async readWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
+     // CRITICAL: Always use file mutex to prevent concurrent file operations
+     if (this.fileMutex) {
+       return this.fileMutex.runExclusive(async () => {
+         // Add a small delay to ensure any pending operations complete
+         await new Promise(resolve => setTimeout(resolve, 5));
+         // Use global read limiter to prevent file descriptor exhaustion
+         return this.readLimiter(() => this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer));
+       });
+     } else {
+       // Use global read limiter to prevent file descriptor exhaustion
+       return this.readLimiter(() => this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer));
+     }
+   }
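A sketch of a full-scan query through the streaming reader; the match predicate and the shape of `criteria` are entirely caller-defined:

const rows = await handler.readWithStreaming(
  { active: true },
  { skip: 0, limit: 10 },
  (record, criteria) => record.active === criteria.active
)
// each result carries its 0-based line number in the `_` field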
+
+   async _readWithStreamingInternal(criteria, options = {}, matchesCriteria, serializer = null) {
+     const { limit, skip = 0 } = options; // No default limit
+     const results = [];
+     let lineNumber = 0;
+     let processed = 0;
+     let skipped = 0;
+     let matched = 0;
+
+     try {
+       // Check if file exists before trying to read it
+       if (!await this.exists()) {
+         return results; // Return empty results if file doesn't exist
+       }
+
+       // All files are now JSONL format - use line-by-line reading
+       // Create optimized read stream
+       const stream = fs.createReadStream(this.file, {
+         highWaterMark: 64 * 1024, // 64KB chunks
+         encoding: 'utf8'
+       });
+
+       // Create readline interface
+       const rl = readline.createInterface({
+         input: stream,
+         crlfDelay: Infinity // Better performance
+       });
+
+       // Process line by line
+       for await (const line of rl) {
+         if (lineNumber >= skip) {
+           try {
+             let record;
+             if (serializer && typeof serializer.deserialize === 'function') {
+               // Use serializer for deserialization
+               record = serializer.deserialize(line);
+             } else {
+               // Fallback to JSON.parse for backward compatibility
+               record = JSON.parse(line);
+             }
+
+             if (record && matchesCriteria(record, criteria)) {
+               // Return raw data - term mapping will be handled by Database layer
+               results.push({ ...record, _: lineNumber });
+               matched++;
+
+               // Check if we've reached the limit (no limit means read everything)
+               if (limit !== undefined && results.length >= limit) {
+                 break;
+               }
+             }
+           } catch (error) {
+             // CRITICAL FIX: Only log errors if they're not expected during concurrent operations
+             // Don't log JSON parsing errors that occur during file writes
+             if (this.opts && this.opts.debugMode && !error.message.includes('Unexpected')) {
+               console.log(`Error reading line ${lineNumber}:`, error.message);
+             }
+             // Ignore invalid lines - they may be partial writes
+           }
+         } else {
+           skipped++;
+         }
+
+         lineNumber++;
+         processed++;
+       }
+
+       if (this.opts && this.opts.debugMode) {
+         console.log(`📊 Streaming read completed: ${results.length} results, ${processed} processed, ${skipped} skipped, ${matched} matched`);
+       }
+
+       return results;
+
+     } catch (error) {
+       console.error('Error in readWithStreaming:', error);
+       throw error;
+     }
+   }
+
+   /**
+    * Count records with streaming
+    * @param {Object} criteria - Filter criteria
+    * @param {Object} options - Options (limit)
+    * @param {Function} matchesCriteria - Function to check if record matches criteria
+    * @param {Object} serializer - Optional serializer with a deserialize(line) method
+    * @returns {Promise<number>} - Number of records
+    */
+   async countWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
+     const { limit } = options;
+     let count = 0;
+
+     // Return 0 if the file doesn't exist yet (consistent with readWithStreaming)
+     if (!await this.exists()) {
+       return count;
+     }
+
+     const stream = fs.createReadStream(this.file, {
+       highWaterMark: 64 * 1024,
+       encoding: 'utf8'
+     });
+
+     const rl = readline.createInterface({
+       input: stream,
+       crlfDelay: Infinity
+     });
+
+     for await (const line of rl) {
+       if (limit && count >= limit) {
+         break;
+       }
+
+       try {
+         let record;
+         if (serializer) {
+           // Use serializer for deserialization
+           record = await serializer.deserialize(line);
+         } else {
+           // Fallback to JSON.parse for backward compatibility
+           record = JSON.parse(line);
+         }
+
+         if (record && matchesCriteria(record, criteria)) {
+           count++;
+         }
+       } catch (error) {
+         // Ignore invalid lines
+       }
+     }
+
+     return count;
+   }
+
+   /**
+    * Get file statistics
+    * @returns {Promise<Object>} - File statistics
+    */
+   async getFileStats() {
+     const stats = await fs.promises.stat(this.file);
+     const lineCount = await this.countLines();
+
+     return {
+       filePath: this.file,
+       size: stats.size,
+       lineCount,
+       lastModified: stats.mtime
+     };
+   }
+
+   /**
+    * Count lines in file
+    * @returns {Promise<number>} - Number of lines
+    */
+   async countLines() {
+     let lineCount = 0;
+
+     const stream = fs.createReadStream(this.file, {
+       highWaterMark: 64 * 1024,
+       encoding: 'utf8'
+     });
+
+     const rl = readline.createInterface({
+       input: stream,
+       crlfDelay: Infinity
+     });
+
+     for await (const line of rl) {
+       lineCount++;
+     }
+
+     return lineCount;
+   }
+
+   async destroy() {
+     // CRITICAL FIX: Close all file handles to prevent resource leaks
+     try {
+       // Close any open file descriptors
+       if (this.fd) {
+         await this.fd.close().catch(() => {})
+         this.fd = null
+       }
+
+       // Close any open readers/writers
+       if (this.reader) {
+         await this.reader.close().catch(() => {})
+         this.reader = null
+       }
+
+       if (this.writer) {
+         await this.writer.close().catch(() => {})
+         this.writer = null
+       }
+
+       // Clear any cached file handles
+       this.cachedFd = null
+
+     } catch (error) {
+       // Ignore errors during cleanup
+     }
+   }
+
+   async delete() {
+     try {
+       // Delete main file
+       await fs.promises.unlink(this.file).catch(() => {})
+
+       // Delete index file (which now contains both index and offsets data)
+       await fs.promises.unlink(this.indexFile).catch(() => {})
+     } catch (error) {
+       // Ignore errors if files don't exist
+     }
+   }
+
+   async writeAll(data) {
+     const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
+     try {
+       // Use Windows-specific retry logic for file operations
+       await this._writeWithRetry(data)
+     } finally {
+       release()
+     }
+   }
+
+   /**
+    * Optimized batch write operation (OPTIMIZATION)
+    * @param {Array} dataChunks - Array of data chunks to write
+    * @param {boolean} append - Whether to append or overwrite
+    */
+   async writeBatch(dataChunks, append = false) {
+     if (!dataChunks || !dataChunks.length) return
+
+     const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
+     try {
+       // OPTIMIZATION: Use streaming write for better performance
+       if (dataChunks.length === 1 && Buffer.isBuffer(dataChunks[0])) {
+         // Single buffer - use direct write
+         if (append) {
+           await fs.promises.appendFile(this.file, dataChunks[0])
+         } else {
+           await this._writeFileWithRetry(this.file, dataChunks[0])
+         }
+       } else {
+         // Multiple chunks - use streaming approach
+         await this._writeBatchStreaming(dataChunks, append)
+       }
+     } finally {
+       release()
+     }
+   }
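An illustrative batch append; a single Buffer takes the direct-write path, anything else falls through to the streaming writer:

await handler.writeBatch([
  Buffer.from(JSON.stringify({ id: 3 }) + '\n'),
  JSON.stringify({ id: 4 }) + '\n'
], true) // append = true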
+
+   /**
+    * OPTIMIZATION: Streaming write for multiple chunks
+    * @param {Array} dataChunks - Array of data chunks to write
+    * @param {boolean} append - Whether to append or overwrite
+    */
+   async _writeBatchStreaming(dataChunks, append = false) {
+     // OPTIMIZATION: Use createWriteStream for better performance
+     const writeStream = fs.createWriteStream(this.file, {
+       flags: append ? 'a' : 'w',
+       highWaterMark: 64 * 1024 // 64KB buffer
+     })
+
+     return new Promise((resolve, reject) => {
+       writeStream.on('error', reject)
+       writeStream.on('finish', resolve)
+
+       // Write chunks sequentially
+       let index = 0
+       const writeNext = () => {
+         if (index >= dataChunks.length) {
+           writeStream.end()
+           return
+         }
+
+         const chunk = dataChunks[index++]
+         const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, 'utf8')
+
+         if (!writeStream.write(buffer)) {
+           writeStream.once('drain', writeNext)
+         } else {
+           writeNext()
+         }
+       }
+
+       writeNext()
+     })
+   }
+
+   /**
+    * Optimized append operation for single data chunk (OPTIMIZATION)
+    * @param {string|Buffer} data - Data to append
+    */
+   async appendOptimized(data) {
+     const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
+     try {
+       // OPTIMIZATION: Direct append without retry logic for better performance
+       await fs.promises.appendFile(this.file, data)
+     } finally {
+       release()
+     }
+   }
+
+   /**
+    * Windows-specific retry logic for fs.promises.writeFile operations
+    * Based on node-graceful-fs workarounds for EPERM issues
+    */
+   async _writeFileWithRetry(filePath, data, maxRetries = 3) {
+     const isWindows = process.platform === 'win32'
+
+     for (let attempt = 0; attempt < maxRetries; attempt++) {
+       try {
+         // Ensure data is properly formatted as string or buffer
+         if (Buffer.isBuffer(data)) {
+           await fs.promises.writeFile(filePath, data)
+         } else {
+           await fs.promises.writeFile(filePath, data.toString())
+         }
+
+         // Windows: add small delay after write operation
+         // This helps prevent EPERM issues caused by file handle not being released immediately
+         if (isWindows) {
+           await new Promise(resolve => setTimeout(resolve, 10))
+         }
+
+         // Success - return immediately
+         return
+
+       } catch (err) {
+         // Only retry on EPERM errors on Windows
+         if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
+           // Exponential backoff: 10ms, then 100ms
+           const delay = Math.pow(10, attempt + 1)
+           await new Promise(resolve => setTimeout(resolve, delay))
+           continue
+         }
+
+         // Re-throw if not a retryable error or max retries reached
+         throw err
+       }
+     }
+   }
+
+   /**
+    * Windows-specific retry logic for file operations
+    * Based on node-graceful-fs workarounds for EPERM issues
+    */
+   async _writeWithRetry(data, maxRetries = 3) {
+     const isWindows = process.platform === 'win32'
+
+     for (let attempt = 0; attempt < maxRetries; attempt++) {
+       try {
+         // CRITICAL FIX: Ensure directory exists before writing file
+         const dir = path.dirname(this.file)
+         await fs.promises.mkdir(dir, { recursive: true })
+
+         const fd = await fs.promises.open(this.file, 'w')
+         try {
+           // Ensure data is properly formatted as string or buffer
+           if (Buffer.isBuffer(data)) {
+             await fd.write(data)
+           } else {
+             await fd.write(data.toString())
+           }
+         } finally {
+           await fd.close()
+
+           // Windows: add small delay after closing file handle
+           // This helps prevent EPERM issues caused by file handle not being released immediately
+           if (isWindows) {
+             await new Promise(resolve => setTimeout(resolve, 10))
+           }
+         }
+
+         // Success - return immediately
+         return
+
+       } catch (err) {
+         // Only retry on EPERM errors on Windows
+         if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
+           // Exponential backoff: 10ms, then 100ms
+           const delay = Math.pow(10, attempt + 1)
+           await new Promise(resolve => setTimeout(resolve, delay))
+           continue
+         }
+
+         // Re-throw if not a retryable error or max retries reached
+         throw err
+       }
+     }
+   }
+
+   async readAll() {
+     const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
+     try {
+       // Check if file exists before trying to read it
+       if (!await this.exists()) {
+         return '' // Return empty string if file doesn't exist
+       }
+
+       const fd = await fs.promises.open(this.file, 'r')
+       try {
+         const stats = await fd.stat()
+         const buffer = Buffer.allocUnsafe(stats.size)
+         await fd.read(buffer, 0, stats.size, 0)
+         return buffer.toString('utf8')
+       } finally {
+         await fd.close()
+       }
+     } finally {
+       release()
+     }
+   }
+
+   /**
+    * Read specific lines from the file using line numbers
+    * This is optimized for partial reads when using indexed queries
+    * @param {number[]} lineNumbers - Array of line numbers to read (1-based)
+    * @returns {Promise<string>} - Content of the specified lines
+    */
+   async readSpecificLines(lineNumbers) {
+     if (!lineNumbers || lineNumbers.length === 0) {
+       return ''
+     }
+
+     const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
+     try {
+       // Check if file exists before trying to read it
+       if (!await this.exists()) {
+         return '' // Return empty string if file doesn't exist
+       }
+
+       const fd = await fs.promises.open(this.file, 'r')
+       try {
+         const stats = await fd.stat()
+         const buffer = Buffer.allocUnsafe(stats.size)
+         await fd.read(buffer, 0, stats.size, 0)
+
+         // CRITICAL FIX: Ensure proper UTF-8 decoding for multi-byte characters
+         let content
+         try {
+           content = buffer.toString('utf8')
+         } catch (error) {
+           // If UTF-8 decoding fails, try to recover by finding valid UTF-8 boundaries
+           console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`)
+
+           // Find the last complete UTF-8 character
+           let validLength = buffer.length
+           for (let i = buffer.length - 1; i >= 0; i--) {
+             const byte = buffer[i]
+             // CRITICAL FIX: Correct UTF-8 start character detection
+             // Check if this is the start of a UTF-8 character (not a continuation byte)
+             if ((byte & 0x80) === 0 ||    // ASCII (1 byte) - 0xxxxxxx
+                 (byte & 0xE0) === 0xC0 || // 2-byte UTF-8 start - 110xxxxx
+                 (byte & 0xF0) === 0xE0 || // 3-byte UTF-8 start - 1110xxxx
+                 (byte & 0xF8) === 0xF0) { // 4-byte UTF-8 start - 11110xxx
+               validLength = i + 1
+               break
+             }
+           }
+
+           const validBuffer = buffer.subarray(0, validLength)
+           content = validBuffer.toString('utf8')
+         }
+
+         // Split content into lines and extract only the requested lines
+         const lines = content.split('\n')
+         const result = []
+
+         for (const lineNum of lineNumbers) {
+           // Convert to 0-based index and check bounds
+           const index = lineNum - 1
+           if (index >= 0 && index < lines.length) {
+             result.push(lines[index])
+           }
+         }
+
+         return result.join('\n')
+       } finally {
+         await fd.close()
+       }
+     } finally {
+       release()
+     }
+   }
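Since the line numbers here are 1-based, a quick illustrative call:

const text = await handler.readSpecificLines([1, 3]) // lines 1 and 3, joined by '\n'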
+
+ }