jexidb 2.0.3 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +13 -0
- package/.gitattributes +2 -0
- package/CHANGELOG.md +132 -101
- package/LICENSE +21 -21
- package/README.md +301 -639
- package/babel.config.json +5 -0
- package/dist/Database.cjs +5204 -0
- package/docs/API.md +908 -241
- package/docs/EXAMPLES.md +701 -177
- package/docs/README.md +194 -184
- package/examples/iterate-usage-example.js +157 -0
- package/examples/simple-iterate-example.js +115 -0
- package/jest.config.js +24 -0
- package/package.json +63 -54
- package/scripts/README.md +47 -0
- package/scripts/benchmark-array-serialization.js +108 -0
- package/scripts/clean-test-files.js +75 -0
- package/scripts/prepare.js +31 -0
- package/scripts/run-tests.js +80 -0
- package/scripts/score-mode-demo.js +45 -0
- package/src/Database.mjs +5325 -0
- package/src/FileHandler.mjs +1140 -0
- package/src/OperationQueue.mjs +279 -0
- package/src/SchemaManager.mjs +268 -0
- package/src/Serializer.mjs +702 -0
- package/src/managers/ConcurrencyManager.mjs +257 -0
- package/src/managers/IndexManager.mjs +2094 -0
- package/src/managers/QueryManager.mjs +1490 -0
- package/src/managers/StatisticsManager.mjs +262 -0
- package/src/managers/StreamingProcessor.mjs +429 -0
- package/src/managers/TermManager.mjs +278 -0
- package/src/utils/operatorNormalizer.mjs +116 -0
- package/test/$not-operator-with-and.test.js +282 -0
- package/test/README.md +8 -0
- package/test/close-init-cycle.test.js +256 -0
- package/test/coverage-method.test.js +93 -0
- package/test/critical-bugs-fixes.test.js +1069 -0
- package/test/deserialize-corruption-fixes.test.js +296 -0
- package/test/exists-method.test.js +318 -0
- package/test/explicit-indexes-comparison.test.js +219 -0
- package/test/filehandler-non-adjacent-ranges-bug.test.js +175 -0
- package/test/index-line-number-regression.test.js +100 -0
- package/test/index-missing-index-data.test.js +91 -0
- package/test/index-persistence.test.js +491 -0
- package/test/index-serialization.test.js +314 -0
- package/test/indexed-query-mode.test.js +360 -0
- package/test/insert-session-auto-flush.test.js +353 -0
- package/test/iterate-method.test.js +272 -0
- package/test/legacy-operator-compat.test.js +154 -0
- package/test/query-operators.test.js +238 -0
- package/test/regex-array-fields.test.js +129 -0
- package/test/score-method.test.js +298 -0
- package/test/setup.js +17 -0
- package/test/term-mapping-minimal.test.js +154 -0
- package/test/term-mapping-simple.test.js +257 -0
- package/test/term-mapping.test.js +514 -0
- package/test/writebuffer-flush-resilience.test.js +204 -0
- package/dist/FileHandler.js +0 -688
- package/dist/IndexManager.js +0 -353
- package/dist/IntegrityChecker.js +0 -364
- package/dist/JSONLDatabase.js +0 -1333
- package/dist/index.js +0 -617
- package/docs/MIGRATION.md +0 -295
- package/examples/auto-save-example.js +0 -158
- package/examples/cjs-usage.cjs +0 -82
- package/examples/close-vs-delete-example.js +0 -71
- package/examples/esm-usage.js +0 -113
- package/examples/example-columns.idx.jdb +0 -0
- package/examples/example-columns.jdb +0 -9
- package/examples/example-options.idx.jdb +0 -0
- package/examples/example-options.jdb +0 -0
- package/examples/example-users.idx.jdb +0 -0
- package/examples/example-users.jdb +0 -5
- package/examples/simple-test.js +0 -55
- package/src/FileHandler.js +0 -674
- package/src/IndexManager.js +0 -363
- package/src/IntegrityChecker.js +0 -379
- package/src/JSONLDatabase.js +0 -1391
- package/src/index.js +0 -608
package/src/FileHandler.mjs
@@ -0,0 +1,1140 @@
import fs from 'fs'
import path from 'path'
import readline from 'readline'
import pLimit from 'p-limit'

export default class FileHandler {
  constructor(file, fileMutex = null, opts = {}) {
    this.file = file
    this.indexFile = file ? file.replace(/\.jdb$/, '.idx.jdb') : null
    this.fileMutex = fileMutex
    this.opts = opts
    this.maxBufferSize = opts.maxBufferSize || 4 * 1024 * 1024 // 4MB default
    // Global I/O limiter to prevent file descriptor exhaustion in concurrent operations
    this.readLimiter = pLimit(opts.maxConcurrentReads || 4)
  }
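
  /*
   * Usage sketch (illustrative, assuming an async-mutex style lock: the
   * methods below call `fileMutex.runExclusive(fn)` and expect
   * `fileMutex.acquire()` to resolve to a release function):
   *
   *   import { Mutex } from 'async-mutex'
   *   import FileHandler from './FileHandler.mjs'
   *
   *   const handler = new FileHandler('./users.jdb', new Mutex(), {
   *     maxBufferSize: 8 * 1024 * 1024, // raise the 4MB grouped-read ceiling
   *     maxConcurrentReads: 2           // tighten the p-limit read limiter
   *   })
   *   // Derived index path: './users.idx.jdb'
   */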

  async truncate(offset) {
    try {
      await fs.promises.access(this.file, fs.constants.F_OK)
      await fs.promises.truncate(this.file, offset)
    } catch (err) {
      await fs.promises.writeFile(this.file, '')
    }
  }

  async writeOffsets(data) {
    // Write offsets to the index file (will be combined with index data)
    await fs.promises.writeFile(this.indexFile, data)
  }

  async readOffsets() {
    try {
      return await fs.promises.readFile(this.indexFile)
    } catch (err) {
      return null
    }
  }

  async writeIndex(data) {
    // Write index data to the index file (will be combined with offsets)
    // Use Windows-specific retry logic for file operations
    await this._writeFileWithRetry(this.indexFile, data)
  }

  async readIndex() {
    try {
      return await fs.promises.readFile(this.indexFile)
    } catch (err) {
      return null
    }
  }

  async exists() {
    try {
      await fs.promises.access(this.file, fs.constants.F_OK)
      return true
    } catch (err) {
      return false
    }
  }

  async indexExists() {
    try {
      await fs.promises.access(this.indexFile, fs.constants.F_OK)
      return true
    } catch (err) {
      return false
    }
  }

  async isLegacyFormat() {
    if (!await this.exists()) return false
    if (await this.indexExists()) return false

    // Check if main file contains offsets at the end (legacy format)
    try {
      const lastLine = await this.readLastLine()
      if (!lastLine || !lastLine.length) return false

      // Try to parse as offsets array
      const content = lastLine.toString('utf-8').trim()
      const parsed = JSON.parse(content)
      return Array.isArray(parsed)
    } catch (err) {
      return false
    }
  }
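
  /*
   * Legacy-format detection sketch: a pre-split file keeps its offsets array
   * as the final line of the main .jdb file, so the check above reduces to
   * "no .idx.jdb file present, and the last line parses as a JSON array".
   * A file shaped like the (hypothetical) listing below would be flagged:
   *
   *   {"id":1,"name":"Ana"}
   *   {"id":2,"name":"Bob"}
   *   [0,22,44]
   *
   *   const legacy = await handler.isLegacyFormat() // true for the file above
   */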

  async migrateLegacyFormat(serializer) {
    if (!await this.isLegacyFormat()) return false

    console.log('Migrating from legacy format to new 3-file format...')

    // Read the legacy file
    const lastLine = await this.readLastLine()
    const offsets = JSON.parse(lastLine.toString('utf-8').trim())

    // Get index offset and truncate offsets array
    const indexOffset = offsets[offsets.length - 2]
    const dataOffsets = offsets.slice(0, -2)

    // Read index data
    const indexStart = indexOffset
    const indexEnd = offsets[offsets.length - 1]
    const indexBuffer = await this.readRange(indexStart, indexEnd)
    const indexData = await serializer.deserialize(indexBuffer)

    // Write offsets to separate file
    const offsetsString = await serializer.serialize(dataOffsets, { linebreak: false })
    await this.writeOffsets(offsetsString)

    // Write index to separate file
    const indexString = await serializer.serialize(indexData, { linebreak: false })
    await this.writeIndex(indexString)

    // Truncate main file to remove index and offsets
    await this.truncate(indexOffset)

    console.log('Migration completed successfully!')
    return true
  }

  async readRange(start, end) {
    // Check if file exists before trying to read it
    if (!await this.exists()) {
      return Buffer.alloc(0) // Return empty buffer if file doesn't exist
    }

    let fd = await fs.promises.open(this.file, 'r')
    try {
      // CRITICAL FIX: Check file size before attempting to read
      const stats = await fd.stat()
      const fileSize = stats.size

      // If start position is beyond file size, return empty buffer
      if (start >= fileSize) {
        await fd.close()
        return Buffer.alloc(0)
      }

      // Adjust end position if it's beyond file size
      const actualEnd = Math.min(end, fileSize)
      const length = actualEnd - start

      // If length is 0 or negative, return empty buffer
      if (length <= 0) {
        await fd.close()
        return Buffer.alloc(0)
      }

      let buffer = Buffer.alloc(length)
      const { bytesRead } = await fd.read(buffer, 0, length, start)
      await fd.close()

      // CRITICAL FIX: Ensure we read the expected amount of data
      if (bytesRead !== length) {
        const errorMsg = `CRITICAL: Expected to read ${length} bytes, but read ${bytesRead} bytes at position ${start}`
        console.error(`⚠️ ${errorMsg}`)

        // This indicates a race condition or file corruption
        // Don't retry - the caller should handle synchronization properly
        if (bytesRead === 0) {
          throw new Error(`File corruption detected: ${errorMsg}`)
        }

        // Return partial data with warning - caller should handle this
        return buffer.subarray(0, bytesRead)
      }

      return buffer
    } catch (error) {
      await fd.close().catch(() => {})
      throw error
    }
  }
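
  /*
   * Byte-range read sketch: readRange clamps the requested window to the
   * current file size, so an over-long range returns whatever data exists
   * rather than throwing. `end` is exclusive.
   *
   *   // Read bytes 0..99 of the main .jdb file
   *   const buf = await handler.readRange(0, 100)
   *   console.log(buf.toString('utf8'))
   */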

  async readRanges(ranges, mapper) {
    const lines = {}

    // Check if file exists before trying to read it
    if (!await this.exists()) {
      return lines // Return empty object if file doesn't exist
    }

    const fd = await fs.promises.open(this.file, 'r')
    const groupedRanges = await this.groupedRanges(ranges)
    try {
      await Promise.allSettled(groupedRanges.map(async (groupedRange) => {
        await this.readLimiter(async () => {
          for await (const row of this.readGroupedRange(groupedRange, fd)) {
            lines[row.start] = mapper ? (await mapper(row.line, { start: row.start, end: row.start + row.line.length })) : row.line
          }
        })
      }))
    } catch (e) {
      console.error('Error reading ranges:', e)
    } finally {
      await fd.close()
    }
    return lines
  }

  async groupedRanges(ranges) { // expects ordered ranges from Database.getRanges()
    const readSize = 512 * 1024 // 512KB
    const groupedRanges = []
    let currentGroup = []
    let currentSize = 0

    // each range is a {start: number, end: number} object
    for (let i = 0; i < ranges.length; i++) {
      const range = ranges[i]
      const rangeSize = range.end - range.start

      if (currentGroup.length > 0) {
        const lastRange = currentGroup[currentGroup.length - 1]
        if (lastRange.end !== range.start || currentSize + rangeSize > readSize) {
          groupedRanges.push(currentGroup)
          currentGroup = []
          currentSize = 0
        }
      }

      currentGroup.push(range)
      currentSize += rangeSize
    }

    if (currentGroup.length > 0) {
      groupedRanges.push(currentGroup)
    }

    return groupedRanges
  }
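
  /*
   * Grouping sketch: adjacent ranges coalesce into one group (served by a
   * single read) while a gap, or exceeding the 512KB cap, starts a new group.
   *
   *   const groups = await handler.groupedRanges([
   *     { start: 0,  end: 10 },   // adjacent to the next range
   *     { start: 10, end: 25 },
   *     { start: 40, end: 55 }    // gap at 25..40 starts a new group
   *   ])
   *   // groups => [ [ {start:0,end:10}, {start:10,end:25} ], [ {start:40,end:55} ] ]
   */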

  async *readGroupedRange(groupedRange, fd) {
    if (groupedRange.length === 0) return

    // OPTIMIZATION: For single range, use direct approach
    if (groupedRange.length === 1) {
      const range = groupedRange[0]
      const bufferSize = range.end - range.start

      if (bufferSize <= 0 || bufferSize > this.maxBufferSize) {
        throw new Error(`Invalid buffer size: ${bufferSize}. Start: ${range.start}, End: ${range.end}. Max allowed: ${this.maxBufferSize}`)
      }

      const buffer = Buffer.allocUnsafe(bufferSize)
      const { bytesRead } = await fd.read(buffer, 0, bufferSize, range.start)
      const actualBuffer = bytesRead < bufferSize ? buffer.subarray(0, bytesRead) : buffer

      if (actualBuffer.length === 0) return

      let lineString
      try {
        lineString = actualBuffer.toString('utf8')
      } catch (error) {
        lineString = actualBuffer.toString('utf8', { replacement: '?' })
      }

      // CRITICAL FIX: Remove trailing newlines and whitespace for single range too
      // Optimized: Use trimEnd() which efficiently removes all trailing whitespace (faster than manual checks)
      lineString = lineString.trimEnd()

      yield {
        line: lineString,
        start: range.start,
        _: range.index !== undefined ? range.index : (range._ || null)
      }
      return
    }

    // OPTIMIZATION: For multiple ranges, read as single buffer and split by offsets
    const firstRange = groupedRange[0]
    const lastRange = groupedRange[groupedRange.length - 1]
    const totalSize = lastRange.end - firstRange.start

    if (totalSize <= 0 || totalSize > this.maxBufferSize) {
      throw new Error(`Invalid total buffer size: ${totalSize}. Start: ${firstRange.start}, End: ${lastRange.end}. Max allowed: ${this.maxBufferSize}`)
    }

    // Read entire grouped range as single buffer
    const buffer = Buffer.allocUnsafe(totalSize)
    const { bytesRead } = await fd.read(buffer, 0, totalSize, firstRange.start)
    const actualBuffer = bytesRead < totalSize ? buffer.subarray(0, bytesRead) : buffer

    if (actualBuffer.length === 0) return

    // Convert to string once
    let content
    try {
      content = actualBuffer.toString('utf8')
    } catch (error) {
      content = actualBuffer.toString('utf8', { replacement: '?' })
    }

    // CRITICAL FIX: Handle ranges more carefully to prevent corruption
    if (groupedRange.length === 2 && groupedRange[0].end === groupedRange[1].start) {
      // Special case: Adjacent ranges - split by newlines to prevent corruption
      const lines = content.split('\n').filter(line => line.trim().length > 0)

      for (let i = 0; i < Math.min(lines.length, groupedRange.length); i++) {
        const range = groupedRange[i]
        yield {
          line: lines[i],
          start: range.start,
          _: range.index !== undefined ? range.index : (range._ || null)
        }
      }
    } else {
      // CRITICAL FIX: For non-adjacent ranges, use the range.end directly
      // because range.end already excludes the newline (calculated as offsets[n+1] - 1)
      // We just need to find the line start (beginning of the line in the buffer)
      for (let i = 0; i < groupedRange.length; i++) {
        const range = groupedRange[i]
        const relativeStart = range.start - firstRange.start
        const relativeEnd = range.end - firstRange.start

        // OPTIMIZATION 2: Find line start only if necessary
        // Check if we're already at a line boundary to avoid unnecessary backwards search
        let lineStart = relativeStart
        if (relativeStart > 0 && content[relativeStart - 1] !== '\n') {
          // Only search backwards if we're not already at a line boundary
          while (lineStart > 0 && content[lineStart - 1] !== '\n') {
            lineStart--
          }
        }

        // OPTIMIZATION 3: Use slice() instead of substring() for better performance
        // CRITICAL FIX: range.end = offsets[n+1] - 1 points to the newline character
        // slice(start, end) includes characters from start to end-1 (end is exclusive)
        // So if relativeEnd points to the newline, slice will include it
        let rangeContent = content.slice(lineStart, relativeEnd)

        // OPTIMIZATION 4: Direct character check instead of regex/trimEnd
        // Remove trailing newlines and whitespace efficiently
        // trimEnd() is actually optimized in V8, but we can check if there's anything to trim first
        const len = rangeContent.length
        if (len > 0) {
          // Quick check: if last char is not whitespace, skip trimEnd
          const lastChar = rangeContent[len - 1]
          if (lastChar === '\n' || lastChar === '\r' || lastChar === ' ' || lastChar === '\t') {
            // Only call trimEnd if we detected trailing whitespace
            rangeContent = rangeContent.trimEnd()
          }
        }

        if (rangeContent.length === 0) continue

        yield {
          line: rangeContent,
          start: range.start,
          _: range.index !== undefined ? range.index : (range._ || null)
        }
      }
    }
  }

  async *walk(ranges) {
    // Check if file exists before trying to read it
    if (!await this.exists()) {
      return // Return empty generator if file doesn't exist
    }

    const fd = await fs.promises.open(this.file, 'r')
    try {
      const groupedRanges = await this.groupedRanges(ranges)
      for (const groupedRange of groupedRanges) {
        for await (const row of this.readGroupedRange(groupedRange, fd)) {
          yield row
        }
      }
    } finally {
      await fd.close()
    }
  }
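
  /*
   * Iteration sketch: walk() is an async generator, so grouped ranges can be
   * consumed lazily without materializing every row up front.
   *
   *   for await (const row of handler.walk([{ start: 0, end: 22 }])) {
   *     console.log(row.start, row.line) // raw JSONL line plus its byte offset
   *   }
   */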

  async replaceLines(ranges, lines) {
    // CRITICAL: Always use file mutex to prevent concurrent file operations
    if (this.fileMutex) {
      return this.fileMutex.runExclusive(async () => {
        // Add a small delay to ensure any pending operations complete
        await new Promise(resolve => setTimeout(resolve, 10));
        return this._replaceLinesInternal(ranges, lines);
      });
    } else {
      return this._replaceLinesInternal(ranges, lines);
    }
  }

  async _replaceLinesInternal(ranges, lines) {
    const tmpFile = this.file + '.tmp';
    let writer, reader;

    try {
      writer = await fs.promises.open(tmpFile, 'w+');

      // Check if the main file exists before trying to read it
      if (await this.exists()) {
        reader = await fs.promises.open(this.file, 'r');
      } else {
        // If file doesn't exist, we'll just write the new lines
        reader = null;
      }

      // Sort ranges by start position to ensure correct order
      const sortedRanges = [...ranges].sort((a, b) => a.start - b.start);

      let position = 0;
      let lineIndex = 0;

      for (const range of sortedRanges) {
        // Write existing content before the range (only if file exists)
        if (reader && position < range.start) {
          const buffer = await this.readRange(position, range.start);
          await writer.write(buffer);
        }

        // Write new line if provided, otherwise skip the range (for delete operations)
        if (lineIndex < lines.length && lines[lineIndex]) {
          const line = lines[lineIndex];
          // Ensure line ends with newline
          let formattedBuffer;
          if (Buffer.isBuffer(line)) {
            const needsNewline = line.length === 0 || line[line.length - 1] !== 0x0A;
            formattedBuffer = needsNewline ? Buffer.concat([line, Buffer.from('\n')]) : line;
          } else {
            const withNewline = line.endsWith('\n') ? line : line + '\n';
            formattedBuffer = Buffer.from(withNewline, 'utf8');
          }
          await writer.write(formattedBuffer);
        }

        // Update position to range.end to avoid overlapping writes
        position = range.end;
        lineIndex++;
      }

      // Write remaining content after the last range (only if file exists)
      if (reader) {
        const { size } = await reader.stat();
        if (position < size) {
          const buffer = await this.readRange(position, size);
          await writer.write(buffer);
        }
      }

      // Ensure all data is written to disk
      await writer.sync();
      if (reader) await reader.close();
      await writer.close();

      // Validate the temp file before renaming
      await this._validateTempFile(tmpFile);

      // CRITICAL: Retry logic for Windows EPERM errors
      await this._safeRename(tmpFile, this.file);

    } catch (e) {
      console.error('Error replacing lines:', e);
      throw e;
    } finally {
      if (reader) await reader.close().catch(() => { });
      if (writer) await writer.close().catch(() => { });
      await fs.promises.unlink(tmpFile).catch(() => { });
    }
  }
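
  /*
   * Replace/delete sketch: a falsy entry in `lines` removes the matching
   * range instead of rewriting it, since only truthy lines are written back.
   *
   *   // Rewrite the record at bytes 0..21 and delete the one at 22..43
   *   await handler.replaceLines(
   *     [{ start: 0, end: 22 }, { start: 22, end: 44 }],
   *     ['{"id":1,"name":"Ana"}', null]
   *   )
   */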

  async _safeRename(tmpFile, targetFile, maxRetries = 3) {
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        await fs.promises.rename(tmpFile, targetFile);
        return; // Success
      } catch (error) {
        if (error.code === 'EPERM' && attempt < maxRetries) {
          // Linearly increasing delay: 50ms, then 100ms
          const delay = 50 * attempt;
          console.log(`🔄 EPERM retry ${attempt}/${maxRetries}, waiting ${delay}ms...`);
          await new Promise(resolve => setTimeout(resolve, delay));
          continue;
        }

        // If all retries failed, try Windows fallback approach
        if (error.code === 'EPERM' && attempt === maxRetries) {
          console.log(`⚠️ All EPERM retries failed, trying Windows fallback...`);
          return this._windowsFallbackRename(tmpFile, targetFile);
        }

        throw error; // Re-throw if not EPERM or max retries reached
      }
    }
  }

  async _validateTempFile(tmpFile) {
    try {
      // Read the temp file and validate JSON structure
      const content = await fs.promises.readFile(tmpFile, 'utf8');
      const lines = content.split('\n').filter(line => line.trim());

      let hasInvalidJson = false;
      const validLines = [];

      for (let i = 0; i < lines.length; i++) {
        try {
          JSON.parse(lines[i]);
          validLines.push(lines[i]);
        } catch (error) {
          console.warn(`⚠️ Invalid JSON in temp file at line ${i + 1}, skipping:`, lines[i].substring(0, 100));
          hasInvalidJson = true;
        }
      }

      // If we found invalid JSON, rewrite the file with only valid lines
      if (hasInvalidJson && validLines.length > 0) {
        console.log(`🔧 Rewriting temp file with ${validLines.length} valid lines`);
        const correctedContent = validLines.join('\n') + '\n';
        await fs.promises.writeFile(tmpFile, correctedContent, 'utf8');
      }

      console.log(`✅ Temp file validation passed: ${validLines.length} valid JSON lines`);
    } catch (error) {
      console.error(`❌ Temp file validation failed:`, error.message);
      throw error;
    }
  }

  async _windowsFallbackRename(tmpFile, targetFile) {
    try {
      // Windows fallback: copy content instead of rename
      console.log(`🔄 Using Windows fallback: copy + delete approach`);

      // Validate temp file before copying
      await this._validateTempFile(tmpFile);

      // Read the temp file content
      const content = await fs.promises.readFile(tmpFile, 'utf8');

      // Write to target file directly
      await fs.promises.writeFile(targetFile, content, 'utf8');

      // Delete temp file
      await fs.promises.unlink(tmpFile);

      console.log(`✅ Windows fallback successful`);
      return;
    } catch (fallbackError) {
      console.error(`❌ Windows fallback also failed:`, fallbackError);
      throw fallbackError;
    }
  }

  async writeData(data, immediate, fd) {
    await fd.write(data)
  }

  async writeDataAsync(data) {
    // CRITICAL FIX: Ensure directory exists before writing
    const dir = path.dirname(this.file)
    await fs.promises.mkdir(dir, { recursive: true })

    await fs.promises.appendFile(this.file, data)
  }

  /**
   * Check if data appears to be binary (always false since we only use JSON now)
   */
  isBinaryData(data) {
    // All data is now JSON format
    return false
  }

  /**
   * Check if file is binary (always false since we only use JSON now)
   */
  async isBinaryFile() {
    // All files are now JSON format
    return false
  }

  async readLastLine() {
    // Use global read limiter to prevent file descriptor exhaustion
    return this.readLimiter(async () => {
      // Check if file exists before trying to read it
      if (!await this.exists()) {
        return null // Return null if file doesn't exist
      }

      const reader = await fs.promises.open(this.file, 'r')
      try {
        const { size } = await reader.stat()
        if (size < 1) throw 'empty file'
        this.size = size
        const bufferSize = 16384
        let buffer, isFirstRead = true, lastReadSize, readPosition = Math.max(size - bufferSize, 0)
        while (readPosition >= 0) {
          const readSize = Math.min(bufferSize, size - readPosition)
          if (readSize !== lastReadSize) {
            lastReadSize = readSize
            buffer = Buffer.alloc(readSize)
          }
          const { bytesRead } = await reader.read(buffer, 0, isFirstRead ? (readSize - 1) : readSize, readPosition)
          if (isFirstRead) isFirstRead = false
          if (bytesRead === 0) break
          const newlineIndex = buffer.lastIndexOf(10)
          const start = readPosition + newlineIndex + 1
          if (newlineIndex !== -1) {
            const lastLine = Buffer.alloc(size - start)
            await reader.read(lastLine, 0, size - start, start)
            if (!lastLine || !lastLine.length) {
              throw 'no metadata or empty file'
            }
            return lastLine
          } else {
            readPosition -= bufferSize
          }
        }
      } catch (e) {
        String(e).includes('empty file') || console.error('Error reading last line:', e)
      } finally {
        reader.close()
      }
    })
  }
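
  /*
   * Last-line sketch: readLastLine() scans backwards in 16KB steps and
   * resolves to a Buffer, or to null/undefined when the file is missing or
   * empty; isLegacyFormat() above builds directly on this.
   *
   *   const tail = await handler.readLastLine()
   *   if (tail) console.log(tail.toString('utf-8').trim())
   */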

  /**
   * Read records with streaming using readline
   * @param {Object} criteria - Filter criteria
   * @param {Object} options - Options (limit, skip)
   * @param {Function} matchesCriteria - Function to check if record matches criteria
   * @returns {Promise<Array>} - Array of records
   */
  async readWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
    // CRITICAL: Always use file mutex to prevent concurrent file operations
    if (this.fileMutex) {
      return this.fileMutex.runExclusive(async () => {
        // Add a small delay to ensure any pending operations complete
        await new Promise(resolve => setTimeout(resolve, 5));
        // Use global read limiter to prevent file descriptor exhaustion
        return this.readLimiter(() => this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer));
      });
    } else {
      // Use global read limiter to prevent file descriptor exhaustion
      return this.readLimiter(() => this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer));
    }
  }
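
  /*
   * Streaming-query sketch (the criteria matcher is supplied by the caller;
   * a simple strict-equality matcher is assumed here for illustration):
   *
   *   const matches = (record, criteria) =>
   *     Object.keys(criteria).every(key => record[key] === criteria[key])
   *
   *   const rows = await handler.readWithStreaming(
   *     { group: 'admin' },
   *     { limit: 10, skip: 0 },
   *     matches
   *   )
   *   // Each returned row carries its line number in `_`
   */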

  async _readWithStreamingInternal(criteria, options = {}, matchesCriteria, serializer = null) {
    const { limit, skip = 0 } = options; // No default limit
    const results = [];
    let lineNumber = 0;
    let processed = 0;
    let skipped = 0;
    let matched = 0;

    try {
      // Check if file exists before trying to read it
      if (!await this.exists()) {
        return results; // Return empty results if file doesn't exist
      }

      // All files are now JSONL format - use line-by-line reading
      // Create optimized read stream
      const stream = fs.createReadStream(this.file, {
        highWaterMark: 64 * 1024, // 64KB chunks
        encoding: 'utf8'
      });

      // Create readline interface
      const rl = readline.createInterface({
        input: stream,
        crlfDelay: Infinity // Better performance
      });

      // Process line by line
      for await (const line of rl) {
        if (lineNumber >= skip) {
          try {
            let record;
            if (serializer && typeof serializer.deserialize === 'function') {
              // Use serializer for deserialization
              record = serializer.deserialize(line);
            } else {
              // Fallback to JSON.parse for backward compatibility
              record = JSON.parse(line);
            }

            if (record && matchesCriteria(record, criteria)) {
              // Return raw data - term mapping will be handled by Database layer
              results.push({ ...record, _: lineNumber });
              matched++;

              // Check if we've reached the limit
              if (results.length >= limit) {
                break;
              }
            }
          } catch (error) {
            // CRITICAL FIX: Only log errors if they're not expected during concurrent operations
            // Don't log JSON parsing errors that occur during file writes
            if (this.opts && this.opts.debugMode && !error.message.includes('Unexpected')) {
              console.log(`Error reading line ${lineNumber}:`, error.message);
            }
            // Ignore invalid lines - they may be partial writes
          }
        } else {
          skipped++;
        }

        lineNumber++;
        processed++;
      }

      if (this.opts && this.opts.debugMode) {
        console.log(`📊 Streaming read completed: ${results.length} results, ${processed} processed, ${skipped} skipped, ${matched} matched`);
      }

      return results;

    } catch (error) {
      console.error('Error in readWithStreaming:', error);
      throw error;
    }
  }

  /**
   * Count records with streaming
   * @param {Object} criteria - Filter criteria
   * @param {Object} options - Options (limit)
   * @param {Function} matchesCriteria - Function to check if record matches criteria
   * @returns {Promise<number>} - Number of records
   */
  async countWithStreaming(criteria, options = {}, matchesCriteria, serializer = null) {
    const { limit } = options;
    let count = 0;
    let processed = 0;

    try {
      const stream = fs.createReadStream(this.file, {
        highWaterMark: 64 * 1024,
        encoding: 'utf8'
      });

      const rl = readline.createInterface({
        input: stream,
        crlfDelay: Infinity
      });

      for await (const line of rl) {
        if (limit && count >= limit) {
          break;
        }

        try {
          let record;
          if (serializer) {
            // Use serializer for deserialization
            record = await serializer.deserialize(line);
          } else {
            // Fallback to JSON.parse for backward compatibility
            record = JSON.parse(line);
          }

          if (record && matchesCriteria(record, criteria)) {
            count++;
          }
        } catch (error) {
          // Ignore invalid lines
        }

        processed++;
      }

      return count;

    } catch (error) {
      throw error;
    }
  }

  /**
   * Get file statistics
   * @returns {Promise<Object>} - File statistics
   */
  async getFileStats() {
    try {
      const stats = await fs.promises.stat(this.file);
      const lineCount = await this.countLines();

      return {
        filePath: this.file,
        size: stats.size,
        lineCount,
        lastModified: stats.mtime
      };
    } catch (error) {
      throw error;
    }
  }

  /**
   * Count lines in file
   * @returns {Promise<number>} - Number of lines
   */
  async countLines() {
    let lineCount = 0;

    try {
      const stream = fs.createReadStream(this.file, {
        highWaterMark: 64 * 1024,
        encoding: 'utf8'
      });

      const rl = readline.createInterface({
        input: stream,
        crlfDelay: Infinity
      });

      for await (const line of rl) {
        lineCount++;
      }

      return lineCount;
    } catch (error) {
      throw error;
    }
  }
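
  /*
   * Stats sketch: getFileStats() combines fs.stat with a full line count, so
   * it costs a complete file scan on large datasets. Values below are
   * illustrative only:
   *
   *   const stats = await handler.getFileStats()
   *   // => { filePath: './users.jdb', size: 2048, lineCount: 64, lastModified: <Date> }
   */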

  async destroy() {
    // CRITICAL FIX: Close all file handles to prevent resource leaks
    try {
      // Close any open file descriptors
      if (this.fd) {
        await this.fd.close().catch(() => {})
        this.fd = null
      }

      // Close any open readers/writers
      if (this.reader) {
        await this.reader.close().catch(() => {})
        this.reader = null
      }

      if (this.writer) {
        await this.writer.close().catch(() => {})
        this.writer = null
      }

      // Clear any cached file handles
      this.cachedFd = null

    } catch (error) {
      // Ignore errors during cleanup
    }
  }

  async delete() {
    try {
      // Delete main file
      await fs.promises.unlink(this.file).catch(() => {})

      // Delete index file (which now contains both index and offsets data)
      await fs.promises.unlink(this.indexFile).catch(() => {})
    } catch (error) {
      // Ignore errors if files don't exist
    }
  }

  async writeAll(data) {
    const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
    try {
      // Use Windows-specific retry logic for file operations
      await this._writeWithRetry(data)
    } finally {
      release()
    }
  }

  /**
   * Optimized batch write operation (OPTIMIZATION)
   * @param {Array} dataChunks - Array of data chunks to write
   * @param {boolean} append - Whether to append or overwrite
   */
  async writeBatch(dataChunks, append = false) {
    if (!dataChunks || !dataChunks.length) return

    const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
    try {
      // OPTIMIZATION: Use streaming write for better performance
      if (dataChunks.length === 1 && Buffer.isBuffer(dataChunks[0])) {
        // Single buffer - use direct write
        if (append) {
          await fs.promises.appendFile(this.file, dataChunks[0])
        } else {
          await this._writeFileWithRetry(this.file, dataChunks[0])
        }
      } else {
        // Multiple chunks - use streaming approach
        await this._writeBatchStreaming(dataChunks, append)
      }
    } finally {
      release()
    }
  }
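
  /*
   * Batch-write sketch: a single Buffer takes the direct-write path, while
   * multiple chunks (Buffers or strings) stream sequentially with
   * backpressure handling via 'drain'.
   *
   *   await handler.writeBatch(
   *     [Buffer.from('{"id":1}\n'), '{"id":2}\n'],
   *     true // append instead of overwrite
   *   )
   */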

  /**
   * OPTIMIZATION: Streaming write for multiple chunks
   * @param {Array} dataChunks - Array of data chunks to write
   * @param {boolean} append - Whether to append or overwrite
   */
  async _writeBatchStreaming(dataChunks, append = false) {
    // OPTIMIZATION: Use createWriteStream for better performance
    const writeStream = fs.createWriteStream(this.file, {
      flags: append ? 'a' : 'w',
      highWaterMark: 64 * 1024 // 64KB buffer
    })

    return new Promise((resolve, reject) => {
      writeStream.on('error', reject)
      writeStream.on('finish', resolve)

      // Write chunks sequentially
      let index = 0
      const writeNext = () => {
        if (index >= dataChunks.length) {
          writeStream.end()
          return
        }

        const chunk = dataChunks[index++]
        const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, 'utf8')

        if (!writeStream.write(buffer)) {
          writeStream.once('drain', writeNext)
        } else {
          writeNext()
        }
      }

      writeNext()
    })
  }

  /**
   * Optimized append operation for single data chunk (OPTIMIZATION)
   * @param {string|Buffer} data - Data to append
   */
  async appendOptimized(data) {
    const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
    try {
      // OPTIMIZATION: Direct append without retry logic for better performance
      await fs.promises.appendFile(this.file, data)
    } finally {
      release()
    }
  }

  /**
   * Windows-specific retry logic for fs.promises.writeFile operations
   * Based on node-graceful-fs workarounds for EPERM issues
   */
  async _writeFileWithRetry(filePath, data, maxRetries = 3) {
    const isWindows = process.platform === 'win32'

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        // Ensure data is properly formatted as string or buffer
        if (Buffer.isBuffer(data)) {
          await fs.promises.writeFile(filePath, data)
        } else {
          await fs.promises.writeFile(filePath, data.toString())
        }

        // Windows: add small delay after write operation
        // This helps prevent EPERM issues caused by file handle not being released immediately
        if (isWindows) {
          await new Promise(resolve => setTimeout(resolve, 10))
        }

        // Success - return immediately
        return

      } catch (err) {
        // Only retry on EPERM errors on Windows
        if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
          // Exponential backoff: 10ms, then 100ms
          const delay = Math.pow(10, attempt + 1)
          await new Promise(resolve => setTimeout(resolve, delay))
          continue
        }

        // Re-throw if not a retryable error or max retries reached
        throw err
      }
    }
  }

  /**
   * Windows-specific retry logic for file operations
   * Based on node-graceful-fs workarounds for EPERM issues
   */
  async _writeWithRetry(data, maxRetries = 3) {
    const isWindows = process.platform === 'win32'

    for (let attempt = 0; attempt < maxRetries; attempt++) {
      try {
        // CRITICAL FIX: Ensure directory exists before writing file
        const dir = path.dirname(this.file)
        await fs.promises.mkdir(dir, { recursive: true })

        const fd = await fs.promises.open(this.file, 'w')
        try {
          // Ensure data is properly formatted as string or buffer
          if (Buffer.isBuffer(data)) {
            await fd.write(data)
          } else {
            await fd.write(data.toString())
          }
        } finally {
          await fd.close()

          // Windows: add small delay after closing file handle
          // This helps prevent EPERM issues caused by file handle not being released immediately
          if (isWindows) {
            await new Promise(resolve => setTimeout(resolve, 10))
          }
        }

        // Success - return immediately
        return

      } catch (err) {
        // Only retry on EPERM errors on Windows
        if (err.code === 'EPERM' && isWindows && attempt < maxRetries - 1) {
          // Exponential backoff: 10ms, then 100ms
          const delay = Math.pow(10, attempt + 1)
          await new Promise(resolve => setTimeout(resolve, delay))
          continue
        }

        // Re-throw if not a retryable error or max retries reached
        throw err
      }
    }
  }

  async readAll() {
    const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
    try {
      // Check if file exists before trying to read it
      if (!await this.exists()) {
        return '' // Return empty string if file doesn't exist
      }

      const fd = await fs.promises.open(this.file, 'r')
      try {
        const stats = await fd.stat()
        const buffer = Buffer.allocUnsafe(stats.size)
        await fd.read(buffer, 0, stats.size, 0)
        return buffer.toString('utf8')
      } finally {
        await fd.close()
      }
    } finally {
      release()
    }
  }

  /**
   * Read specific lines from the file using line numbers
   * This is optimized for partial reads when using indexed queries
   * @param {number[]} lineNumbers - Array of line numbers to read (1-based)
   * @returns {Promise<string>} - Content of the specified lines
   */
  async readSpecificLines(lineNumbers) {
    if (!lineNumbers || lineNumbers.length === 0) {
      return ''
    }

    const release = this.fileMutex ? await this.fileMutex.acquire() : () => {}
    try {
      // Check if file exists before trying to read it
      if (!await this.exists()) {
        return '' // Return empty string if file doesn't exist
      }

      const fd = await fs.promises.open(this.file, 'r')
      try {
        const stats = await fd.stat()
        const buffer = Buffer.allocUnsafe(stats.size)
        await fd.read(buffer, 0, stats.size, 0)

        // CRITICAL FIX: Ensure proper UTF-8 decoding for multi-byte characters
        let content
        try {
          content = buffer.toString('utf8')
        } catch (error) {
          // If UTF-8 decoding fails, try to recover by finding valid UTF-8 boundaries
          console.warn(`UTF-8 decoding failed for file ${this.file}, attempting recovery`)

          // Find the last complete UTF-8 character
          let validLength = buffer.length
          for (let i = buffer.length - 1; i >= 0; i--) {
            const byte = buffer[i]
            // CRITICAL FIX: Correct UTF-8 start character detection
            // Check if this is the start of a UTF-8 character (not a continuation byte)
            if ((byte & 0x80) === 0 ||    // ASCII (1 byte) - 0xxxxxxx
                (byte & 0xE0) === 0xC0 || // 2-byte UTF-8 start - 110xxxxx
                (byte & 0xF0) === 0xE0 || // 3-byte UTF-8 start - 1110xxxx
                (byte & 0xF8) === 0xF0) { // 4-byte UTF-8 start - 11110xxx
              validLength = i + 1
              break
            }
          }

          const validBuffer = buffer.subarray(0, validLength)
          content = validBuffer.toString('utf8')
        }

        // Split content into lines and extract only the requested lines
        const lines = content.split('\n')
        const result = []

        for (const lineNum of lineNumbers) {
          // Convert to 0-based index and check bounds
          const index = lineNum - 1
          if (index >= 0 && index < lines.length) {
            result.push(lines[index])
          }
        }

        return result.join('\n')
      } finally {
        await fd.close()
      }
    } finally {
      release()
    }
  }
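
  /*
   * Indexed-read sketch: line numbers are 1-based and out-of-range entries
   * are silently dropped from the joined result.
   *
   *   const text = await handler.readSpecificLines([1, 3])
   *   const records = text.split('\n').filter(Boolean).map(JSON.parse)
   */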

}