jexidb 1.0.8 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,674 @@
1
+ import { promises as fs } from 'fs';
2
+ import path from 'path';
3
+
4
/**
 * FileHandler - file operations for a JSONL data file and its sidecar files.
 *
 * Records are stored one JSON document per line. Lines are addressed by their
 * byte offset in the file, so every size computed here is a UTF-8 byte length,
 * never a JavaScript string length.
 *
 * Sidecar files live next to the data file:
 *   <name>.index.json - index data (see readIndex / writeIndex)
 *   <name>.meta.json  - metadata   (see readMeta / writeMeta)
 */
class FileHandler {
  /**
   * @param {string} filePath - Path of the JSONL data file (resolved to absolute).
   */
  constructor(filePath) {
    this.filePath = path.resolve(filePath);

    // Strip only a trailing ".jsonl". A plain String.replace would rewrite the
    // first ".jsonl" found anywhere in the path (e.g. in a directory name) and,
    // when the path has no ".jsonl" at all, would make the sidecar paths equal
    // to the data file path, so writing the index would overwrite the data.
    const extension = '.jsonl';
    const basePath = this.filePath.endsWith(extension)
      ? this.filePath.slice(0, -extension.length)
      : this.filePath;

    this.indexPath = `${basePath}.index.json`;
    this.metaPath = `${basePath}.meta.json`;
    this.writeBuffer = [];
    this.isWriting = false;
  }

  /**
   * Ensures the directory that contains the data file exists.
   * @returns {Promise<void>}
   */
  async ensureDirectory() {
    const dir = path.dirname(this.filePath);
    try {
      await fs.access(dir);
    } catch {
      await fs.mkdir(dir, { recursive: true });
    }
  }

  /**
   * Calculates the UTF-8 byte length of a string.
   * @param {string} str
   * @returns {number}
   */
  getByteLength(str) {
    return Buffer.byteLength(str, 'utf8');
  }

  /**
   * Serializes a value to a single JSON line terminated by "\n".
   * @param {*} obj
   * @returns {string}
   */
  serialize(obj) {
    return JSON.stringify(obj) + '\n';
  }

  /**
   * Parses one JSON line. Surrounding whitespace is ignored.
   * @param {string} line
   * @returns {*} The parsed value.
   * @throws {Error} When the line is empty or is not valid JSON. The message
   *   contains up to the first 100 characters of the line for context.
   */
  deserialize(line) {
    try {
      const trimmed = line.trim();
      if (!trimmed) {
        throw new Error('Empty line');
      }
      return JSON.parse(trimmed);
    } catch (error) {
      const context = line.length > 100 ? line.substring(0, 100) + '...' : line;
      throw new Error(
        `Failed to deserialize JSON data: ${context} - ${error.message}`,
        { cause: error }
      );
    }
  }

  /**
   * Reads the line that starts at the given byte offset.
   *
   * @param {number} offset - Byte offset of the first byte of the line.
   * @returns {Promise<string|null>} The line without its trailing "\n"
   *   (an empty string when the offset is at or past the end of the file),
   *   or null when the data file does not exist.
   */
  async readLine(offset) {
    try {
      const fd = await fs.open(this.filePath, 'r');
      try {
        const chunk = Buffer.alloc(1024);
        const pieces = [];
        let position = offset;

        while (true) {
          const { bytesRead } = await fd.read(chunk, 0, chunk.length, position);
          if (bytesRead === 0) break;

          // Search for the newline in the raw bytes and decode only once at the
          // end: decoding chunk by chunk corrupts any multi-byte UTF-8
          // character that straddles a chunk boundary.
          const filled = chunk.subarray(0, bytesRead);
          const newlineIndex = filled.indexOf(0x0a);

          if (newlineIndex !== -1) {
            pieces.push(Buffer.from(filled.subarray(0, newlineIndex)));
            break;
          }

          // Copy, because `chunk` is reused by the next read.
          pieces.push(Buffer.from(filled));
          position += bytesRead;
        }

        return Buffer.concat(pieces).toString('utf8');
      } finally {
        await fd.close();
      }
    } catch (error) {
      if (error.code === 'ENOENT') {
        return null; // File doesn't exist
      }
      throw error;
    }
  }

  /**
   * Reads several lines one after another.
   * @param {Iterable<number>} offsets - Byte offsets of the lines to read.
   * @returns {Promise<Array<{offset: number, line: string}>>} One entry per
   *   offset, in input order; empty when the data file does not exist.
   */
  async readLines(offsets) {
    const results = [];
    for (const offset of offsets) {
      const line = await this.readLine(offset);
      if (line !== null) {
        results.push({ offset, line });
      }
    }
    return results;
  }

  /**
   * Reads several lines concurrently.
   *
   * Reads are issued in bounded groups so that a very large offset list does
   * not open an unbounded number of file handles at once.
   *
   * @param {number[]} offsets - Byte offsets of the lines to read.
   * @returns {Promise<string[]>} The lines, in input order (note: unlike
   *   readLines, only the line text is returned).
   */
  async readLinesBatch(offsets) {
    if (!offsets || offsets.length === 0) {
      return [];
    }

    const maxConcurrentReads = 64;
    const lines = [];

    for (let start = 0; start < offsets.length; start += maxConcurrentReads) {
      const group = offsets.slice(start, start + maxConcurrentReads);
      const groupLines = await Promise.all(group.map(offset => this.readLine(offset)));
      for (const line of groupLines) {
        if (line !== null) {
          lines.push(line);
        }
      }
    }

    return lines;
  }

  /**
   * Groups offsets into batches of "consecutive" offsets.
   *
   * Offsets are sorted ascending; an offset joins the current batch when it is
   * exactly `start + count * estimateLineSize()`, otherwise it starts a new
   * batch. The first batch is created with `consecutive: true`, every later
   * batch with `consecutive: false`.
   *
   * @param {number[]} offsets
   * @returns {Array<{consecutive: boolean, start: number, count: number, offsets: number[]}>}
   */
  groupConsecutiveOffsets(offsets) {
    if (offsets.length === 0) return [];

    const sortedOffsets = [...offsets].sort((a, b) => a - b);
    const lineSize = this.estimateLineSize();
    const batches = [];
    let currentBatch = {
      consecutive: true,
      start: sortedOffsets[0],
      count: 1,
      offsets: [sortedOffsets[0]]
    };

    for (let i = 1; i < sortedOffsets.length; i++) {
      const currentOffset = sortedOffsets[i];
      const expectedOffset = currentBatch.start + (currentBatch.count * lineSize);

      if (currentOffset === expectedOffset) {
        currentBatch.count++;
        currentBatch.offsets.push(currentOffset);
      } else {
        batches.push(currentBatch);
        currentBatch = {
          consecutive: false,
          start: currentOffset,
          count: 1,
          offsets: [currentOffset]
        };
      }
    }

    batches.push(currentBatch);
    return batches;
  }

  /**
   * Estimated size of one line in bytes (fixed guess, not measured).
   * @returns {number}
   */
  estimateLineSize() {
    return 200;
  }

  /**
   * Reads up to `count` complete lines with a single read starting at
   * `startOffset`.
   *
   * Only lines that fit entirely in the read buffer (at least 8192 bytes, or
   * `count * estimateLineSize()`) are returned, and blank lines are skipped,
   * so fewer than `count` lines may come back.
   *
   * @param {number} startOffset - Byte offset of the first line.
   * @param {number} count - Maximum number of lines to return.
   * @returns {Promise<string[]>} Lines without their trailing "\n"; empty when
   *   the data file does not exist.
   */
  async readConsecutiveLines(startOffset, count) {
    try {
      const fd = await fs.open(this.filePath, 'r');
      try {
        const bufferSize = Math.max(8192, count * this.estimateLineSize());
        const buffer = Buffer.alloc(bufferSize);
        // Buffers expose `length`, not `size`; the read length must be explicit.
        const { bytesRead } = await fd.read(buffer, 0, buffer.length, startOffset);

        if (bytesRead === 0) return [];

        const content = buffer.toString('utf8', 0, bytesRead);
        const lines = content.split('\n');

        // The last split element is either empty or an incomplete line.
        const completeLines = Math.min(count, lines.length - 1);
        const results = [];
        for (let i = 0; i < completeLines; i++) {
          const line = lines[i];
          if (line.trim()) {
            results.push(line);
          }
        }

        return results;
      } finally {
        await fd.close();
      }
    } catch (error) {
      if (error.code === 'ENOENT') {
        return [];
      }
      throw error;
    }
  }

  /**
   * Runs `operation`, retrying on EPERM/EACCES (up to 3 attempts in total,
   * waiting 100 ms between attempts). Any other error is rethrown immediately.
   * @param {function(): Promise<*>} operation
   * @returns {Promise<*>} Whatever `operation` resolves to.
   */
  async _retryOnPermissionError(operation) {
    const maxAttempts = 3;
    for (let attempt = 1; ; attempt++) {
      try {
        return await operation();
      } catch (error) {
        const isPermissionError = error.code === 'EPERM' || error.code === 'EACCES';
        if (!isPermissionError || attempt >= maxAttempts) {
          throw error;
        }
        await new Promise(resolve => setTimeout(resolve, 100));
      }
    }
  }

  /**
   * Serializes `data` and appends it as one line, flushing it to disk.
   * @param {*} data
   * @returns {Promise<number>} Number of bytes appended.
   */
  async appendLine(data) {
    await this.ensureDirectory();
    const line = this.serialize(data);

    return this._retryOnPermissionError(async () => {
      const fd = await fs.open(this.filePath, 'a');
      try {
        await fd.write(line);
        await fd.sync(); // Ensures flush
        return this.getByteLength(line);
      } finally {
        await fd.close();
      }
    });
  }

  /**
   * Appends pre-serialized data (one or more lines) in a single write.
   * @param {string|Buffer} batchData - Strings are encoded as UTF-8.
   * @returns {Promise<number>} Number of bytes appended.
   */
  async appendBatch(batchData) {
    await this.ensureDirectory();

    const buffer = Buffer.isBuffer(batchData) ? batchData : Buffer.from(batchData, 'utf8');

    return this._retryOnPermissionError(async () => {
      await fs.appendFile(this.filePath, buffer);
      return buffer.length;
    });
  }

  /**
   * Replaces the line starting at `offset` with the serialized `data`.
   *
   * A replacement that is larger than the existing line rewrites the file
   * (later lines move); otherwise the line is overwritten in place.
   *
   * @param {number} offset - Byte offset of the line to replace.
   * @param {*} data - New record.
   * @returns {Promise<number>} The offset of the replaced line.
   * @throws {Error} When the data file does not exist.
   */
  async replaceLine(offset, data) {
    const newLine = this.serialize(data);
    const newLineBytes = this.getByteLength(newLine);

    const oldLine = await this.readLine(offset);
    if (oldLine === null) {
      throw new Error(`Line at offset ${offset} not found`);
    }

    const oldLineBytes = this.getByteLength(oldLine + '\n');

    if (newLineBytes > oldLineBytes) {
      return await this.replaceLineWithRewrite(offset, oldLineBytes, newLine);
    }
    return await this.replaceLineInPlace(offset, oldLineBytes, newLine);
  }

  /**
   * Overwrites a line in place. Intended for a replacement that is not larger
   * (in bytes) than the line it replaces.
   *
   * When the replacement is shorter:
   *   - if the old line is the last thing in the file, the file is truncated
   *     right after the new line;
   *   - otherwise the new line is padded with spaces before its "\n" so that
   *     it occupies exactly `oldLineBytes`, keeping every following line at
   *     its offset. (Truncating here would delete all following records.)
   *
   * @param {number} offset - Byte offset of the line.
   * @param {number} oldLineBytes - Byte length of the existing line incl. "\n".
   * @param {string} newLine - Replacement text, normally ending in "\n".
   * @returns {Promise<number>} The same offset.
   */
  async replaceLineInPlace(offset, oldLineBytes, newLine) {
    const fd = await fs.open(this.filePath, 'r+');
    try {
      // All arithmetic is done on bytes; string length differs for non-ASCII.
      const payload = Buffer.from(newLine, 'utf8');
      let output = payload;
      let truncateTo = null;

      if (payload.length < oldLineBytes) {
        const { size } = await fd.stat();
        if (offset + oldLineBytes >= size) {
          truncateTo = offset + payload.length;
        } else {
          const endsWithNewline = payload.length > 0 && payload[payload.length - 1] === 0x0a;
          const bodyLength = endsWithNewline ? payload.length - 1 : payload.length;
          output = Buffer.alloc(oldLineBytes, 0x20);
          payload.copy(output, 0, 0, bodyLength);
          output[oldLineBytes - 1] = 0x0a;
        }
      }

      // Buffer overload: (buffer, offsetInBuffer, length, filePosition).
      await fd.write(output, 0, output.length, offset);

      if (truncateTo !== null) {
        await fd.truncate(truncateTo);
      }

      await fd.sync(); // Ensures flush
      return offset;
    } finally {
      await fd.close();
    }
  }

  /**
   * Replaces a line by rewriting the whole file through a temporary file.
   * Used when the replacement is larger than the existing line.
   *
   * @param {number} offset - Byte offset of the line.
   * @param {number} oldLineBytes - Byte length of the existing line incl. "\n".
   * @param {string} newLine - Replacement text.
   * @returns {Promise<number>} The same offset (the line still starts there).
   */
  async replaceLineWithRewrite(offset, oldLineBytes, newLine) {
    await this._rewriteRange(offset, oldLineBytes, newLine);
    return offset;
  }

  /**
   * Removes a line, either logically or physically.
   *
   * @param {number} offset - Byte offset of the line.
   * @param {boolean} [markAsDeleted=true] - When true the line is replaced by
   *   a `{ _deleted: true, _deletedAt }` tombstone record; when false the line
   *   is cut out of the file.
   * @returns {Promise<number>} The offset (tombstone) or the number of bytes
   *   removed (physical removal).
   */
  async removeLine(offset, markAsDeleted = true) {
    if (markAsDeleted) {
      const deletedData = { _deleted: true, _deletedAt: new Date().toISOString() };
      return await this.replaceLine(offset, deletedData);
    }
    return await this.removeLinePhysically(offset);
  }

  /**
   * Cuts the line starting at `offset` out of the file.
   * @param {number} offset - Byte offset of the line.
   * @returns {Promise<number>} Number of bytes removed, or 0 when the data
   *   file does not exist.
   */
  async removeLinePhysically(offset) {
    const oldLine = await this.readLine(offset);
    if (oldLine === null) {
      return 0;
    }

    const oldLineBytes = this.getByteLength(oldLine + '\n');
    await this._rewriteRange(offset, oldLineBytes, null);
    return oldLineBytes;
  }

  /**
   * Copies the bytes [start, end) from `reader` to the current position of
   * `writer`. Stops early at end of file; pass Infinity to copy to the end.
   * @param {fs.FileHandle} reader
   * @param {fs.FileHandle} writer
   * @param {number} start
   * @param {number} end
   * @returns {Promise<void>}
   */
  async _copyBytes(reader, writer, start, end) {
    const chunk = Buffer.alloc(8192);
    let position = start;

    while (position < end) {
      // Never read past `end`: copying whole chunks would drag the bytes that
      // are supposed to be skipped into the output.
      const wanted = Math.min(chunk.length, end - position);
      const { bytesRead } = await reader.read(chunk, 0, wanted, position);
      if (bytesRead === 0) break;
      await writer.write(chunk, 0, bytesRead);
      position += bytesRead;
    }
  }

  /**
   * Rewrites the data file with the byte range [offset, offset + skipBytes)
   * replaced by `replacement` (or simply removed when `replacement` is null).
   * The new content is built in "<file>.tmp" and then swapped in.
   * @param {number} offset
   * @param {number} skipBytes
   * @param {string|null} replacement
   * @returns {Promise<void>}
   */
  async _rewriteRange(offset, skipBytes, replacement) {
    const tempPath = this.filePath + '.tmp';
    const reader = await fs.open(this.filePath, 'r');

    try {
      const writer = await fs.open(tempPath, 'w');
      try {
        await this._copyBytes(reader, writer, 0, offset);
        if (replacement !== null) {
          await writer.write(replacement);
        }
        await this._copyBytes(reader, writer, offset + skipBytes, Infinity);
        await writer.sync();
      } finally {
        await writer.close();
      }
    } catch (error) {
      // Best-effort cleanup of the partial temp file; the original error is
      // the one worth reporting.
      await fs.unlink(tempPath).catch(() => {});
      throw error;
    } finally {
      await reader.close();
    }

    await this._replaceWithTempFile(tempPath);
  }

  /**
   * Replaces the data file with the finished temp file.
   *
   * A rename is tried first (the original is never missing in between). When
   * the rename is refused with EPERM/EACCES - e.g. a locked file on Windows -
   * the temp content is copied over the original instead and the temp file is
   * removed afterwards.
   * @param {string} tempPath
   * @returns {Promise<void>}
   */
  async _replaceWithTempFile(tempPath) {
    try {
      await fs.rename(tempPath, this.filePath);
      return;
    } catch (error) {
      if (error.code !== 'EPERM' && error.code !== 'EACCES') {
        throw error;
      }
    }

    const tempReader = await fs.open(tempPath, 'r');
    try {
      const originalWriter = await fs.open(this.filePath, 'w');
      try {
        await this._copyBytes(tempReader, originalWriter, 0, Infinity);
        await originalWriter.sync();
      } finally {
        await originalWriter.close();
      }
    } finally {
      await tempReader.close();
    }

    try {
      await fs.unlink(tempPath);
    } catch {
      // Ignore temp file cleanup errors
    }
  }

  /**
   * Reads the index file.
   * @returns {Promise<object>} Parsed index, or `{ indexes: {}, offsets: [] }`
   *   when the index file does not exist.
   */
  async readIndex() {
    try {
      const data = await fs.readFile(this.indexPath, 'utf8');
      return JSON.parse(data);
    } catch (error) {
      if (error.code === 'ENOENT') {
        return { indexes: {}, offsets: [] };
      }
      throw error;
    }
  }

  /**
   * Writes the index file as pretty-printed JSON.
   * @param {object} indexData
   * @returns {Promise<void>}
   */
  async writeIndex(indexData) {
    await this.ensureDirectory();
    const data = JSON.stringify(indexData, null, 2);
    await fs.writeFile(this.indexPath, data, 'utf8');
  }

  /**
   * Reads the metadata file.
   * @returns {Promise<object>} Parsed metadata, or a fresh default record
   *   (version, created, lastModified, recordCount, fileSize) when the
   *   metadata file does not exist.
   */
  async readMeta() {
    try {
      const data = await fs.readFile(this.metaPath, 'utf8');
      return JSON.parse(data);
    } catch (error) {
      if (error.code === 'ENOENT') {
        return {
          version: '2.0.0', // Keep version number for internal tracking
          created: new Date().toISOString(),
          lastModified: new Date().toISOString(),
          recordCount: 0,
          fileSize: 0
        };
      }
      throw error;
    }
  }

  /**
   * Writes the metadata file as pretty-printed JSON.
   * Note: sets `metaData.lastModified` on the object passed in.
   * @param {object} metaData
   * @returns {Promise<void>}
   */
  async writeMeta(metaData) {
    await this.ensureDirectory();
    metaData.lastModified = new Date().toISOString();
    const data = JSON.stringify(metaData, null, 2);
    await fs.writeFile(this.metaPath, data, 'utf8');
  }

  /**
   * Gets size and timestamps of the data file.
   * @returns {Promise<{size: number, created: Date|null, modified: Date|null}>}
   *   Zero/null values when the file is missing or cannot be accessed
   *   (ENOENT, EPERM, EACCES).
   */
  async getStats() {
    try {
      const stats = await fs.stat(this.filePath);
      return {
        size: stats.size,
        created: stats.birthtime,
        modified: stats.mtime
      };
    } catch (error) {
      if (error.code === 'ENOENT') {
        return { size: 0, created: null, modified: null };
      }
      if (error.code === 'EPERM' || error.code === 'EACCES') {
        // On Windows, file might be locked, return default values
        return { size: 0, created: null, modified: null };
      }
      throw error;
    }
  }

  /**
   * Checks whether the data file is accessible.
   * @returns {Promise<boolean>}
   */
  async exists() {
    try {
      await fs.access(this.filePath);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Removes the data file and both sidecar files. Best effort: failures
   * (such as a file that does not exist) are ignored.
   * @returns {Promise<void>}
   */
  async destroy() {
    const files = [this.filePath, this.indexPath, this.metaPath];
    for (const file of files) {
      try {
        await fs.unlink(file);
      } catch {
        // Ignore if file doesn't exist
      }
    }
  }
}
673
+
674
+ export default FileHandler;