jexidb 1.1.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,688 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.default = void 0;
7
+ var _fs = require("fs");
8
+ var _path = _interopRequireDefault(require("path"));
9
+ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
10
/** Extension of the JSONL data file; sidecar file names are derived from it. */
const DATA_EXTENSION = '.jsonl';
/** Byte value of '\n', the record separator. */
const LINE_FEED = 0x0a;
/** Byte value of ' ', used to pad records that shrink in place. */
const SPACE = 0x20;
/** Chunk size used when scanning for the end of a single line. */
const READ_CHUNK_BYTES = 1024;
/** Chunk size used when copying file contents. */
const COPY_CHUNK_BYTES = 8192;
/** Total attempts for operations that may hit transient permission errors. */
const MAX_PERMISSION_ATTEMPTS = 3;
/** Delay between permission-error retries, in milliseconds. */
const PERMISSION_RETRY_DELAY_MS = 100;
/** Upper bound on simultaneously open read handles in readLinesBatch. */
const MAX_PARALLEL_READS = 64;

/** Returns true for the error codes treated as (possibly transient) permission failures. */
function isPermissionError(error) {
  return error.code === 'EPERM' || error.code === 'EACCES';
}

/** Runs `operation`, retrying after a short delay while it fails with a permission error. */
async function retryOnPermissionError(operation) {
  for (let attempt = 1;; attempt++) {
    try {
      return await operation();
    } catch (error) {
      if (!isPermissionError(error) || attempt >= MAX_PERMISSION_ATTEMPTS) {
        throw error;
      }
      await new Promise(resolve => setTimeout(resolve, PERMISSION_RETRY_DELAY_MS));
    }
  }
}

/** Writes every byte of `bytes` to `handle`, at `position` or at the current file position when null. */
async function writeAll(handle, bytes, position = null) {
  let written = 0;
  while (written < bytes.length) {
    const target = position === null ? null : position + written;
    const {
      bytesWritten
    } = await handle.write(bytes, written, bytes.length - written, target);
    if (bytesWritten === 0) {
      throw new Error('Failed to write data: no bytes were written');
    }
    written += bytesWritten;
  }
}

/** Copies the byte range [start, end) of `reader` to the current position of `writer`; `end` may be Infinity. */
async function copyByteRange(reader, writer, start, end) {
  const buffer = Buffer.alloc(COPY_CHUNK_BYTES);
  let position = start;
  while (position < end) {
    // Never read past `end`, otherwise bytes that must be skipped would be copied.
    const wanted = Math.min(buffer.length, end - position);
    const {
      bytesRead
    } = await reader.read(buffer, 0, wanted, position);
    if (bytesRead === 0) break;
    await writeAll(writer, buffer.subarray(0, bytesRead));
    position += bytesRead;
  }
}

/** Deletes `filePath`, ignoring any failure (best-effort cleanup). */
async function unlinkQuietly(filePath) {
  try {
    await _fs.promises.unlink(filePath);
  } catch {
    // Best-effort cleanup only.
  }
}

/** Moves `tempPath` over `targetPath`; falls back to copying the contents when the rename is not permitted. */
async function installTempFile(tempPath, targetPath) {
  try {
    // rename replaces an existing target, so the data file is never missing.
    await _fs.promises.rename(tempPath, targetPath);
    return;
  } catch (error) {
    if (!isPermissionError(error)) {
      throw error;
    }
  }
  const source = await _fs.promises.open(tempPath, 'r');
  try {
    const target = await _fs.promises.open(targetPath, 'w');
    try {
      await copyByteRange(source, target, 0, Infinity);
      await target.sync();
    } finally {
      await target.close();
    }
  } finally {
    await source.close();
  }
  await unlinkQuietly(tempPath);
}

/**
 * Rewrites `filePath` through a temp file, dropping `oldLineBytes` bytes at `offset`
 * and inserting `replacement` (a Buffer, or null to insert nothing) in their place.
 */
async function spliceLine(filePath, offset, oldLineBytes, replacement) {
  const tempPath = filePath + '.tmp';
  const reader = await _fs.promises.open(filePath, 'r');
  try {
    const writer = await _fs.promises.open(tempPath, 'w');
    try {
      await copyByteRange(reader, writer, 0, offset);
      if (replacement !== null) {
        await writeAll(writer, replacement);
      }
      await copyByteRange(reader, writer, offset + oldLineBytes, Infinity);
      await writer.sync();
    } finally {
      await writer.close();
    }
  } catch (error) {
    await unlinkQuietly(tempPath);
    throw error;
  } finally {
    await reader.close();
  }
  await installTempFile(tempPath, filePath);
}

/**
 * Reads the raw bytes of the line starting at `offset`, without the trailing newline.
 * Returns null when the file does not exist and an empty Buffer when `offset` is at or past EOF.
 */
async function readRawLine(filePath, offset) {
  let handle;
  try {
    handle = await _fs.promises.open(filePath, 'r');
  } catch (error) {
    if (error.code === 'ENOENT') {
      return null; // File doesn't exist
    }
    throw error;
  }
  try {
    const buffer = Buffer.alloc(READ_CHUNK_BYTES);
    const pieces = [];
    let position = offset;
    while (true) {
      const {
        bytesRead
      } = await handle.read(buffer, 0, buffer.length, position);
      if (bytesRead === 0) break;
      const chunk = buffer.subarray(0, bytesRead);
      const newlineIndex = chunk.indexOf(LINE_FEED);
      if (newlineIndex !== -1) {
        // Copy: `buffer` is reused between iterations.
        pieces.push(Buffer.from(chunk.subarray(0, newlineIndex)));
        break;
      }
      pieces.push(Buffer.from(chunk));
      position += bytesRead;
    }
    return Buffer.concat(pieces);
  } finally {
    await handle.close();
  }
}

/** Returns `lineBytes` padded with spaces before its newline so that it occupies exactly `totalBytes`. */
function padLine(lineBytes, totalBytes) {
  const endsWithNewline = lineBytes.length > 0 && lineBytes[lineBytes.length - 1] === LINE_FEED;
  const content = endsWithNewline ? lineBytes.subarray(0, lineBytes.length - 1) : lineBytes;
  const padded = Buffer.alloc(totalBytes, SPACE);
  content.copy(padded, 0);
  padded[totalBytes - 1] = LINE_FEED;
  return padded;
}

/** Builds a sidecar path next to `filePath` by swapping '.jsonl' in the file name for `suffix`. */
function deriveSidecarPath(filePath, suffix) {
  const directory = _path.default.dirname(filePath);
  const name = _path.default.basename(filePath);
  // Only the file name is rewritten; when it has no '.jsonl' the suffix is appended so the
  // sidecar can never be the data file itself.
  const sidecarName = name.includes(DATA_EXTENSION) ? name.replace(DATA_EXTENSION, suffix) : name + suffix;
  return _path.default.join(directory, sidecarName);
}

/**
 * FileHandler - file operations for a JSONL data file and its sidecar files.
 *
 * All offsets and sizes handled by this class are UTF-8 byte counts, never
 * string lengths.
 */
class FileHandler {
  /**
   * @param {string} filePath - Path of the JSONL data file (resolved to an absolute path).
   */
  constructor(filePath) {
    this.filePath = _path.default.resolve(filePath);
    this.indexPath = deriveSidecarPath(this.filePath, '.index.json');
    this.metaPath = deriveSidecarPath(this.filePath, '.meta.json');
    this.writeBuffer = [];
    this.isWriting = false;
  }

  /**
   * Ensures the directory of the data file exists, creating it recursively if needed.
   * @returns {Promise<void>}
   */
  async ensureDirectory() {
    const dir = _path.default.dirname(this.filePath);
    try {
      await _fs.promises.access(dir);
    } catch {
      await _fs.promises.mkdir(dir, {
        recursive: true
      });
    }
  }

  /**
   * Calculates the byte length of a string when encoded as UTF-8.
   * @param {string} str
   * @returns {number}
   */
  getByteLength(str) {
    return Buffer.byteLength(str, 'utf8');
  }

  /**
   * Serializes a value to a single JSON line terminated by a newline.
   * @param {*} obj
   * @returns {string}
   */
  serialize(obj) {
    return JSON.stringify(obj) + '\n';
  }

  /**
   * Parses one JSON line; surrounding whitespace is ignored.
   * @param {string} line
   * @returns {*} The parsed value.
   * @throws {Error} When the line is empty or is not valid JSON; the message includes
   *   up to the first 100 characters of the line.
   */
  deserialize(line) {
    try {
      const trimmed = line.trim();
      if (!trimmed) {
        throw new Error('Empty line');
      }
      return JSON.parse(trimmed);
    } catch (error) {
      // Add more context to the error
      const context = line.length > 100 ? line.substring(0, 100) + '...' : line;
      throw new Error(`Failed to deserialize JSON data: ${context} - ${error.message}`);
    }
  }

  /**
   * Reads the line that starts at the given byte offset.
   *
   * The line is assembled as bytes and decoded once, so multi-byte characters that
   * straddle a read-chunk boundary are preserved.
   * @param {number} offset - Byte offset of the first character of the line.
   * @returns {Promise<string|null>} The line without its newline, '' when the offset is
   *   at or past the end of the file, or null when the file does not exist.
   */
  async readLine(offset) {
    const raw = await readRawLine(this.filePath, offset);
    return raw === null ? null : raw.toString('utf8');
  }

  /**
   * Reads several lines one after another.
   * @param {Iterable<number>} offsets - Byte offsets of the lines to read.
   * @returns {Promise<Array<{offset: number, line: string}>>} One entry per offset, in
   *   input order; offsets are skipped when the file does not exist.
   */
  async readLines(offsets) {
    const results = [];
    for (const offset of offsets) {
      const line = await this.readLine(offset);
      if (line !== null) {
        results.push({
          offset,
          line
        });
      }
    }
    return results;
  }

  /**
   * Reads several lines concurrently.
   *
   * Reads run in bounded groups so that a large offset list cannot exhaust file
   * descriptors; the result order always matches the input order.
   * @param {number[]} offsets - Byte offsets of the lines to read.
   * @returns {Promise<string[]>} The lines (without offsets); empty when `offsets` is
   *   empty or missing.
   */
  async readLinesBatch(offsets) {
    if (!offsets || offsets.length === 0) {
      return [];
    }
    const lines = [];
    for (let start = 0; start < offsets.length; start += MAX_PARALLEL_READS) {
      const group = offsets.slice(start, start + MAX_PARALLEL_READS);
      const groupLines = await Promise.all(group.map(offset => this.readLine(offset)));
      for (const line of groupLines) {
        if (line !== null) {
          lines.push(line);
        }
      }
    }
    return lines;
  }

  /**
   * Groups offsets into batches of "consecutive" offsets.
   *
   * Offsets are sorted ascending; an offset joins the current batch only when it equals
   * `batch.start + batch.count * estimateLineSize()`. Because the line size is a fixed
   * estimate, this is a heuristic rather than a statement about the actual file layout.
   * The first batch is always flagged `consecutive: true`, later ones `consecutive: false`.
   * @param {number[]} offsets
   * @returns {Array<{consecutive: boolean, start: number, count: number, offsets: number[]}>}
   */
  groupConsecutiveOffsets(offsets) {
    if (offsets.length === 0) return [];
    const sortedOffsets = [...offsets].sort((a, b) => a - b);
    const lineSize = this.estimateLineSize();
    const batches = [];
    let currentBatch = {
      consecutive: true,
      start: sortedOffsets[0],
      count: 1,
      offsets: [sortedOffsets[0]]
    };
    for (let i = 1; i < sortedOffsets.length; i++) {
      const currentOffset = sortedOffsets[i];
      const expectedOffset = currentBatch.start + currentBatch.count * lineSize;
      if (currentOffset === expectedOffset) {
        // Consecutive offset
        currentBatch.count++;
        currentBatch.offsets.push(currentOffset);
      } else {
        // Non-consecutive offset, start new batch
        batches.push(currentBatch);
        currentBatch = {
          consecutive: false,
          start: currentOffset,
          count: 1,
          offsets: [currentOffset]
        };
      }
    }
    batches.push(currentBatch);
    return batches;
  }

  /**
   * Estimated size of one line in bytes, used for buffer sizing and offset grouping.
   * @returns {number} Always 200.
   */
  estimateLineSize() {
    // Default estimate of 200 bytes per line (JSON + newline)
    // This can be improved with actual statistics
    return 200;
  }

  /**
   * Reads up to `count` complete lines starting at `startOffset` with a single read.
   *
   * Only one buffer of max(8192, count * estimateLineSize()) bytes is read, so fewer than
   * `count` lines are returned when the lines do not fit in it. A final segment that is
   * not terminated by a newline is never returned, and blank lines are dropped.
   * @param {number} startOffset - Byte offset of the first line.
   * @param {number} count - Maximum number of lines to return.
   * @returns {Promise<string[]>} The lines; empty when the file does not exist.
   */
  async readConsecutiveLines(startOffset, count) {
    try {
      const fd = await _fs.promises.open(this.filePath, 'r');
      try {
        // Read a larger buffer to get multiple lines
        const bufferSize = Math.max(8192, count * this.estimateLineSize());
        const buffer = Buffer.alloc(bufferSize);
        const {
          bytesRead
        } = await fd.read(buffer, 0, buffer.length, startOffset);
        if (bytesRead === 0) return [];
        const lines = buffer.toString('utf8', 0, bytesRead).split('\n');

        // The last segment is either empty or an incomplete line, so it is excluded.
        const limit = Math.min(count, lines.length - 1);
        const results = [];
        for (let i = 0; i < limit; i++) {
          if (lines[i].trim()) {
            results.push(lines[i]);
          }
        }
        return results;
      } finally {
        await fd.close();
      }
    } catch (error) {
      if (error.code === 'ENOENT') {
        return [];
      }
      throw error;
    }
  }

  /**
   * Appends one serialized record to the data file and flushes it to disk.
   * Permission errors (EPERM/EACCES) are retried up to three attempts in total.
   * @param {*} data - Value to serialize as one JSON line.
   * @returns {Promise<number>} Number of bytes appended.
   */
  async appendLine(data) {
    await this.ensureDirectory();
    const line = this.serialize(data);
    return retryOnPermissionError(async () => {
      const fd = await _fs.promises.open(this.filePath, 'a');
      try {
        await fd.write(line);
        await fd.sync(); // Ensures flush
        return this.getByteLength(line);
      } finally {
        await fd.close();
      }
    });
  }

  /**
   * Appends pre-serialized data (one or more newline-terminated lines) in one operation.
   * Permission errors (EPERM/EACCES) are retried up to three attempts in total.
   * @param {string|Buffer} batchData - Data to append; strings are encoded as UTF-8.
   * @returns {Promise<number>} Number of bytes appended.
   */
  async appendBatch(batchData) {
    await this.ensureDirectory();

    // Convert to Buffer if it's a string, otherwise use as-is
    const buffer = Buffer.isBuffer(batchData) ? batchData : Buffer.from(batchData, 'utf8');
    return retryOnPermissionError(async () => {
      await _fs.promises.appendFile(this.filePath, buffer);
      return buffer.length;
    });
  }

  /**
   * Replaces the line at `offset` with the serialization of `data`.
   *
   * A record that fits in the old line's bytes is overwritten in place; a larger one
   * forces a rewrite of the file.
   * @param {number} offset - Byte offset of the line to replace.
   * @param {*} data - New value for the line.
   * @returns {Promise<number>} The offset of the replaced line (unchanged).
   * @throws {Error} When the data file does not exist.
   */
  async replaceLine(offset, data) {
    const newLine = this.serialize(data);
    const newLineBytes = this.getByteLength(newLine);

    // Measure the old line from its raw bytes so the size matches what is on disk.
    const oldLine = await readRawLine(this.filePath, offset);
    if (oldLine === null) {
      throw new Error(`Line at offset ${offset} not found`);
    }
    const oldLineBytes = oldLine.length + 1; // + newline

    if (newLineBytes > oldLineBytes) {
      return await this.replaceLineWithRewrite(offset, oldLineBytes, newLine);
    }
    return await this.replaceLineInPlace(offset, oldLineBytes, newLine);
  }

  /**
   * Overwrites the line at `offset` in place.
   *
   * When the new line is shorter than the old one, the offsets of all following lines
   * are kept valid: the line is padded with spaces before its newline (ignored by
   * `deserialize`), or, when it is the last line, the file is truncated after it.
   * A new line that does not fit is delegated to `replaceLineWithRewrite`.
   * @param {number} offset - Byte offset of the line to overwrite.
   * @param {number} oldLineBytes - Byte size of the existing line including its newline.
   * @param {string} newLine - Serialized replacement line.
   * @returns {Promise<number>} The offset of the replaced line (unchanged).
   */
  async replaceLineInPlace(offset, oldLineBytes, newLine) {
    const payload = Buffer.from(newLine, 'utf8');
    if (payload.length > oldLineBytes) {
      // Writing in place would clobber the following line.
      return await this.replaceLineWithRewrite(offset, oldLineBytes, newLine);
    }
    const fd = await _fs.promises.open(this.filePath, 'r+');
    try {
      const shrinks = payload.length < oldLineBytes;
      const {
        size
      } = await fd.stat();
      const isLastLine = offset + oldLineBytes >= size;
      if (shrinks && !isLastLine) {
        await writeAll(fd, padLine(payload, oldLineBytes), offset);
      } else {
        await writeAll(fd, payload, offset);
        if (shrinks) {
          // Nothing follows the old line, so its leftover tail can simply be cut off.
          await fd.truncate(offset + payload.length);
        }
      }
      await fd.sync(); // Ensures flush
      return offset; // Return the same offset since we overwrote in place
    } finally {
      await fd.close();
    }
  }

  /**
   * Replaces the line at `offset` by rewriting the whole file through `<file>.tmp`.
   * @param {number} offset - Byte offset of the line to replace.
   * @param {number} oldLineBytes - Byte size of the existing line including its newline.
   * @param {string} newLine - Serialized replacement line.
   * @returns {Promise<number>} The offset of the replaced line (unchanged).
   */
  async replaceLineWithRewrite(offset, oldLineBytes, newLine) {
    await spliceLine(this.filePath, offset, oldLineBytes, Buffer.from(newLine, 'utf8'));
    return offset; // Return the same offset since the line position didn't change
  }

  /**
   * Removes the line at `offset`.
   * @param {number} offset - Byte offset of the line.
   * @param {boolean} [markAsDeleted=true] - When true the line is replaced by a
   *   `{_deleted, _deletedAt}` tombstone record; when false it is physically removed.
   * @returns {Promise<number>} The line offset when marking as deleted, otherwise the
   *   number of bytes removed.
   */
  async removeLine(offset, markAsDeleted = true) {
    if (!markAsDeleted) {
      // Remove physically
      return await this.removeLinePhysically(offset);
    }
    // Mark as deleted
    const deletedData = {
      _deleted: true,
      _deletedAt: new Date().toISOString()
    };
    return await this.replaceLine(offset, deletedData);
  }

  /**
   * Physically removes the line at `offset` by rewriting the file through `<file>.tmp`.
   * @param {number} offset - Byte offset of the line to remove.
   * @returns {Promise<number>} Byte size of the removed line including its newline, or
   *   0 when the data file does not exist.
   */
  async removeLinePhysically(offset) {
    const oldLine = await readRawLine(this.filePath, offset);
    if (oldLine === null) {
      return 0;
    }
    const oldLineBytes = oldLine.length + 1; // + newline
    await spliceLine(this.filePath, offset, oldLineBytes, null);
    return oldLineBytes;
  }

  /**
   * Reads and parses the index file.
   * @returns {Promise<Object>} The parsed index, or `{indexes: {}, offsets: []}` when the
   *   index file does not exist.
   */
  async readIndex() {
    try {
      const data = await _fs.promises.readFile(this.indexPath, 'utf8');
      return JSON.parse(data);
    } catch (error) {
      if (error.code === 'ENOENT') {
        return {
          indexes: {},
          offsets: []
        };
      }
      throw error;
    }
  }

  /**
   * Writes the index file as pretty-printed JSON.
   * @param {*} indexData
   * @returns {Promise<void>}
   */
  async writeIndex(indexData) {
    await this.ensureDirectory();
    const data = JSON.stringify(indexData, null, 2);
    await _fs.promises.writeFile(this.indexPath, data, 'utf8');
  }

  /**
   * Reads and parses the metadata file.
   * @returns {Promise<Object>} The parsed metadata, or fresh default metadata when the
   *   metadata file does not exist.
   */
  async readMeta() {
    try {
      const data = await _fs.promises.readFile(this.metaPath, 'utf8');
      return JSON.parse(data);
    } catch (error) {
      if (error.code === 'ENOENT') {
        const now = new Date().toISOString();
        return {
          version: '2.0.1',
          // Keep version number for internal tracking
          created: now,
          lastModified: now,
          recordCount: 0,
          fileSize: 0
        };
      }
      throw error;
    }
  }

  /**
   * Writes the metadata file as pretty-printed JSON.
   * Note: `metaData.lastModified` is set to the current time on the passed object.
   * @param {Object} metaData
   * @returns {Promise<void>}
   */
  async writeMeta(metaData) {
    await this.ensureDirectory();
    metaData.lastModified = new Date().toISOString();
    const data = JSON.stringify(metaData, null, 2);
    await _fs.promises.writeFile(this.metaPath, data, 'utf8');
  }

  /**
   * Gets size and timestamps of the data file.
   * @returns {Promise<{size: number, created: (Date|null), modified: (Date|null)}>}
   *   Zero size and null timestamps when the file is missing or not accessible
   *   (ENOENT, EPERM, EACCES).
   */
  async getStats() {
    try {
      const stats = await _fs.promises.stat(this.filePath);
      return {
        size: stats.size,
        created: stats.birthtime,
        modified: stats.mtime
      };
    } catch (error) {
      // On Windows the file might be locked; treat that like a missing file.
      if (error.code === 'ENOENT' || isPermissionError(error)) {
        return {
          size: 0,
          created: null,
          modified: null
        };
      }
      throw error;
    }
  }

  /**
   * Checks whether the data file is accessible.
   * @returns {Promise<boolean>}
   */
  async exists() {
    try {
      await _fs.promises.access(this.filePath);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Removes the data, index and metadata files. Best effort: failures (for example a
   * file that does not exist) are ignored.
   * @returns {Promise<void>}
   */
  async destroy() {
    for (const file of [this.filePath, this.indexPath, this.metaPath]) {
      await unlinkQuietly(file);
    }
  }
}
688
+ var _default = exports.default = FileHandler;