jexidb 2.0.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/.babelrc +13 -0
  2. package/.gitattributes +2 -0
  3. package/CHANGELOG.md +132 -101
  4. package/LICENSE +21 -21
  5. package/README.md +301 -639
  6. package/babel.config.json +5 -0
  7. package/dist/Database.cjs +3896 -0
  8. package/docs/API.md +1051 -390
  9. package/docs/EXAMPLES.md +701 -177
  10. package/docs/README.md +194 -184
  11. package/examples/iterate-usage-example.js +157 -0
  12. package/examples/simple-iterate-example.js +115 -0
  13. package/jest.config.js +24 -0
  14. package/package.json +63 -54
  15. package/scripts/README.md +47 -0
  16. package/scripts/clean-test-files.js +75 -0
  17. package/scripts/prepare.js +31 -0
  18. package/scripts/run-tests.js +80 -0
  19. package/src/Database.mjs +4130 -0
  20. package/src/FileHandler.mjs +1101 -0
  21. package/src/OperationQueue.mjs +279 -0
  22. package/src/SchemaManager.mjs +268 -0
  23. package/src/Serializer.mjs +511 -0
  24. package/src/managers/ConcurrencyManager.mjs +257 -0
  25. package/src/managers/IndexManager.mjs +1403 -0
  26. package/src/managers/QueryManager.mjs +1273 -0
  27. package/src/managers/StatisticsManager.mjs +262 -0
  28. package/src/managers/StreamingProcessor.mjs +429 -0
  29. package/src/managers/TermManager.mjs +278 -0
  30. package/test/$not-operator-with-and.test.js +282 -0
  31. package/test/README.md +8 -0
  32. package/test/close-init-cycle.test.js +256 -0
  33. package/test/critical-bugs-fixes.test.js +1069 -0
  34. package/test/index-persistence.test.js +306 -0
  35. package/test/index-serialization.test.js +314 -0
  36. package/test/indexed-query-mode.test.js +360 -0
  37. package/test/iterate-method.test.js +272 -0
  38. package/test/query-operators.test.js +238 -0
  39. package/test/regex-array-fields.test.js +129 -0
  40. package/test/score-method.test.js +238 -0
  41. package/test/setup.js +17 -0
  42. package/test/term-mapping-minimal.test.js +154 -0
  43. package/test/term-mapping-simple.test.js +257 -0
  44. package/test/term-mapping.test.js +514 -0
  45. package/test/writebuffer-flush-resilience.test.js +204 -0
  46. package/dist/FileHandler.js +0 -688
  47. package/dist/IndexManager.js +0 -353
  48. package/dist/IntegrityChecker.js +0 -364
  49. package/dist/JSONLDatabase.js +0 -1333
  50. package/dist/index.js +0 -617
  51. package/docs/MIGRATION.md +0 -295
  52. package/examples/auto-save-example.js +0 -158
  53. package/examples/cjs-usage.cjs +0 -82
  54. package/examples/close-vs-delete-example.js +0 -71
  55. package/examples/esm-usage.js +0 -113
  56. package/examples/example-columns.idx.jdb +0 -0
  57. package/examples/example-columns.jdb +0 -9
  58. package/examples/example-options.idx.jdb +0 -0
  59. package/examples/example-options.jdb +0 -0
  60. package/examples/example-users.idx.jdb +0 -0
  61. package/examples/example-users.jdb +0 -5
  62. package/examples/simple-test.js +0 -55
  63. package/src/FileHandler.js +0 -674
  64. package/src/IndexManager.js +0 -363
  65. package/src/IntegrityChecker.js +0 -379
  66. package/src/JSONLDatabase.js +0 -1391
  67. package/src/index.js +0 -608
package/src/Database.mjs (new file)
@@ -0,0 +1,4130 @@
+ import { EventEmitter } from 'events'
+ import IndexManager from './managers/IndexManager.mjs'
+ import Serializer from './Serializer.mjs'
+ import { Mutex } from 'async-mutex'
+ import fs from 'fs'
+ import readline from 'readline'
+ import { OperationQueue } from './OperationQueue.mjs'
+
+ /**
+  * IterateEntry class for intuitive API with automatic change detection
+  * Uses native JavaScript setters for maximum performance
+  */
+ class IterateEntry {
+   constructor(entry, originalRecord) {
+     this._entry = entry
+     this._originalRecord = originalRecord
+     this._modified = false
+     this._markedForDeletion = false
+   }
+
+   // Generic getter that returns values from the original entry
+   get(property) {
+     return this._entry[property]
+   }
+
+   // Generic setter that sets values in the original entry
+   set(property, value) {
+     this._entry[property] = value
+     this._modified = true
+   }
+
+   // Delete method for intuitive deletion
+   delete() {
+     this._markedForDeletion = true
+     return true
+   }
+
+   // Getter for the underlying entry (for compatibility)
+   get value() {
+     return this._entry
+   }
+
+   // Check if entry was modified
+   get isModified() {
+     return this._modified
+   }
+
+   // Check if entry is marked for deletion
+   get isMarkedForDeletion() {
+     return this._markedForDeletion
+   }
+
+   // Expose the underlying entry when the wrapper is coerced to a primitive
+   get [Symbol.toPrimitive]() {
+     return this._entry
+   }
+
+   // Custom string tag so Object.prototype.toString reports 'IterateEntry'
+   get [Symbol.toStringTag]() {
+     return 'IterateEntry'
+   }
+ }
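
For orientation, a minimal sketch of how IterateEntry is meant to be consumed through the iterate API (not part of the diff; the `for await` loop shape is assumed from the bundled examples/iterate-usage-example.js):

    // Hypothetical usage sketch - each entry is an IterateEntry wrapper
    for await (const entry of db.iterate()) {
      if (entry.get('status') === 'stale') {
        entry.set('status', 'archived')   // sets _modified = true
      }
      if (entry.get('expired')) {
        entry.delete()                    // sets _markedForDeletion = true
      }
      // entry.isModified / entry.isMarkedForDeletion tell the iterator
      // which records to rewrite or drop when changes are flushed.
    }
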
+
+ // Import managers
+ import FileHandler from './FileHandler.mjs'
+ import { QueryManager } from './managers/QueryManager.mjs'
+ import { ConcurrencyManager } from './managers/ConcurrencyManager.mjs'
+ import { StatisticsManager } from './managers/StatisticsManager.mjs'
+ import StreamingProcessor from './managers/StreamingProcessor.mjs'
+ import TermManager from './managers/TermManager.mjs'
+
+ /**
+  * InsertSession - Simple batch insertion without memory duplication
+  */
+ class InsertSession {
+   constructor(database, sessionOptions = {}) {
+     this.database = database
+     this.batchSize = sessionOptions.batchSize || 100
+     this.totalInserted = 0
+     this.flushing = false
+     this.batches = [] // Array of batches to avoid slice() in flush()
+     this.currentBatch = [] // Current batch being filled
+     this.sessionId = Math.random().toString(36).substr(2, 9)
+
+     // Register this session as active
+     this.database.activeInsertSessions.add(this)
+   }
+
+   async add(record) {
+     // CRITICAL FIX: Remove the committed check to allow auto-reusability
+     // The session should be able to handle multiple commits
+
+     if (this.database.destroyed) {
+       throw new Error('Database is destroyed')
+     }
+
+     // Process record
+     const finalRecord = { ...record }
+     const id = finalRecord.id || this.database.generateId()
+     finalRecord.id = id
+
+     // Add to current batch
+     this.currentBatch.push(finalRecord)
+     this.totalInserted++
+
+     // If batch is full, move it to batches array
+     if (this.currentBatch.length >= this.batchSize) {
+       this.batches.push(this.currentBatch)
+       this.currentBatch = []
+     }
+
+     return finalRecord
+   }
+
+   async flush() {
+     // Check if there's anything to flush
+     if (this.batches.length === 0 && this.currentBatch.length === 0) return
+
+     // Prevent concurrent flushes
+     if (this.flushing) return
+     this.flushing = true
+
+     try {
+       // Process all complete batches
+       for (const batch of this.batches) {
+         await this.database.insertBatch(batch)
+       }
+
+       // Process remaining records in current batch
+       if (this.currentBatch.length > 0) {
+         await this.database.insertBatch(this.currentBatch)
+       }
+
+       // Clear all batches
+       this.batches = []
+       this.currentBatch = []
+     } finally {
+       this.flushing = false
+     }
+   }
+
+   async commit() {
+     // CRITICAL FIX: Make session auto-reusable by removing committed state
+     // Allow multiple commits on the same session
+
+     await this.flush()
+
+     // Reset session state for next commit cycle
+     const insertedCount = this.totalInserted
+     this.totalInserted = 0
+     return insertedCount
+   }
+
+   /**
+    * Wait for this session's operations to complete
+    */
+   async waitForOperations(maxWaitTime = null) {
+     const startTime = Date.now()
+     const hasTimeout = maxWaitTime !== null && maxWaitTime !== undefined
+
+     while (this.flushing || this.batches.length > 0 || this.currentBatch.length > 0) {
+       // Check timeout only if we have one
+       if (hasTimeout && (Date.now() - startTime) >= maxWaitTime) {
+         return false
+       }
+
+       await new Promise(resolve => setTimeout(resolve, 1))
+     }
+
+     return true
+   }
+
+   /**
+    * Check if this session has pending operations
+    */
+   hasPendingOperations() {
+     return this.flushing || this.batches.length > 0 || this.currentBatch.length > 0
+   }
+
+   /**
+    * Destroy this session and unregister it
+    */
+   destroy() {
+     // Unregister from database
+     this.database.activeInsertSessions.delete(this)
+
+     // Clear all data
+     this.batches = []
+     this.currentBatch = []
+     this.totalInserted = 0
+     this.flushing = false
+   }
+ }
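
A minimal sketch of the batch-insert flow this class supports (record shape and batch size are illustrative; `beginInsertSession` is defined further down in this file):

    const session = db.beginInsertSession({ batchSize: 500 })
    for (const row of rows) {
      await session.add(row)              // buffered; full batches are queued
    }
    const count = await session.commit()  // flushes via db.insertBatch(), returns count
    session.destroy()                     // unregisters from activeInsertSessions
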
+
+ /**
+  * JexiDB - A high-performance, in-memory database with persistence
+  *
+  * Features:
+  * - In-memory storage with optional persistence
+  * - Advanced indexing and querying
+  * - Transaction support
+  * - Manual save functionality
+  * - Recovery mechanisms
+  * - Performance optimizations
+  */
+ class Database extends EventEmitter {
+   constructor(file, opts = {}) {
+     super()
+
+     // Generate unique instance ID for debugging
+     this.instanceId = Math.random().toString(36).substr(2, 9)
+
+     // Initialize state flags
+     this.managersInitialized = false
+
+     // Track active insert sessions
+     this.activeInsertSessions = new Set()
+
+     // Set default options
+     this.opts = Object.assign({
+       // Core options - auto-save removed, user must call save() manually
+       // File creation options
+       create: opts.create !== false, // Create file if it doesn't exist (default true)
+       clear: opts.clear === true, // Clear existing files before loading (default false)
+       // Timeout configurations for preventing hangs
+       mutexTimeout: opts.mutexTimeout || 15000, // 15 seconds timeout for mutex operations
+       maxFlushAttempts: opts.maxFlushAttempts || 50, // Maximum flush attempts before giving up
+       // Term mapping options (always enabled and auto-detected from indexes)
+       termMappingCleanup: opts.termMappingCleanup !== false, // Clean up orphaned terms on save (enabled by default)
+       // Recovery options
+       enableRecovery: opts.enableRecovery === true, // Recovery mechanisms disabled by default for large databases
+       // Buffer size options for range merging
+       maxBufferSize: opts.maxBufferSize || 4 * 1024 * 1024, // 4MB default maximum buffer size for grouped ranges
+       // Memory management options (similar to published v1.1.0)
+       maxMemoryUsage: opts.maxMemoryUsage || 64 * 1024, // 64KB limit like published version
+       maxWriteBufferSize: opts.maxWriteBufferSize || 1000, // Maximum records in writeBuffer
+       // Query strategy options
+       streamingThreshold: opts.streamingThreshold || 0.8, // Use streaming when limit > 80% of total records
+       // Serialization options
+       enableArraySerialization: opts.enableArraySerialization !== false, // Enable array serialization by default
+     }, opts)
+
+     // CRITICAL FIX: Initialize AbortController for lifecycle management
+     this.abortController = new AbortController()
+     this.pendingOperations = new Set()
+     this.pendingPromises = new Set()
+     this.destroyed = false
+     this.destroying = false
+     this.closed = false
+     this.operationCounter = 0
+
+     // CRITICAL FIX: Initialize OperationQueue to prevent race conditions
+     this.operationQueue = new OperationQueue(false) // Disable debug mode for queue
+
+     // Normalize file path to ensure it ends with .jdb
+     this.normalizedFile = this.normalizeFilePath(file)
+
+     // Initialize core properties
+     this.offsets = [] // Array of byte offsets for each record
+     this.indexOffset = 0 // Current position in file for new records
+     this.deletedIds = new Set() // Track deleted record IDs
+     this.shouldSave = false
+     this.isLoading = false
+     this.isSaving = false
+     this.lastSaveTime = null
+     this.initialized = false
+
+
+     // Initialize managers
+     this.initializeManagers()
+
+     // Initialize file mutex for thread safety
+     this.fileMutex = new Mutex()
+
+     // Initialize performance tracking
+     this.performanceStats = {
+       operations: 0,
+       saves: 0,
+       loads: 0,
+       queryTime: 0,
+       saveTime: 0,
+       loadTime: 0
+     }
+
+     // Initialize usage stats for QueryManager
+     this.usageStats = {
+       totalQueries: 0,
+       indexedQueries: 0,
+       streamingQueries: 0,
+       indexedAverageTime: 0,
+       streamingAverageTime: 0
+     }
+
+     // Note: Validation will be done after configuration conversion in initializeManagers()
+   }
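
The defaults above correspond to a constructor call like this sketch (values shown are the documented defaults; the fields/indexes shape comes from the validation methods below):

    const db = new Database('./users', {   // path is normalized to ./users.jdb
      create: true,                        // create the file if it is missing
      clear: false,                        // keep existing data
      fields: { name: 'string', age: 'number', tags: 'array:string' },
      indexes: ['name', 'tags'],           // new format: index a subset of fields
      mutexTimeout: 15000,
      maxBufferSize: 4 * 1024 * 1024,
      maxWriteBufferSize: 1000,
      streamingThreshold: 0.8
    })
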
+
+   /**
+    * Validate field and index configuration
+    */
+   validateIndexConfiguration() {
+     // Validate fields configuration
+     if (this.opts.fields && typeof this.opts.fields === 'object') {
+       this.validateFieldTypes(this.opts.fields, 'fields')
+     }
+
+     // Validate indexes configuration (legacy support)
+     if (this.opts.indexes && typeof this.opts.indexes === 'object') {
+       this.validateFieldTypes(this.opts.indexes, 'indexes')
+     }
+
+     // Validate indexes array (new format) - but only if we have fields
+     if (this.opts.originalIndexes && Array.isArray(this.opts.originalIndexes)) {
+       if (!this.opts.fields) {
+         throw new Error('Index fields array requires fields configuration. Use: { fields: {...}, indexes: [...] }')
+       }
+       this.validateIndexFields(this.opts.originalIndexes)
+     }
+
+     if (this.opts.debugMode) {
+       const fieldCount = this.opts.fields ? Object.keys(this.opts.fields).length : 0
+       const indexCount = Array.isArray(this.opts.indexes) ? this.opts.indexes.length :
+         (this.opts.indexes && typeof this.opts.indexes === 'object' ? Object.keys(this.opts.indexes).length : 0)
+       if (fieldCount > 0 || indexCount > 0) {
+         console.log(`✅ Configuration validated: ${fieldCount} fields, ${indexCount} indexes [${this.instanceId}]`)
+       }
+     }
+   }
+
+   /**
+    * Validate field types
+    */
+   validateFieldTypes(fields, configType) {
+     const supportedTypes = ['string', 'number', 'boolean', 'array:string', 'array:number', 'array:boolean', 'array', 'object']
+     const errors = []
+
+     for (const [fieldName, fieldType] of Object.entries(fields)) {
+       // Check if type is supported
+       if (!supportedTypes.includes(fieldType)) {
+         errors.push(`Unsupported ${configType} type '${fieldType}' for field '${fieldName}'. Supported types: ${supportedTypes.join(', ')}`)
+       }
+
+       // Warn about legacy array type but don't error
+       if (fieldType === 'array') {
+         if (this.opts.debugMode) {
+           console.log(`⚠️ Legacy array type '${fieldType}' for field '${fieldName}'. Consider using 'array:string' for better performance.`)
+         }
+       }
+
+       // Check for common mistakes
+       if (fieldType === 'array:') {
+         errors.push(`Incomplete array type '${fieldType}' for field '${fieldName}'. Must specify element type after colon: array:string, array:number, or array:boolean`)
+       }
+     }
+
+     if (errors.length > 0) {
+       throw new Error(`${configType.charAt(0).toUpperCase() + configType.slice(1)} configuration errors:\n${errors.map(e => ` - ${e}`).join('\n')}`)
+     }
+   }
+
+   /**
+    * Validate index fields array
+    */
+   validateIndexFields(indexFields) {
+     if (!this.opts.fields) {
+       throw new Error('Index fields array requires fields configuration. Use: { fields: {...}, indexes: [...] }')
+     }
+
+     const availableFields = Object.keys(this.opts.fields)
+     const errors = []
+
+     for (const fieldName of indexFields) {
+       if (!availableFields.includes(fieldName)) {
+         errors.push(`Index field '${fieldName}' not found in fields configuration. Available fields: ${availableFields.join(', ')}`)
+       }
+     }
+
+     if (errors.length > 0) {
+       throw new Error(`Index configuration errors:\n${errors.map(e => ` - ${e}`).join('\n')}`)
+     }
+   }
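
Concretely, the two validators accept or reject configurations like these (a sketch derived from the supportedTypes list and error strings above):

    // OK: every type is supported and every index names a declared field
    new Database('./ok', {
      fields: { title: 'string', year: 'number', genres: 'array:string' },
      indexes: ['title', 'genres']
    })

    // validateFieldTypes() throws: "Unsupported fields type 'date' ..."
    new Database('./bad-type', { fields: { added: 'date' } })

    // validateIndexFields() throws: "Index field 'author' not found ..."
    new Database('./bad-index', { fields: { title: 'string' }, indexes: ['author'] })
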
+
+   /**
+    * Prepare index configuration for IndexManager
+    */
+   prepareIndexConfiguration() {
+     // Convert new fields/indexes format to legacy format for IndexManager
+     if (this.opts.fields && Array.isArray(this.opts.indexes)) {
+       // New format: { fields: {...}, indexes: [...] }
+       const indexedFields = {}
+       const originalIndexes = [...this.opts.indexes] // Keep original for validation
+
+       for (const fieldName of this.opts.indexes) {
+         if (this.opts.fields[fieldName]) {
+           indexedFields[fieldName] = this.opts.fields[fieldName]
+         }
+       }
+
+       // Store original indexes for validation
+       this.opts.originalIndexes = originalIndexes
+
+       // Replace indexes array with object for IndexManager
+       this.opts.indexes = indexedFields
+
+       if (this.opts.debugMode) {
+         console.log(`🔍 Converted fields/indexes format: ${Object.keys(indexedFields).join(', ')} [${this.instanceId}]`)
+       }
+     }
+     // Legacy format (indexes as object) is already compatible
+   }
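
The conversion is purely structural, for example (sketch):

    // Before prepareIndexConfiguration():
    //   opts.fields  = { name: 'string', tags: 'array:string', age: 'number' }
    //   opts.indexes = ['name', 'tags']
    // After:
    //   opts.originalIndexes = ['name', 'tags']   // kept for validation
    //   opts.indexes = { name: 'string', tags: 'array:string' }
    // IndexManager only ever sees the legacy object form.
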
+
+   /**
+    * Initialize all managers
+    */
+   initializeManagers() {
+     // CRITICAL FIX: Prevent double initialization which corrupts term mappings
+     if (this.managersInitialized) {
+       if (this.opts.debugMode) {
+         console.log(`⚠️ initializeManagers() called again - skipping to prevent corruption [${this.instanceId}]`)
+       }
+       return
+     }
+
+     // CRITICAL FIX: Initialize serializer first - this was missing and causing crashes
+     this.serializer = new Serializer(this.opts)
+
+     // Initialize schema for array-based serialization
+     if (this.opts.enableArraySerialization !== false) {
+       this.initializeSchema()
+     }
+
+     // Initialize TermManager - always enabled for optimal performance
+     this.termManager = new TermManager()
+
+     // Auto-detect term mapping fields from indexes
+     const termMappingFields = this.getTermMappingFields()
+     this.termManager.termMappingFields = termMappingFields
+     this.opts.termMapping = true // Always enable term mapping for optimal performance
+
+     if (this.opts.debugMode) {
+       if (termMappingFields.length > 0) {
+         console.log(`🔍 TermManager initialized for fields: ${termMappingFields.join(', ')} [${this.instanceId}]`)
+       } else {
+         console.log(`🔍 TermManager initialized (no array:string fields detected) [${this.instanceId}]`)
+       }
+     }
+
+     // Prepare index configuration for IndexManager
+     this.prepareIndexConfiguration()
+
+     // Validate configuration after conversion
+     this.validateIndexConfiguration()
+
+     // Initialize IndexManager with database reference for term mapping
+     this.indexManager = new IndexManager(this.opts, null, this)
+     if (this.opts.debugMode) {
+       console.log(`🔍 IndexManager initialized with fields: ${this.indexManager.indexedFields.join(', ')} [${this.instanceId}]`)
+     }
+
+     // Mark managers as initialized
+     this.managersInitialized = true
+     this.indexOffset = 0
+     this.writeBuffer = []
+     this.writeBufferOffsets = [] // Track offsets for writeBuffer records
+     this.writeBufferSizes = [] // Track sizes for writeBuffer records
+     this.isInsideOperationQueue = false // Flag to prevent deadlock in save() calls
+
+     // Initialize other managers
+     this.fileHandler = new FileHandler(this.normalizedFile, this.fileMutex, this.opts)
+     this.queryManager = new QueryManager(this)
+     this.concurrencyManager = new ConcurrencyManager(this.opts)
+     this.statisticsManager = new StatisticsManager(this, this.opts)
+     this.streamingProcessor = new StreamingProcessor(this.opts)
+   }
+
+   /**
+    * Get term mapping fields from indexes (auto-detected)
+    * @returns {string[]} Array of field names that use term mapping
+    */
+   getTermMappingFields() {
+     if (!this.opts.indexes) return []
+
+     // Auto-detect fields that benefit from term mapping
+     const termMappingFields = []
+
+     for (const [field, type] of Object.entries(this.opts.indexes)) {
+       // Fields that should use term mapping
+       if (type === 'array:string' || type === 'string') {
+         termMappingFields.push(field)
+       }
+     }
+
+     return termMappingFields
+   }
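
For example (sketch based on the type check above):

    // Given opts.indexes (after conversion) of:
    //   { name: 'string', tags: 'array:string', age: 'number' }
    // getTermMappingFields() returns ['name', 'tags'];
    // 'age' is numeric, so it is indexed without a term dictionary.
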
+
+   /**
+    * CRITICAL FIX: Validate database state before critical operations
+    * Prevents crashes from undefined methods and invalid states
+    */
+   validateState() {
+     if (this.destroyed) {
+       throw new Error('Database is destroyed')
+     }
+
+     if (this.closed) {
+       throw new Error('Database is closed. Call init() to reopen it.')
+     }
+
+     // Allow operations during destroying phase for proper cleanup
+
+     if (!this.serializer) {
+       throw new Error('Database serializer not initialized - this indicates a critical bug')
+     }
+
+     if (!this.normalizedFile) {
+       throw new Error('Database file path not set - this indicates file path management failure')
+     }
+
+     if (!this.fileHandler) {
+       throw new Error('Database file handler not initialized')
+     }
+
+     if (!this.indexManager) {
+       throw new Error('Database index manager not initialized')
+     }
+
+     return true
+   }
+
+   /**
+    * CRITICAL FIX: Ensure file path is valid and accessible
+    * Prevents file path loss issues mentioned in crash report
+    */
+   ensureFilePath() {
+     if (!this.normalizedFile) {
+       throw new Error('Database file path is missing after initialization - this indicates a critical file path management failure')
+     }
+     return this.normalizedFile
+   }
+
+   /**
+    * Normalize file path to ensure it ends with .jdb
+    */
+   normalizeFilePath(file) {
+     if (!file) return null
+     return file.endsWith('.jdb') ? file : `${file}.jdb`
+   }
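
The normalization in practice:

    normalizeFilePath('./users')     // -> './users.jdb'
    normalizeFilePath('./users.jdb') // -> './users.jdb' (unchanged)
    normalizeFilePath(null)          // -> null
    // The index sidecar used elsewhere swaps '.jdb' for '.idx.jdb'.
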
+
+   /**
+    * Initialize the database
+    */
+   async initialize() {
+     // Check if database is destroyed first (before checking initialized)
+     if (this.destroyed) {
+       throw new Error('Cannot initialize destroyed database. Use a new instance instead.')
+     }
+
+     if (this.initialized) return
+
+     // Prevent concurrent initialization - wait for ongoing init to complete
+     if (this.isLoading) {
+       if (this.opts.debugMode) {
+         console.log('🔄 init() already in progress - waiting for completion')
+       }
+       // Wait for ongoing initialization to complete
+       while (this.isLoading) {
+         await new Promise(resolve => setTimeout(resolve, 10))
+       }
+       // Check if initialization completed successfully
+       if (this.initialized) {
+         if (this.opts.debugMode) {
+           console.log('✅ Concurrent init() completed - database is now initialized')
+         }
+         return
+       }
+       // If we get here, initialization failed - we can try again
+     }
+
+     try {
+       this.isLoading = true
+
+       // Reset closed state when reinitializing
+       this.closed = false
+
+       // Initialize managers (protected against double initialization)
+       this.initializeManagers()
+
+       // Handle clear option - delete existing files before loading
+       if (this.opts.clear && this.normalizedFile) {
+         await this.clearExistingFiles()
+       }
+
+       // Check file existence and handle create option
+       if (this.normalizedFile) {
+         const fileExists = await this.fileHandler.exists()
+
+         if (!fileExists) {
+           if (!this.opts.create) {
+             throw new Error(`Database file '${this.normalizedFile}' does not exist and create option is disabled`)
+           }
+           // File will be created when first data is written
+         } else {
+           // Load existing data if file exists
+           await this.load()
+         }
+       }
+
+       // Manual save is now the default behavior
+
+       this.initialized = true
+       this.emit('initialized')
+
+       if (this.opts.debugMode) {
+         console.log(`✅ Database initialized with ${this.writeBuffer.length} records`)
+       }
+     } catch (error) {
+       console.error('Failed to initialize database:', error)
+       throw error
+     } finally {
+       this.isLoading = false
+     }
+   }
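
Putting the lifecycle together (a sketch; the error strings in this file refer to init(), assumed here to be a public alias of initialize(), and close() is assumed from the closed-state handling and the close-init-cycle tests):

    const db = new Database('./catalog')  // stored as ./catalog.jdb
    await db.init()                       // create or load; emits 'initialized'
    await db.insert({ id: 1, name: 'first' })
    await db.save()                       // persistence is manual in 2.1.0
    await db.close()                      // further operations throw until...
    await db.init()                       // ...the database is reopened
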
+
+   /**
+    * Validate that the database is initialized before performing operations
+    * @param {string} operation - The operation being attempted
+    * @throws {Error} If database is not initialized
+    */
+   _validateInitialization(operation) {
+     if (this.destroyed) {
+       throw new Error(`❌ Cannot perform '${operation}' on a destroyed database. Create a new instance instead.`)
+     }
+
+     if (this.closed) {
+       throw new Error(`❌ Database is closed. Call 'await db.init()' to reopen it before performing '${operation}' operations.`)
+     }
+
+     if (!this.initialized) {
+       const errorMessage = `❌ Database not initialized. Call 'await db.init()' before performing '${operation}' operations.\n\n` +
+         `Example:\n` +
+         `  const db = new Database('./myfile.jdb')\n` +
+         `  await db.init() // ← Required before any operations\n` +
+         `  await db.insert({ name: 'test' }) // ← Now you can use database operations\n\n` +
+         `File: ${this.normalizedFile || 'unknown'}`
+
+       throw new Error(errorMessage)
+     }
+   }
+
+   /**
+    * Clear existing database files (.jdb and .idx.jdb)
+    */
+   async clearExistingFiles() {
+     if (!this.normalizedFile) return
+
+     try {
+       // Clear main database file
+       if (await this.fileHandler.exists()) {
+         await this.fileHandler.delete()
+         if (this.opts.debugMode) {
+           console.log(`🗑️ Cleared database file: ${this.normalizedFile}`)
+         }
+       }
+
+       // Clear index file
+       const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+       const idxFileHandler = new FileHandler(idxPath, this.fileMutex, this.opts)
+       if (await idxFileHandler.exists()) {
+         await idxFileHandler.delete()
+         if (this.opts.debugMode) {
+           console.log(`🗑️ Cleared index file: ${idxPath}`)
+         }
+       }
+
+       // Reset internal state
+       this.offsets = []
+       this.indexOffset = 0
+       this.deletedIds.clear()
+       this.shouldSave = false
+
+       // Create empty files to ensure they exist
+       await this.fileHandler.writeAll('')
+       await idxFileHandler.writeAll('')
+
+       if (this.opts.debugMode) {
+         console.log('🗑️ Database cleared successfully')
+       }
+     } catch (error) {
+       console.error('Failed to clear existing files:', error)
+       throw error
+     }
+   }
+
+   /**
+    * Load data from file
+    */
+   async load() {
+     if (!this.normalizedFile) return
+
+
+     try {
+       const startTime = Date.now()
+       this.isLoading = true
+
+       // Don't load the entire file - just initialize empty state
+       // The actual record count will come from loaded offsets
+       this.writeBuffer = [] // writeBuffer is only for new unsaved records
+
+       // recordCount will be determined from loaded offsets
+       // If no offsets were loaded, we'll count records only if needed
+
+       // Load index data if available (always try to load offsets, even without indexed fields)
+       if (this.indexManager) {
+         const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+         try {
+           const idxFileHandler = new FileHandler(idxPath, this.fileMutex, this.opts)
+           const idxData = await idxFileHandler.readAll()
+           if (idxData && idxData.trim()) {
+             const parsedIdxData = JSON.parse(idxData)
+
+             // Always load offsets if available (even without indexed fields)
+             if (parsedIdxData.offsets && Array.isArray(parsedIdxData.offsets)) {
+               this.offsets = parsedIdxData.offsets
+               if (this.opts.debugMode) {
+                 console.log(`📂 Loaded ${this.offsets.length} offsets from ${idxPath}`)
+               }
+             }
+
+             // Load indexOffset for proper range calculations
+             if (parsedIdxData.indexOffset !== undefined) {
+               this.indexOffset = parsedIdxData.indexOffset
+               if (this.opts.debugMode) {
+                 console.log(`📂 Loaded indexOffset: ${this.indexOffset} from ${idxPath}`)
+               }
+             }
+
+             // Load index data only if available and we have indexed fields
+             if (parsedIdxData && parsedIdxData.index && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
+               this.indexManager.load(parsedIdxData.index)
+
+               // Load term mapping data from .idx file if it exists
+               if (parsedIdxData.termMapping && this.termManager) {
+                 await this.termManager.loadTerms(parsedIdxData.termMapping)
+                 if (this.opts.debugMode) {
+                   console.log(`📂 Loaded term mapping from ${idxPath}`)
+                 }
+               }
+
+               if (this.opts.debugMode) {
+                 console.log(`📂 Loaded index data from ${idxPath}`)
+               }
+             }
+
+             // Load configuration from .idx file if database exists
+             if (parsedIdxData.config) {
+               const config = parsedIdxData.config
+
+               // Override constructor options with saved configuration
+               if (config.fields) {
+                 this.opts.fields = config.fields
+                 if (this.opts.debugMode) {
+                   console.log(`📂 Loaded fields config from ${idxPath}:`, Object.keys(config.fields))
+                 }
+               }
+
+               if (config.indexes) {
+                 this.opts.indexes = config.indexes
+                 if (this.opts.debugMode) {
+                   console.log(`📂 Loaded indexes config from ${idxPath}:`, Object.keys(config.indexes))
+                 }
+               }
+
+               if (config.originalIndexes) {
+                 this.opts.originalIndexes = config.originalIndexes
+                 if (this.opts.debugMode) {
+                   console.log(`📂 Loaded originalIndexes config from ${idxPath}:`, config.originalIndexes.length, 'indexes')
+                 }
+               }
+
+               // Reinitialize schema from saved configuration
+               if (config.schema && this.serializer) {
+                 this.serializer.initializeSchema(config.schema)
+                 if (this.opts.debugMode) {
+                   console.log(`📂 Loaded schema from ${idxPath}:`, config.schema.join(', '))
+                 }
+               }
+             }
+           }
+         } catch (idxError) {
+           // Index file doesn't exist or is corrupted, rebuild from data
+           if (this.opts.debugMode) {
+             console.log('📂 No index file found, rebuilding indexes from data')
+           }
+           // We can't rebuild index without violating no-memory-storage rule
+           // Index will be rebuilt as needed during queries
+         }
+       } else {
+         // No indexed fields, no need to rebuild indexes
+       }
+
+       this.performanceStats.loads++
+       this.performanceStats.loadTime += Date.now() - startTime
+       this.emit('loaded', this.writeBuffer.length)
+     } catch (error) {
+       console.error('Failed to load database:', error)
+       throw error
+     } finally {
+       this.isLoading = false
+     }
+   }
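
From the reader above (and the writer in _doSave() below), the .idx.jdb sidecar is a single JSON document shaped roughly as follows; the key names are the ones read above, the values are illustrative, and the config block is written by _saveIndexDataToFile(), which falls outside this excerpt:

    {
      "offsets": [0, 57, 131],   // byte offset of each record in the .jdb file
      "indexOffset": 204,        // total byte length of the data file
      "index": { ... },          // IndexManager.toJSON() for indexed fields
      "termMapping": { ... },    // TermManager.saveTerms(), when present
      "config": {
        "fields": { "name": "string", "tags": "array:string" },
        "indexes": { "name": "string", "tags": "array:string" },
        "originalIndexes": ["name", "tags"],
        "schema": ["id", "name", "tags"]
      }
    }
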
+
+
+   /**
+    * Save data to file
+    * @param {boolean} inQueue - Whether to execute within the operation queue (default: false)
+    */
+   async save(inQueue = false) {
+     this._validateInitialization('save')
+
+     if (this.opts.debugMode) {
+       console.log(`💾 save() called: writeBuffer.length=${this.writeBuffer.length}, offsets.length=${this.offsets.length}`)
+     }
+
+     // Auto-save removed - no need to pause anything
+
+     try {
+       // CRITICAL FIX: Wait for any ongoing save operations to complete
+       if (this.isSaving) {
+         if (this.opts.debugMode) {
+           console.log('💾 save(): waiting for previous save to complete')
+         }
+         // Wait for previous save to complete
+         while (this.isSaving) {
+           await new Promise(resolve => setTimeout(resolve, 10))
+         }
+
+         // Check if data changed since the previous save completed
+         const hasDataToSave = this.writeBuffer.length > 0 || this.deletedIds.size > 0
+         const needsStructureCreation = this.indexManager && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0
+
+         if (!hasDataToSave && !needsStructureCreation) {
+           if (this.opts.debugMode) {
+             console.log('💾 Save: No new data to save since previous save completed')
+           }
+           return // Nothing new to save
+         }
+       }
+
+       // CRITICAL FIX: Check if there's actually data to save before proceeding
+       // But allow save if we need to create database structure (index files, etc.)
+       const hasDataToSave = this.writeBuffer.length > 0 || this.deletedIds.size > 0
+       const needsStructureCreation = this.indexManager && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0
+
+       if (!hasDataToSave && !needsStructureCreation) {
+         if (this.opts.debugMode) {
+           console.log('💾 Save: No data to save (writeBuffer empty and no deleted records)')
+         }
+         return // Nothing to save
+       }
+
+       if (inQueue) {
+         if (this.opts.debugMode) {
+           console.log(`💾 save(): executing in queue`)
+         }
+         return this.operationQueue.enqueue(async () => {
+           return this._doSave()
+         })
+       } else {
+         if (this.opts.debugMode) {
+           console.log(`💾 save(): calling _doSave() directly`)
+         }
+         return this._doSave()
+       }
+     } finally {
+       // Auto-save removed - no need to resume anything
+     }
+   }
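
Since auto-save was removed, callers own the persistence point (sketch):

    await db.insert({ id: 'a1', name: 'Ada' })
    await db.insert({ id: 'b2', name: 'Bob' })
    await db.save()      // runs _doSave() directly; no-op when nothing changed
    await db.save(true)  // inQueue variant: serialized behind queued operations
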
+
+   /**
+    * Internal save implementation (without queue)
+    */
+   async _doSave() {
+     // CRITICAL FIX: Check if database is destroyed
+     if (this.destroyed) return
+
+     // CRITICAL FIX: Use atomic check-and-set to prevent concurrent save operations
+     if (this.isSaving) {
+       if (this.opts.debugMode) {
+         console.log('💾 _doSave: Save operation already in progress, skipping')
+       }
+       return
+     }
+
+     // CRITICAL FIX: Check if there's actually data to save or structure to create
+     const hasDataToSave = this.writeBuffer.length > 0 || this.deletedIds.size > 0
+     const needsStructureCreation = this.indexManager && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0
+
+     if (!hasDataToSave && !needsStructureCreation) {
+       if (this.opts.debugMode) {
+         console.log('💾 _doSave: No data to save (writeBuffer empty and no deleted records)')
+       }
+       return // Nothing to save
+     }
+
+     // CRITICAL FIX: Set saving flag immediately to prevent race conditions
+     this.isSaving = true
+
+     try {
+       const startTime = Date.now()
+
+       // CRITICAL FIX: Ensure file path is valid
+       this.ensureFilePath()
+
+       // CRITICAL FIX: Wait for ALL pending operations to complete before save
+       await this._waitForPendingOperations()
+
+       // CRITICAL FIX: Capture writeBuffer and deletedIds at the start to prevent race conditions
+       const writeBufferSnapshot = [...this.writeBuffer]
+       const deletedIdsSnapshot = new Set(this.deletedIds)
+
+       // OPTIMIZATION: Process pending index updates in batch before save
+       if (this.pendingIndexUpdates && this.pendingIndexUpdates.length > 0) {
+         if (this.opts.debugMode) {
+           console.log(`💾 Save: Processing ${this.pendingIndexUpdates.length} pending index updates in batch`)
+         }
+
+         // Extract records and line numbers for batch processing
+         const records = this.pendingIndexUpdates.map(update => update.record)
+         const startLineNumber = this.pendingIndexUpdates[0].lineNumber
+
+         // Process index updates in batch
+         await this.indexManager.addBatch(records, startLineNumber)
+
+         // Clear pending updates
+         this.pendingIndexUpdates = []
+       }
+
+       // CRITICAL FIX: Flush write buffer completely after capturing snapshot
+       await this._flushWriteBufferCompletely()
+
+       // CRITICAL FIX: Wait for all I/O operations to complete before clearing writeBuffer
+       await this._waitForIOCompletion()
+
+       // CRITICAL FIX: Verify write buffer is empty after I/O completion
+       // But allow for ongoing insertions during high-volume scenarios
+       if (this.writeBuffer.length > 0) {
+         if (this.opts.debugMode) {
+           console.log(`💾 Save: WriteBuffer still has ${this.writeBuffer.length} items after flush - this may indicate ongoing insertions`)
+         }
+
+         // If we have a reasonable number of items, continue processing
+         if (this.writeBuffer.length < 10000) { // Reasonable threshold
+           if (this.opts.debugMode) {
+             console.log(`💾 Save: Continuing to process remaining ${this.writeBuffer.length} items`)
+           }
+           // Continue with the save process - the remaining items will be included in the final save
+         } else {
+           // Too many items remaining - likely a real problem
+           throw new Error(`WriteBuffer has too many items after flush: ${this.writeBuffer.length} items remaining (threshold: 10000)`)
+         }
+       }
+
+       // OPTIMIZATION: Parallel operations - cleanup and data preparation
+       let allData = []
+       let orphanedCount = 0
+
+       // Check if there are new records to save (after flush, writeBuffer should be empty)
+       if (this.opts.debugMode) {
+         console.log(`💾 Save: writeBuffer.length=${this.writeBuffer.length}, writeBufferSnapshot.length=${writeBufferSnapshot.length}`)
+       }
+       if (this.writeBuffer.length > 0) {
+         if (this.opts.debugMode) {
+           console.log(`💾 Save: WriteBuffer has ${writeBufferSnapshot.length} records, using streaming approach`)
+         }
+
+         // Note: processTermMapping is already called during insert/update operations
+         // No need to call it again here to avoid double processing
+
+         // OPTIMIZATION: Check if we can skip reading existing records
+         // Only use streaming if we have existing records AND we're not just appending new records
+         const hasExistingRecords = this.indexOffset > 0 && this.offsets.length > 0 && writeBufferSnapshot.length > 0
+
+         if (!hasExistingRecords && deletedIdsSnapshot.size === 0) {
+           // OPTIMIZATION: No existing records to read, just use writeBuffer
+           allData = [...writeBufferSnapshot]
+         } else {
+           // OPTIMIZATION: Parallel operations - cleanup and streaming
+           const parallelOperations = []
+
+           // Add term cleanup if enabled
+           if (this.opts.termMappingCleanup && this.termManager) {
+             parallelOperations.push(
+               Promise.resolve().then(() => {
+                 orphanedCount = this.termManager.cleanupOrphanedTerms()
+                 if (this.opts.debugMode && orphanedCount > 0) {
+                   console.log(`🧹 Cleaned up ${orphanedCount} orphaned terms`)
+                 }
+               })
+             )
+           }
+
+           // Add streaming operation
+           parallelOperations.push(
+             this._streamExistingRecords(deletedIdsSnapshot, writeBufferSnapshot).then(existingRecords => {
+               allData = [...existingRecords]
+
+               // OPTIMIZATION: Use Map for faster lookups
+               const existingRecordMap = new Map(existingRecords.filter(r => r && r.id).map(r => [r.id, r]))
+
+               for (const record of writeBufferSnapshot) {
+                 if (!deletedIdsSnapshot.has(record.id)) {
+                   if (existingRecordMap.has(record.id)) {
+                     // Replace existing record
+                     const existingIndex = allData.findIndex(r => r.id === record.id)
+                     allData[existingIndex] = record
+                   } else {
+                     // Add new record
+                     allData.push(record)
+                   }
+                 }
+               }
+             })
+           )
+
+           // Execute parallel operations
+           await Promise.all(parallelOperations)
+         }
+       } else {
+         // CRITICAL FIX: When writeBuffer is empty, use streaming approach for existing records
+         if (this.opts.debugMode) {
+           console.log(`💾 Save: Checking streaming condition: indexOffset=${this.indexOffset}, deletedIds.size=${this.deletedIds.size}`)
+           console.log(`💾 Save: writeBuffer.length=${this.writeBuffer.length}`)
+         }
+         if (this.indexOffset > 0 || this.deletedIds.size > 0) {
+           try {
+             if (this.opts.debugMode) {
+               console.log(`💾 Save: Using streaming approach for existing records`)
+               console.log(`💾 Save: indexOffset: ${this.indexOffset}, offsets.length: ${this.offsets.length}`)
+               console.log(`💾 Save: deletedIds to filter:`, Array.from(deletedIdsSnapshot))
+             }
+
+             // OPTIMIZATION: Parallel operations - cleanup and streaming
+             const parallelOperations = []
+
+             // Add term cleanup if enabled
+             if (this.opts.termMappingCleanup && this.termManager) {
+               parallelOperations.push(
+                 Promise.resolve().then(() => {
+                   orphanedCount = this.termManager.cleanupOrphanedTerms()
+                   if (this.opts.debugMode && orphanedCount > 0) {
+                     console.log(`🧹 Cleaned up ${orphanedCount} orphaned terms`)
+                   }
+                 })
+               )
+             }
+
+             // Add streaming operation
+             parallelOperations.push(
+               this._streamExistingRecords(deletedIdsSnapshot, writeBufferSnapshot).then(existingRecords => {
+                 if (this.opts.debugMode) {
+                   console.log(`💾 Save: _streamExistingRecords returned ${existingRecords.length} records`)
+                   console.log(`💾 Save: existingRecords:`, existingRecords)
+                 }
+                 // Combine existing records with new records from writeBuffer
+                 allData = [...existingRecords, ...writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))]
+               }).catch(error => {
+                 if (this.opts.debugMode) {
+                   console.log(`💾 Save: _streamExistingRecords failed:`, error.message)
+                 }
+                 // CRITICAL FIX: Use safe fallback to preserve existing data instead of losing it
+                 return this._loadExistingRecordsFallback(deletedIdsSnapshot, writeBufferSnapshot).then(fallbackRecords => {
+                   allData = [...fallbackRecords, ...writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))]
+                   if (this.opts.debugMode) {
+                     console.log(`💾 Save: Fallback preserved ${fallbackRecords.length} existing records, total: ${allData.length}`)
+                   }
+                 }).catch(fallbackError => {
+                   if (this.opts.debugMode) {
+                     console.log(`💾 Save: All fallback methods failed:`, fallbackError.message)
+                     console.log(`💾 Save: CRITICAL - Data loss may occur, only writeBuffer will be saved`)
+                   }
+                   // Last resort: at least save what we have in writeBuffer
+                   allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))
+                 })
+               })
+             )
+
+             // Execute parallel operations
+             await Promise.all(parallelOperations)
+           } catch (error) {
+             if (this.opts.debugMode) {
+               console.log(`💾 Save: Streaming approach failed, falling back to writeBuffer only: ${error.message}`)
+             }
+             // CRITICAL FIX: Use safe fallback to preserve existing data instead of losing it
+             try {
+               const fallbackRecords = await this._loadExistingRecordsFallback(deletedIdsSnapshot, writeBufferSnapshot)
+               allData = [...fallbackRecords, ...writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))]
+               if (this.opts.debugMode) {
+                 console.log(`💾 Save: Fallback preserved ${fallbackRecords.length} existing records, total: ${allData.length}`)
+               }
+             } catch (fallbackError) {
+               if (this.opts.debugMode) {
+                 console.log(`💾 Save: All fallback methods failed:`, fallbackError.message)
+                 console.log(`💾 Save: CRITICAL - Data loss may occur, only writeBuffer will be saved`)
+               }
+               // Last resort: at least save what we have in writeBuffer
+               allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))
+             }
+           }
+         } else {
+           // No existing data, use only writeBuffer
+           allData = writeBufferSnapshot.filter(record => !deletedIdsSnapshot.has(record.id))
+         }
+       }
+
+       // CRITICAL FIX: Calculate offsets based on actual serialized data that will be written
+       // This ensures consistency between offset calculation and file writing
+       const jsonlData = allData.length > 0
+         ? this.serializer.serializeBatch(allData)
+         : ''
+       const jsonlString = jsonlData.toString('utf8')
+       const lines = jsonlString.split('\n').filter(line => line.trim())
+
+       this.offsets = []
+       let currentOffset = 0
+       for (let i = 0; i < lines.length; i++) {
+         this.offsets.push(currentOffset)
+         // CRITICAL FIX: Use actual line length including newline for accurate offset calculation
+         // This accounts for UTF-8 encoding differences (e.g., 'ação' vs 'acao')
+         const lineWithNewline = lines[i] + '\n'
+         currentOffset += Buffer.byteLength(lineWithNewline, 'utf8')
+       }
+
+       // CRITICAL FIX: Ensure indexOffset matches actual file size
+       this.indexOffset = currentOffset
+
+       if (this.opts.debugMode) {
+         console.log(`💾 Save: Calculated indexOffset: ${this.indexOffset}, allData.length: ${allData.length}`)
+       }
+
+       // OPTIMIZATION: Parallel operations - file writing and index data preparation
+       const parallelWriteOperations = []
+
+       // Add main file write operation
+       parallelWriteOperations.push(
+         this.fileHandler.writeBatch([jsonlData])
+       )
+
+       // Add index file operations - ALWAYS save offsets, even without indexed fields
+       if (this.indexManager) {
+         const idxPath = this.normalizedFile.replace('.jdb', '.idx.jdb')
+
+         // OPTIMIZATION: Parallel data preparation
+         const indexDataPromise = Promise.resolve({
+           index: this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0 ? this.indexManager.toJSON() : {},
+           offsets: this.offsets, // Save actual offsets for efficient file operations
+           indexOffset: this.indexOffset // Save file size for proper range calculations
+         })
+
+         // Add term mapping data if needed
+         const termMappingFields = this.getTermMappingFields()
+         if (termMappingFields.length > 0 && this.termManager) {
+           const termDataPromise = this.termManager.saveTerms()
+
+           // Combine index data and term data
+           const combinedDataPromise = Promise.all([indexDataPromise, termDataPromise]).then(([indexData, termData]) => {
+             indexData.termMapping = termData
+             return indexData
+           })
+
+           // Add index file write operation
+           parallelWriteOperations.push(
+             combinedDataPromise.then(indexData => {
+               const idxFileHandler = new FileHandler(idxPath, this.fileMutex, this.opts)
+               return idxFileHandler.writeAll(JSON.stringify(indexData, null, 2))
+             })
+           )
+         } else {
+           // Add index file write operation without term mapping
+           parallelWriteOperations.push(
+             indexDataPromise.then(indexData => {
+               const idxFileHandler = new FileHandler(idxPath, this.fileMutex, this.opts)
+               return idxFileHandler.writeAll(JSON.stringify(indexData, null, 2))
+             })
+           )
+         }
+       }
+
+       // Execute parallel write operations
+       await Promise.all(parallelWriteOperations)
+
+       if (this.opts.debugMode) {
+         console.log(`💾 Saved ${allData.length} records to ${this.normalizedFile}`)
+       }
+
+       // CRITICAL FIX: Invalidate file size cache after save operation
+       this._cachedFileStats = null
+
+       this.shouldSave = false
+       this.lastSaveTime = Date.now()
+
+       // Clear writeBuffer and deletedIds after successful save only if we had data to save
+       if (allData.length > 0) {
+         // Rebuild index when records were deleted to maintain consistency
+         const hadDeletedRecords = deletedIdsSnapshot.size > 0
+         if (this.indexManager && this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0) {
+           if (hadDeletedRecords) {
+             // Clear the index and rebuild it from the remaining records
+             this.indexManager.clear()
+             if (this.opts.debugMode) {
+               console.log(`🧹 Rebuilding index after removing ${deletedIdsSnapshot.size} deleted records`)
+             }
+
+             // Rebuild index from the saved records
+             for (let i = 0; i < allData.length; i++) {
+               const record = allData[i]
+               await this.indexManager.add(record, i)
+             }
+           }
+         }
+
+         // CRITICAL FIX: Clear all records that were in the snapshot
+         // Use a more robust comparison that handles different data types
+         const originalLength = this.writeBuffer.length
+         this.writeBuffer = this.writeBuffer.filter(record => {
+           // For objects with id, compare by id
+           if (typeof record === 'object' && record !== null && record.id) {
+             return !writeBufferSnapshot.some(snapshotRecord =>
+               typeof snapshotRecord === 'object' && snapshotRecord !== null &&
+               snapshotRecord.id && snapshotRecord.id === record.id
+             )
+           }
+           // For other types (Buffers, primitives), use strict equality
+           return !writeBufferSnapshot.some(snapshotRecord => snapshotRecord === record)
+         })
+
+         // Remove only the deleted IDs that were in the snapshot
+         for (const deletedId of deletedIdsSnapshot) {
+           this.deletedIds.delete(deletedId)
+         }
+
+         // CRITICAL FIX: Ensure writeBuffer is completely cleared after successful save
+         if (this.writeBuffer.length > 0) {
+           if (this.opts.debugMode) {
+             console.log(`💾 Save: Force clearing remaining ${this.writeBuffer.length} items from writeBuffer`)
+           }
+           // If there are still items in writeBuffer after filtering, clear them
+           // This prevents the "writeBuffer has records" bug in destroy()
+           this.writeBuffer = []
+           this.writeBufferOffsets = []
+           this.writeBufferSizes = []
+         }
+
+         // indexOffset already set correctly to currentOffset (total file size) above
+         // No need to override it with record count
+       }
+
+       // CRITICAL FIX: Always save index data to file after saving records
+       await this._saveIndexDataToFile()
+
+       this.performanceStats.saves++
+       this.performanceStats.saveTime += Date.now() - startTime
+       this.emit('saved', this.writeBuffer.length)
+
+     } catch (error) {
+       console.error('Failed to save database:', error)
+       throw error
+     } finally {
+       this.isSaving = false
+     }
+   }
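
The offset loop above is byte-accurate rather than character-accurate, which is what makes multibyte text safe; the same arithmetic in isolation:

    const lines = ['{"id":1,"name":"acao"}', '{"id":2,"name":"ação"}']
    const offsets = []
    let offset = 0
    for (const line of lines) {
      offsets.push(offset)
      offset += Buffer.byteLength(line + '\n', 'utf8')
    }
    // Both lines are 22 characters, but 'ã' and 'ç' take 2 UTF-8 bytes each,
    // so the second line is 24 bytes + newline instead of 22 + newline:
    // offsets -> [0, 23], final offset (indexOffset) -> 48
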
+
+   /**
+    * Process term mapping for a record
+    * @param {Object} record - Record to process
+    * @param {boolean} isUpdate - Whether this is an update operation
+    * @param {Object} oldRecord - Original record (for updates)
+    */
+   processTermMapping(record, isUpdate = false, oldRecord = null) {
+     const termMappingFields = this.getTermMappingFields()
+     if (!this.termManager || termMappingFields.length === 0) {
+       return
+     }
+
+
+     // CRITICAL FIX: Don't modify the original record object
+     // The record should already be a copy created in insert/update methods
+     // This prevents reference modification issues
+
+     // Process each term mapping field
+     for (const field of termMappingFields) {
+       if (record[field] && Array.isArray(record[field])) {
+
+         // Decrement old terms if this is an update
+         if (isUpdate && oldRecord) {
+           // Check if oldRecord has term IDs or terms
+           const termIdsField = `${field}Ids`
+           if (oldRecord[termIdsField] && Array.isArray(oldRecord[termIdsField])) {
+             // Use term IDs directly for decrementing
+             for (const termId of oldRecord[termIdsField]) {
+               this.termManager.decrementTermCount(termId)
+             }
+           } else if (oldRecord[field] && Array.isArray(oldRecord[field])) {
+             // Use terms to decrement (fallback for backward compatibility)
+             for (const term of oldRecord[field]) {
+               const termId = this.termManager.termToId.get(term)
+               if (termId) {
+                 this.termManager.decrementTermCount(termId)
+               }
+             }
+           }
+         }
+
+         // Clear old term IDs if this is an update
+         if (isUpdate) {
+           delete record[`${field}Ids`]
+         }
+
+         // Process new terms - getTermId already increments the count
+         const termIds = []
+         for (const term of record[field]) {
+           const termId = this.termManager.getTermId(term)
+           termIds.push(termId)
+         }
+         // Store term IDs in the record (for internal use)
+         record[`${field}Ids`] = termIds
+
+       }
+     }
+   }
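
The transformation in miniature (IDs are illustrative):

    // before: { id: 7, tags: ['news', 'sports'] }
    // processTermMapping(record) maps each term to a numeric ID via
    // termManager.getTermId(), which also increments its reference count:
    // after:  { id: 7, tags: ['news', 'sports'], tagsIds: [1, 2] }
    // Serialization then stores only the IDs (see removeTermIdsForSerialization
    // below), so a repeated term costs one dictionary entry, not one string
    // per record.
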
1329
+
1330
+ /**
1331
+ * Convert terms to term IDs for serialization (SPACE OPTIMIZATION)
1332
+ * @param {Object} record - Record to process
1333
+ * @returns {Object} - Record with terms converted to term IDs
1334
+ */
1335
+ removeTermIdsForSerialization(record) {
1336
+ const termMappingFields = this.getTermMappingFields()
1337
+ if (termMappingFields.length === 0 || !this.termManager) {
1338
+ return record
1339
+ }
1340
+
1341
+ // Create a copy and convert terms to term IDs
1342
+ const optimizedRecord = { ...record }
1343
+
1344
+ for (const field of termMappingFields) {
1345
+ if (optimizedRecord[field] && Array.isArray(optimizedRecord[field])) {
1346
+ // CRITICAL FIX: Only convert if values are strings (terms), skip if already numbers (term IDs)
1347
+ const firstValue = optimizedRecord[field][0]
1348
+ if (typeof firstValue === 'string') {
1349
+ // Convert terms to term IDs for storage
1350
+ optimizedRecord[field] = optimizedRecord[field].map(term =>
1351
+ this.termManager.getTermIdWithoutIncrement(term)
1352
+ )
1353
+ }
1354
+ // If already numbers (term IDs), leave as-is
1355
+ }
1356
+ }
1357
+
1358
+ return optimizedRecord
1359
+ }
1360
+
1361
+ /**
1362
+ * Convert term IDs back to terms after deserialization (SPACE OPTIMIZATION)
1363
+ * @param {Object} record - Record with term IDs
1364
+ * @returns {Object} - Record with terms restored
1365
+ */
1366
+ restoreTermIdsAfterDeserialization(record) {
1367
+ const termMappingFields = this.getTermMappingFields()
1368
+ if (termMappingFields.length === 0 || !this.termManager) {
1369
+ return record
1370
+ }
1371
+
1372
+
1373
+ // Create a copy and convert term IDs back to terms
1374
+ const restoredRecord = { ...record }
1375
+
1376
+ for (const field of termMappingFields) {
1377
+ if (restoredRecord[field] && Array.isArray(restoredRecord[field])) {
1378
+
1379
+ // Convert term IDs back to terms for user
1380
+ restoredRecord[field] = restoredRecord[field].map(termId => {
1381
+ const term = this.termManager.idToTerm.get(termId) || termId
1382
+
1383
+
1384
+ return term
1385
+ })
1386
+ }
1387
+
1388
+ // Remove the *Ids field that was added during serialization
1389
+ const idsFieldName = field + 'Ids'
1390
+ if (restoredRecord[idsFieldName]) {
1391
+ delete restoredRecord[idsFieldName]
1392
+ }
1393
+ }
1394
+
1395
+
1396
+ return restoredRecord
1397
+ }
1398
+
1399
+
1400
+ /**
1401
+ * Remove term mapping for a record
1402
+ * @param {Object} record - Record to process
1403
+ */
1404
+ removeTermMapping(record) {
1405
+ const termMappingFields = this.getTermMappingFields()
1406
+ if (!this.termManager || termMappingFields.length === 0) {
1407
+ return
1408
+ }
1409
+
1410
+ // Process each term mapping field
1411
+ for (const field of termMappingFields) {
1412
+ // Use terms to decrement (term IDs are not stored in records anymore)
1413
+ if (record[field] && Array.isArray(record[field])) {
1414
+ for (const term of record[field]) {
1415
+ const termId = this.termManager.termToId.get(term)
1416
+ if (termId) {
1417
+ this.termManager.decrementTermCount(termId)
1418
+ }
1419
+ }
1420
+ }
1421
+ }
1422
+ }
1423
+
1424
+ /**
1425
+ * Process term mapping for multiple records in batch (OPTIMIZATION)
1426
+ * @param {Array} records - Records to process
1427
+ * @returns {Array} - Processed records with term mappings
1428
+ */
1429
+ processTermMappingBatch(records) {
1430
+ const termMappingFields = this.getTermMappingFields()
1431
+ if (!this.termManager || termMappingFields.length === 0 || !records.length) {
1432
+ return records
1433
+ }
1434
+
1435
+ // OPTIMIZATION: Pre-collect all unique terms to minimize Map operations
1436
+ const allTerms = new Set()
1437
+ const fieldTerms = new Map() // field -> Set of terms
1438
+
1439
+ for (const field of termMappingFields) {
1440
+ fieldTerms.set(field, new Set())
1441
+ for (const record of records) {
1442
+ if (record[field] && Array.isArray(record[field])) {
1443
+ for (const term of record[field]) {
1444
+ allTerms.add(term)
1445
+ fieldTerms.get(field).add(term)
1446
+ }
1447
+ }
1448
+ }
1449
+ }
1450
+
1451
+ // OPTIMIZATION: Batch process all terms at once using bulk operations
1452
+ const termIdMap = new Map()
1453
+ if (this.termManager.bulkGetTermIds) {
1454
+ // Use bulk operation if available
1455
+ const allTermsArray = Array.from(allTerms)
1456
+ const termIds = this.termManager.bulkGetTermIds(allTermsArray)
1457
+ for (let i = 0; i < allTermsArray.length; i++) {
1458
+ termIdMap.set(allTermsArray[i], termIds[i])
1459
+ }
1460
+ } else {
1461
+ // Fallback to individual operations
1462
+ for (const term of allTerms) {
1463
+ termIdMap.set(term, this.termManager.getTermId(term))
1464
+ }
1465
+ }
1466
+
1467
+ // OPTIMIZATION: Process records using pre-computed term IDs
1468
+ return records.map(record => {
1469
+ const processedRecord = { ...record }
1470
+
1471
+ for (const field of termMappingFields) {
1472
+ if (record[field] && Array.isArray(record[field])) {
1473
+ const termIds = record[field].map(term => termIdMap.get(term))
1474
+ processedRecord[`${field}Ids`] = termIds
1475
+ }
1476
+ }
1477
+
1478
+ return processedRecord
1479
+ })
1480
+ }
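  // Batch sketch (annotation; `tags` is an assumed term-mapped field and the
  // resulting IDs are illustrative):
  //
  //   const out = db.processTermMappingBatch([
  //     { id: 1, tags: ['a', 'b'] },
  //     { id: 2, tags: ['b', 'c'] }
  //   ])
  //   // The unique terms {a, b, c} are resolved to IDs once; each record then
  //   // carries a parallel array, e.g. out[0].tagsIds -> [1, 2]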
1481
+
1482
+
1483
+ /**
1484
+ * Calculate total size of serialized records (OPTIMIZATION)
1485
+ * @param {Array} records - Records to calculate size for
1486
+ * @returns {number} - Total size in bytes
1487
+ */
1488
+ calculateBatchSize(records) {
1489
+ if (!records || !records.length) return 0
1490
+
1491
+ let totalSize = 0
1492
+ for (const record of records) {
1493
+ // Serialize each record to measure its exact on-disk byte size
1494
+ // SPACE OPTIMIZATION: Remove term IDs before size calculation
1495
+ const cleanRecord = this.removeTermIdsForSerialization(record)
1496
+ const jsonString = this.serializer.serialize(cleanRecord).toString('utf8')
1497
+ totalSize += Buffer.byteLength(jsonString, 'utf8') + 1 // +1 for newline
1498
+ }
1499
+
1500
+ return totalSize
1501
+ }
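  // Size arithmetic sketch: two records that serialize to 20 and 35 bytes are
  // estimated as (20 + 1) + (35 + 1) = 57 bytes, the +1s accounting for newlines.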
1502
+
1503
+ /**
1504
+ * Begin an insert session for batch operations
1505
+ * @param {Object} sessionOptions - Options for the insert session
1506
+ * @returns {InsertSession} - The insert session instance
1507
+ */
1508
+ beginInsertSession(sessionOptions = {}) {
1509
+ if (this.destroyed) {
1510
+ throw new Error('Database is destroyed')
1511
+ }
1512
+
1513
+ if (this.closed) {
1514
+ throw new Error('Database is closed. Call init() to reopen it.')
1515
+ }
1516
+
1517
+ return new InsertSession(this, sessionOptions)
1518
+ }
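  // Lifecycle sketch (annotation): sessions are only available on a live instance.
  //
  //   await db.init()
  //   const session = db.beginInsertSession()   // ok
  //   // after close(), beginInsertSession() throws 'Database is closed. Call init() to reopen it.'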
1519
+
1520
+ /**
1521
+ * Insert a new record
1522
+ */
1523
+ async insert(data) {
1524
+ this._validateInitialization('insert')
1525
+
1526
+ return this.operationQueue.enqueue(async () => {
1527
+ this.isInsideOperationQueue = true
1528
+ try {
1529
+ // CRITICAL FIX: Validate state before insert operation
1530
+ this.validateState()
1531
+
1532
+ if (!data || typeof data !== 'object') {
1533
+ throw new Error('Data must be an object')
1534
+ }
1535
+
1536
+ // CRITICAL FIX: Check abort signal before operation, but allow during destroy cleanup
1537
+ if (this.abortController.signal.aborted && !this.destroying) {
1538
+ throw new Error('Database is destroyed')
1539
+ }
1540
+
1541
+ // Initialize schema if not already done (auto-detect from first record)
1542
+ if (this.serializer && !this.serializer.schemaManager.isInitialized) {
1543
+ this.serializer.initializeSchema(data, true)
1544
+ if (this.opts.debugMode) {
1545
+ console.log(`🔍 Schema auto-detected from first insert: ${this.serializer.getSchema().join(', ')} [${this.instanceId}]`)
1546
+ }
1547
+ }
1548
+
1549
+ // OPTIMIZATION: Process single insert with deferred index updates
1550
+ // CRITICAL FIX: Clone the object to prevent reference modification
1551
+ const clonedData = {...data}
1552
+ const id = clonedData.id || this.generateId()
1553
+ const record = { ...clonedData, id }
1554
+
1555
+ // OPTIMIZATION: Process term mapping
1556
+ this.processTermMapping(record)
1557
+ if (this.opts.debugMode) {
1558
+ // console.log(`💾 insert(): writeBuffer(before)=${this.writeBuffer.length}`)
1559
+ }
1560
+
1561
+ // Apply schema enforcement - convert to array format and back to enforce schema
1562
+ const schemaEnforcedRecord = this.applySchemaEnforcement(record)
1563
+
1564
+ // Don't store in this.data - only use writeBuffer and index
1565
+ this.writeBuffer.push(schemaEnforcedRecord)
1566
+ if (this.opts.debugMode) {
1567
+ console.log(`🔍 INSERT: Added record to writeBuffer, length now: ${this.writeBuffer.length}`)
1568
+ }
1569
+
1570
+ // OPTIMIZATION: Calculate and store offset and size for writeBuffer record
1571
+ // SPACE OPTIMIZATION: Remove term IDs before serialization
1572
+ const cleanRecord = this.removeTermIdsForSerialization(record)
1573
+ const recordJson = this.serializer.serialize(cleanRecord).toString('utf8')
1574
+ const recordSize = Buffer.byteLength(recordJson, 'utf8')
1575
+
1576
+ // Calculate offset based on end of file + previous writeBuffer sizes
1577
+ const previousWriteBufferSize = this.writeBufferSizes.reduce((sum, size) => sum + size, 0)
1578
+ const recordOffset = this.indexOffset + previousWriteBufferSize
1579
+
1580
+ this.writeBufferOffsets.push(recordOffset)
1581
+ this.writeBufferSizes.push(recordSize)
1582
+
1583
+ // OPTIMIZATION: Use the record's position in the writeBuffer as its line number (0-based)
1584
+ const lineNumber = this.writeBuffer.length - 1
1585
+
1586
+ // OPTIMIZATION: Defer index updates to batch processing
1587
+ // Store the record for batch index processing
1588
+ if (!this.pendingIndexUpdates) {
1589
+ this.pendingIndexUpdates = []
1590
+ }
1591
+ this.pendingIndexUpdates.push({ record, lineNumber })
1592
+
1593
+ // Manual save is now the responsibility of the application
1594
+ this.shouldSave = true
1595
+
1596
+ this.performanceStats.operations++
1597
+
1598
+ // Auto-save manager removed - manual save required
1599
+
1600
+ this.emit('inserted', record)
1601
+ return record
1602
+ } finally {
1603
+ this.isInsideOperationQueue = false
1604
+ }
1605
+ })
1606
+ }
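  // Usage sketch (annotation): persistence is manual in this version, so the
  // caller saves explicitly after inserting.
  //
  //   const rec = await db.insert({ name: 'Ada', tags: ['math'] })   // id auto-generated if absent
  //   await db.save()                                                // flush the writeBuffer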
1607
+
1608
+ /**
1609
+ * Insert multiple records in batch (OPTIMIZATION)
1610
+ */
1611
+ async insertBatch(dataArray) {
1612
+ this._validateInitialization('insertBatch')
1613
+
1614
+ // If we're already inside the operation queue (e.g., from insert()), avoid re-enqueueing to prevent deadlocks
1615
+ if (this.isInsideOperationQueue) {
1616
+ if (this.opts.debugMode) {
1617
+ console.log(`💾 insertBatch inline: insideQueue=${this.isInsideOperationQueue}, size=${Array.isArray(dataArray) ? dataArray.length : 0}`)
1618
+ }
1619
+ return await this._insertBatchInternal(dataArray)
1620
+ }
1621
+
1622
+ return this.operationQueue.enqueue(async () => {
1623
+ this.isInsideOperationQueue = true
1624
+ try {
1625
+ if (this.opts.debugMode) {
1626
+ console.log(`💾 insertBatch enqueued: size=${Array.isArray(dataArray) ? dataArray.length : 0}`)
1627
+ }
1628
+ return await this._insertBatchInternal(dataArray)
1629
+ } finally {
1630
+ this.isInsideOperationQueue = false
1631
+ }
1632
+ })
1633
+ }
1634
+
1635
+ /**
1636
+ * Internal implementation for insertBatch to allow inline execution when already inside the queue
1637
+ */
1638
+ async _insertBatchInternal(dataArray) {
1639
+ // CRITICAL FIX: Validate state before insert operation
1640
+ this.validateState()
1641
+
1642
+ if (!Array.isArray(dataArray) || dataArray.length === 0) {
1643
+ throw new Error('DataArray must be a non-empty array')
1644
+ }
1645
+
1646
+ // CRITICAL FIX: Check abort signal before operation, but allow during destroy cleanup
1647
+ if (this.abortController.signal.aborted && !this.destroying) {
1648
+ throw new Error('Database is destroyed')
1649
+ }
1650
+
1651
+ if (this.opts.debugMode) {
1652
+ console.log(`💾 _insertBatchInternal: processing size=${dataArray.length}, startWriteBuffer=${this.writeBuffer.length}`)
1653
+ }
1654
+ const records = []
1655
+ const startLineNumber = this.writeBuffer.length
1656
+
1657
+ // Initialize schema if not already done (auto-detect from first record)
1658
+ if (this.serializer && !this.serializer.schemaManager.isInitialized && dataArray.length > 0) {
1659
+ this.serializer.initializeSchema(dataArray[0], true)
1660
+ if (this.opts.debugMode) {
1661
+ console.log(`🔍 Schema auto-detected from first batch insert: ${this.serializer.getSchema().join(', ')} [${this.instanceId}]`)
1662
+ }
1663
+ }
1664
+
1665
+ // OPTIMIZATION: Process all records in batch
1666
+ for (let i = 0; i < dataArray.length; i++) {
1667
+ const data = dataArray[i]
1668
+ if (!data || typeof data !== 'object') {
1669
+ throw new Error(`Data at index ${i} must be an object`)
1670
+ }
1671
+
1672
+ const id = data.id || this.generateId()
1673
+ const record = { ...data, id }
1674
+ records.push(record)
1675
+ }
1676
+
1677
+ // OPTIMIZATION: Batch process term mapping
1678
+ const processedRecords = this.processTermMappingBatch(records)
1679
+
1680
+ // Apply schema enforcement to all records
1681
+ const schemaEnforcedRecords = processedRecords.map(record => this.applySchemaEnforcement(record))
1682
+
1683
+ // OPTIMIZATION: Add all records to writeBuffer at once
1684
+ this.writeBuffer.push(...schemaEnforcedRecords)
1685
+
1686
+ // OPTIMIZATION: Calculate offsets and sizes in batch (O(n))
1687
+ let runningTotalSize = this.writeBufferSizes.reduce((sum, size) => sum + size, 0)
1688
+ for (let i = 0; i < processedRecords.length; i++) {
1689
+ const record = processedRecords[i]
1690
+ // SPACE OPTIMIZATION: Remove term IDs before serialization
1691
+ const cleanRecord = this.removeTermIdsForSerialization(record)
1692
+ const recordJson = this.serializer.serialize(cleanRecord).toString('utf8')
1693
+ const recordSize = Buffer.byteLength(recordJson, 'utf8')
1694
+
1695
+ const recordOffset = this.indexOffset + runningTotalSize
1696
+ runningTotalSize += recordSize
1697
+
1698
+ this.writeBufferOffsets.push(recordOffset)
1699
+ this.writeBufferSizes.push(recordSize)
1700
+ }
1701
+
1702
+ // OPTIMIZATION: Batch process index updates
1703
+ if (!this.pendingIndexUpdates) {
1704
+ this.pendingIndexUpdates = []
1705
+ }
1706
+
1707
+ for (let i = 0; i < processedRecords.length; i++) {
1708
+ const lineNumber = startLineNumber + i
1709
+ this.pendingIndexUpdates.push({ record: processedRecords[i], lineNumber })
1710
+ }
1711
+
1712
+ this.shouldSave = true
1713
+ this.performanceStats.operations += processedRecords.length
1714
+
1715
+ // Emit events for all records
1716
+ if (this.listenerCount('inserted') > 0) {
1717
+ for (const record of processedRecords) {
1718
+ this.emit('inserted', record)
1719
+ }
1720
+ }
1721
+
1722
+ if (this.opts.debugMode) {
1723
+ console.log(`💾 _insertBatchInternal: done. added=${processedRecords.length}, writeBuffer=${this.writeBuffer.length}`)
1724
+ }
1725
+ return processedRecords
1726
+ }
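  // Offset arithmetic sketch (illustrative values): with indexOffset = 100
  // (end of saved data) and buffered record sizes [20, 35], the next record's
  // offset is
  //   100 + (20 + 35) = 155
  // i.e. indexOffset plus the running total of writeBufferSizes.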
1727
+
1728
+ /**
1729
+ * Find records matching criteria
1730
+ */
1731
+ async find(criteria = {}, options = {}) {
1732
+ this._validateInitialization('find')
1733
+
1734
+ // CRITICAL FIX: Validate state before find operation
1735
+ this.validateState()
1736
+
1737
+ // OPTIMIZATION: Find searches writeBuffer directly
1738
+
1739
+ const startTime = Date.now()
1740
+
1741
+ if (this.opts.debugMode) {
1742
+ console.log(`🔍 FIND START: criteria=${JSON.stringify(criteria)}, writeBuffer=${this.writeBuffer.length}`)
1743
+ }
1744
+
1745
+ try {
1746
+ // Validate indexed query mode if enabled
1747
+ if (this.opts.indexedQueryMode === 'strict') {
1748
+ this._validateIndexedQuery(criteria)
1749
+ }
1750
+
1751
+ // Get results from file (QueryManager already handles term ID restoration)
1752
+ const fileResultsWithTerms = await this.queryManager.find(criteria, options)
1753
+
1754
+ // Get results from writeBuffer
1755
+ const allPendingRecords = [...this.writeBuffer]
1756
+
1757
+ const writeBufferResults = this.queryManager.matchesCriteria ?
1758
+ allPendingRecords.filter(record => this.queryManager.matchesCriteria(record, criteria, options)) :
1759
+ allPendingRecords
1760
+
1761
+ // SPACE OPTIMIZATION: Restore term IDs to terms for writeBuffer results (unless disabled)
1762
+ const writeBufferResultsWithTerms = options.restoreTerms !== false ?
1763
+ writeBufferResults.map(record => this.restoreTermIdsAfterDeserialization(record)) :
1764
+ writeBufferResults
1765
+
1766
+
1767
+ // Combine results, removing duplicates (writeBuffer takes precedence)
1768
+ // OPTIMIZATION: Use Set-based id lookups for better performance when writeBuffer has many records
1769
+ let allResults
1770
+ if (writeBufferResults.length > 50) {
1771
+ // Set-based approach for large writeBuffer
1772
+ const [fileResultsSet, writeBufferSet] = await Promise.all([
1773
+ Promise.resolve(new Set(fileResultsWithTerms.map(r => r.id))),
1774
+ Promise.resolve(new Set(writeBufferResultsWithTerms.map(r => r.id)))
1775
+ ])
1776
+
1777
+ // Merge efficiently: keep file results not in writeBuffer, then add all writeBuffer results
1778
+ const filteredFileResults = await Promise.resolve(
1779
+ fileResultsWithTerms.filter(r => !writeBufferSet.has(r.id))
1780
+ )
1781
+ allResults = [...filteredFileResults, ...writeBufferResultsWithTerms]
1782
+ } else {
1783
+ // Sequential approach for small writeBuffer (original logic)
1784
+ allResults = [...fileResultsWithTerms]
1785
+
1786
+ // Replace file records with writeBuffer records and add new writeBuffer records
1787
+ for (const record of writeBufferResultsWithTerms) {
1788
+ const existingIndex = allResults.findIndex(r => r.id === record.id)
1789
+ if (existingIndex !== -1) {
1790
+ // Replace existing record with writeBuffer version
1791
+ allResults[existingIndex] = record
1792
+ } else {
1793
+ // Add new record from writeBuffer
1794
+ allResults.push(record)
1795
+ }
1796
+ }
1797
+ }
1798
+
1799
+ // Remove records that are marked as deleted
1800
+ const finalResults = allResults.filter(record => !this.deletedIds.has(record.id))
1801
+
1802
+ if (this.opts.debugMode) {
1803
+ console.log(`🔍 Database.find returning: ${finalResults?.length || 0} records (${fileResultsWithTerms.length} from file, ${writeBufferResults.length} from writeBuffer, ${this.deletedIds.size} deleted), type: ${typeof finalResults}, isArray: ${Array.isArray(finalResults)}`)
1804
+ }
1805
+
1806
+ this.performanceStats.queryTime += Date.now() - startTime
1807
+ return finalResults
1808
+ } catch (error) {
1809
+ // Don't log expected errors in strict mode or for array field validation
1810
+ if (this.opts.indexedQueryMode !== 'strict' || !error.message.includes('Strict indexed mode')) {
1811
+ // Don't log errors for array field validation as they are expected
1812
+ if (!error.message.includes('Invalid query for array field')) {
1813
+ console.error('Query failed:', error)
1814
+ }
1815
+ }
1816
+ throw error
1817
+ }
1818
+ }
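  // Merge-semantics sketch (annotation): writeBuffer versions shadow file
  // versions, and ids in deletedIds are dropped from the results.
  //
  //   await db.insert({ id: 'x', n: 2 })         // buffered, not yet saved
  //   const rows = await db.find({ id: 'x' })    // -> [{ id: 'x', n: 2 }]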
1819
+
1820
+ /**
1821
+ * Validate indexed query mode for strict mode
1822
+ * @private
1823
+ */
1824
+ _validateIndexedQuery(criteria) {
1825
+ if (!criteria || typeof criteria !== 'object') {
1826
+ return // Allow null/undefined criteria
1827
+ }
1828
+
1829
+ const indexedFields = Object.keys(this.opts.indexes || {})
1830
+ if (indexedFields.length === 0) {
1831
+ return // No indexed fields, allow all queries
1832
+ }
1833
+
1834
+ const queryFields = this._extractQueryFields(criteria)
1835
+ const nonIndexedFields = queryFields.filter(field => !indexedFields.includes(field))
1836
+
1837
+ if (nonIndexedFields.length > 0) {
1838
+ const availableFields = indexedFields.length > 0 ? indexedFields.join(', ') : 'none'
1839
+ if (nonIndexedFields.length === 1) {
1840
+ throw new Error(`Strict indexed mode: Field '${nonIndexedFields[0]}' is not indexed. Available indexed fields: ${availableFields}`)
1841
+ } else {
1842
+ throw new Error(`Strict indexed mode: Fields '${nonIndexedFields.join("', '")}' are not indexed. Available indexed fields: ${availableFields}`)
1843
+ }
1844
+ }
1845
+ }
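  // Strict-mode sketch (hypothetical indexes config { name: 'string' }):
  //
  //   await db.find({ age: 30 })
  //   // -> Error: Strict indexed mode: Field 'age' is not indexed.
  //   //           Available indexed fields: name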
1846
+
1847
+ /**
1848
+ * Create a shallow copy of a record for change detection
1849
+ * Optimized for known field types: number, string, null, or single-level arrays
1850
+ * @private
1851
+ */
1852
+ _createShallowCopy(record) {
1853
+ const copy = {}
1854
+ // Use for...in loop for better performance
1855
+ for (const key in record) {
1856
+ const value = record[key]
1857
+ // Optimize for common types first
1858
+ if (value === null || typeof value === 'number' || typeof value === 'string' || typeof value === 'boolean') {
1859
+ copy[key] = value
1860
+ } else if (Array.isArray(value)) {
1861
+ // Shallow-copy arrays (single-level fields); empty arrays get a fresh empty array
1862
+ copy[key] = value.length > 0 ? value.slice() : []
1863
+ } else if (typeof value === 'object') {
1864
+ // For complex objects, use shallow copy
1865
+ copy[key] = { ...value }
1866
+ } else {
1867
+ copy[key] = value
1868
+ }
1869
+ }
1870
+ return copy
1871
+ }
1872
+
1873
+ /**
1874
+ * Create an intuitive API wrapper using a class with Proxy
1875
+ * Combines the benefits of classes with the flexibility of Proxy
1876
+ * @private
1877
+ */
1878
+ _createEntryProxy(entry, originalRecord) {
1879
+ // Create a class instance that wraps the entry
1880
+ const iterateEntry = new IterateEntry(entry, originalRecord)
1881
+
1882
+ // Create a lightweight proxy that only intercepts property access
1883
+ return new Proxy(iterateEntry, {
1884
+ get(target, property) {
1885
+ // Handle special methods
1886
+ if (property === 'delete') {
1887
+ return () => target.delete()
1888
+ }
1889
+ if (property === 'value') {
1890
+ return target.value
1891
+ }
1892
+ if (property === 'isModified') {
1893
+ return target.isModified
1894
+ }
1895
+ if (property === 'isMarkedForDeletion') {
1896
+ return target.isMarkedForDeletion
1897
+ }
1898
+
1899
+ // For all other properties, return from the underlying entry
1900
+ return target._entry[property]
1901
+ },
1902
+
1903
+ set(target, property, value) {
1904
+ // Set the value in the underlying entry
1905
+ target._entry[property] = value
1906
+ target._modified = true
1907
+ return true
1908
+ }
1909
+ })
1910
+ }
1911
+
1912
+ /**
1913
+ * Create a high-performance wrapper for maximum speed
1914
+ * @private
1915
+ */
1916
+ _createHighPerformanceWrapper(entry, originalRecord) {
1917
+ // Create a simple wrapper object for high performance
1918
+ const wrapper = {
1919
+ value: entry,
1920
+ delete: () => {
1921
+ entry._markedForDeletion = true
1922
+ return true
1923
+ }
1924
+ }
1925
+
1926
+ // Mark for change tracking
1927
+ entry._modified = false
1928
+ entry._markedForDeletion = false
1929
+
1930
+ return wrapper
1931
+ }
1932
+
1933
+ /**
1934
+ * Check if a record has changed using optimized comparison
1935
+ * Optimized for known field types: number, string, null, or single-level arrays
1936
+ * @private
1937
+ */
1938
+ _hasRecordChanged(current, original) {
1939
+ // Quick reference check first
1940
+ if (current === original) return false
1941
+
1942
+ // Compare each field - optimized for common types
1943
+ for (const key in current) {
1944
+ const currentValue = current[key]
1945
+ const originalValue = original[key]
1946
+
1947
+ // Quick reference check (most common case)
1948
+ if (currentValue === originalValue) continue
1949
+
1950
+ // Handle null values
1951
+ if (currentValue === null || originalValue === null) {
1952
+ if (currentValue !== originalValue) return true
1953
+ continue
1954
+ }
1955
+
1956
+ // Handle primitive types (number, string, boolean) - most common
1957
+ const currentType = typeof currentValue
1958
+ if (currentType === 'number' || currentType === 'string' || currentType === 'boolean') {
1959
+ if (currentType !== typeof originalValue || currentValue !== originalValue) return true
1960
+ continue
1961
+ }
1962
+
1963
+ // Handle arrays (single-level) - second most common
1964
+ if (Array.isArray(currentValue)) {
1965
+ if (!Array.isArray(originalValue) || currentValue.length !== originalValue.length) return true
1966
+
1967
+ // Fast array comparison for primitive types
1968
+ for (let i = 0; i < currentValue.length; i++) {
1969
+ if (currentValue[i] !== originalValue[i]) return true
1970
+ }
1971
+ continue
1972
+ }
1973
+
1974
+ // Handle objects (shallow comparison only) - least common
1975
+ if (currentType === 'object') {
1976
+ if (typeof originalValue !== 'object') return true
1977
+
1978
+ // Fast object comparison using for...in
1979
+ for (const objKey in currentValue) {
1980
+ if (currentValue[objKey] !== originalValue[objKey]) return true
1981
+ }
1982
+ // Check if original has extra keys
1983
+ for (const objKey in originalValue) {
1984
+ if (!(objKey in currentValue)) return true
1985
+ }
1986
+ continue
1987
+ }
1988
+
1989
+ // Fallback for other types
1990
+ if (currentValue !== originalValue) return true
1991
+ }
1992
+
1993
+ // Check if original has extra keys (only if we haven't found differences yet)
1994
+ for (const key in original) {
1995
+ if (!(key in current)) return true
1996
+ }
1997
+
1998
+ return false
1999
+ }
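  // Comparison sketch:
  //
  //   db._hasRecordChanged({ a: 1, tags: ['x'] }, { a: 1, tags: ['x'] })   // -> false
  //   db._hasRecordChanged({ a: 1, tags: ['x'] }, { a: 1, tags: ['y'] })   // -> true (array element differs)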
2000
+
2001
+ /**
2002
+ * Extract field names from query criteria
2003
+ * @private
2004
+ */
2005
+ _extractQueryFields(criteria) {
2006
+ const fields = new Set()
2007
+
2008
+ const extractFromObject = (obj) => {
2009
+ for (const [key, value] of Object.entries(obj)) {
2010
+ if (key.startsWith('$')) {
2011
+ // Handle logical operators
2012
+ if (Array.isArray(value)) {
2013
+ value.forEach(item => {
2014
+ if (typeof item === 'object' && item !== null) {
2015
+ extractFromObject(item)
2016
+ }
2017
+ })
2018
+ } else if (typeof value === 'object' && value !== null) {
2019
+ extractFromObject(value)
2020
+ }
2021
+ } else {
2022
+ // Regular field
2023
+ fields.add(key)
2024
+ }
2025
+ }
2026
+ }
2027
+
2028
+ extractFromObject(criteria)
2029
+ return Array.from(fields)
2030
+ }
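  // Extraction sketch: logical operators are traversed, plain keys collected.
  //
  //   db._extractQueryFields({ $or: [{ name: 'Ada' }, { age: { $gte: 30 } }] })
  //   // -> ['name', 'age']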
2031
+
2032
+ /**
2033
+ * Update records matching criteria
2034
+ */
2035
+ async update(criteria, updateData) {
2036
+ this._validateInitialization('update')
2037
+
2038
+ return this.operationQueue.enqueue(async () => {
2039
+ this.isInsideOperationQueue = true
2040
+ try {
2041
+ const startTime = Date.now()
2042
+ if (this.opts.debugMode) {
2043
+ console.log(`🔄 UPDATE START: criteria=${JSON.stringify(criteria)}, updateData=${JSON.stringify(updateData)}`)
2044
+ }
2045
+
2046
+ // CRITICAL FIX: Validate state before update operation
2047
+ this.validateState()
2048
+
2049
+ // CRITICAL FIX: If there's data to save, call save() to persist it
2050
+ // Only save if there are actual records in writeBuffer
2051
+ if (this.shouldSave && this.writeBuffer.length > 0) {
2052
+ if (this.opts.debugMode) {
2053
+ console.log(`🔄 UPDATE: Calling save() before update - writeBuffer.length=${this.writeBuffer.length}`)
2054
+ }
2055
+ const saveStart = Date.now()
2056
+ await this.save(false) // Use save(false) since we're already in queue
2057
+ if (this.opts.debugMode) {
2058
+ console.log(`🔄 UPDATE: Save completed in ${Date.now() - saveStart}ms`)
2059
+ }
2060
+ }
2061
+
2062
+ if (this.opts.debugMode) {
2063
+ console.log(`🔄 UPDATE: Starting find() - writeBuffer=${this.writeBuffer.length}`)
2064
+ }
2065
+ const findStart = Date.now()
2066
+ // CRITICAL FIX: Get raw records without term restoration for update operations
2067
+ const records = await this.find(criteria, { restoreTerms: false })
2068
+ if (this.opts.debugMode) {
2069
+ console.log(`🔄 UPDATE: Find completed in ${Date.now() - findStart}ms, found ${records.length} records`)
2070
+ }
2071
+
2072
+ const updatedRecords = []
2073
+
2074
+ for (const record of records) {
2075
+ const recordStart = Date.now()
2076
+ if (this.opts.debugMode) {
2077
+ console.log(`🔄 UPDATE: Processing record ${record.id}`)
2078
+ }
2079
+
2080
+ const updated = { ...record, ...updateData }
2081
+
2082
+ // Process term mapping for update
2083
+ const termMappingStart = Date.now()
2084
+ this.processTermMapping(updated, true, record)
2085
+ if (this.opts.debugMode) {
2086
+ console.log(`🔄 UPDATE: Term mapping completed in ${Date.now() - termMappingStart}ms`)
2087
+ }
2088
+
2089
+ // CRITICAL FIX: Remove old terms from index before adding new ones
2090
+ if (this.indexManager) {
2091
+ await this.indexManager.remove(record)
2092
+ if (this.opts.debugMode) {
2093
+ console.log(`🔄 UPDATE: Removed old terms from index for record ${record.id}`)
2094
+ }
2095
+ }
2096
+
2097
+ // Update record in writeBuffer or add to writeBuffer if not present
2098
+ const index = this.writeBuffer.findIndex(r => r.id === record.id)
2099
+ let lineNumber = null
2100
+ if (index !== -1) {
2101
+ // Record is already in writeBuffer, update it
2102
+ this.writeBuffer[index] = updated
2103
+ lineNumber = index
2104
+ if (this.opts.debugMode) {
2105
+ console.log(`🔄 UPDATE: Updated existing writeBuffer record at index ${index}`)
2106
+ }
2107
+ } else {
2108
+ // Record is in file, add updated version to writeBuffer
2109
+ // This will ensure the updated record is saved and replaces the file version
2110
+ this.writeBuffer.push(updated)
2111
+ lineNumber = this.writeBuffer.length - 1
2112
+ if (this.opts.debugMode) {
2113
+ console.log(`🔄 UPDATE: Added new record to writeBuffer at index ${lineNumber}`)
2114
+ }
2115
+ }
2116
+
2117
+ const indexUpdateStart = Date.now()
2118
+ await this.indexManager.update(record, updated, lineNumber)
2119
+ if (this.opts.debugMode) {
2120
+ console.log(`🔄 UPDATE: Index update completed in ${Date.now() - indexUpdateStart}ms`)
2121
+ }
2122
+
2123
+ updatedRecords.push(updated)
2124
+ if (this.opts.debugMode) {
2125
+ console.log(`🔄 UPDATE: Record ${record.id} completed in ${Date.now() - recordStart}ms`)
2126
+ }
2127
+ }
2128
+
2129
+ this.shouldSave = true
2130
+ this.performanceStats.operations++
2131
+
2132
+ if (this.opts.debugMode) {
2133
+ console.log(`🔄 UPDATE COMPLETED: ${updatedRecords.length} records updated in ${Date.now() - startTime}ms`)
2134
+ }
2135
+
2136
+ this.emit('updated', updatedRecords)
2137
+ return updatedRecords
2138
+ } finally {
2139
+ this.isInsideOperationQueue = false
2140
+ }
2141
+ })
2142
+ }
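  // Usage sketch (annotation): updated records flow back through the
  // writeBuffer and are re-indexed; a later save() persists them.
  //
  //   const changed = await db.update({ name: 'Ada' }, { active: true })
  //   await db.save()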
2143
+
2144
+ /**
2145
+ * Delete records matching criteria
2146
+ */
2147
+ async delete(criteria) {
2148
+ this._validateInitialization('delete')
2149
+
2150
+ return this.operationQueue.enqueue(async () => {
2151
+ this.isInsideOperationQueue = true
2152
+ try {
2153
+ // CRITICAL FIX: Validate state before delete operation
2154
+ this.validateState()
2155
+
2156
+ const records = await this.find(criteria)
2157
+ const deletedIds = []
2158
+
2159
+ if (this.opts.debugMode) {
2160
+ console.log(`🗑️ Delete operation: found ${records.length} records to delete`)
2161
+ console.log(`🗑️ Records to delete:`, records.map(r => ({ id: r.id, name: r.name })))
2162
+ console.log(`🗑️ Current writeBuffer length: ${this.writeBuffer.length}`)
2163
+ console.log(`🗑️ Current deletedIds:`, Array.from(this.deletedIds))
2164
+ }
2165
+
2166
+ for (const record of records) {
2167
+ // Remove term mapping
2168
+ this.removeTermMapping(record)
2169
+
2170
+ await this.indexManager.remove(record)
2171
+
2172
+ // Remove record from writeBuffer or mark as deleted
2173
+ const index = this.writeBuffer.findIndex(r => r.id === record.id)
2174
+ if (index !== -1) {
2175
+ this.writeBuffer.splice(index, 1)
2176
+ if (this.opts.debugMode) {
2177
+ console.log(`🗑️ Removed record ${record.id} from writeBuffer`)
2178
+ }
2179
+ } else {
2180
+ // If record is not in writeBuffer (was saved), mark it as deleted
2181
+ this.deletedIds.add(record.id)
2182
+ if (this.opts.debugMode) {
2183
+ console.log(`🗑️ Marked record ${record.id} as deleted (not in writeBuffer)`)
2184
+ }
2185
+ }
2186
+ deletedIds.push(record.id)
2187
+ }
2188
+
2189
+ if (this.opts.debugMode) {
2190
+ console.log(`🗑️ After delete: writeBuffer length: ${this.writeBuffer.length}, deletedIds:`, Array.from(this.deletedIds))
2191
+ }
2192
+
2193
+ this.shouldSave = true
2194
+ this.performanceStats.operations++
2195
+
2196
+ this.emit('deleted', deletedIds)
2197
+ return deletedIds
2198
+ } finally {
2199
+ this.isInsideOperationQueue = false
2200
+ }
2201
+ })
2202
+ }
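  // Deletion sketch (annotation): buffered records are removed in place;
  // already-saved records are tombstoned in deletedIds until the next save().
  //
  //   const removedIds = await db.delete({ active: false })   // illustrative criteria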
2203
+
2204
+
2205
+ /**
2206
+ * Generate a unique ID
2207
+ */
2208
+ generateId() {
2209
+ return Date.now().toString(36) + Math.random().toString(36).slice(2)
2210
+ }
2211
+
2212
+ /**
2213
+ * Apply schema enforcement to a record
2214
+ * Converts object to array and back to enforce schema (remove extra fields, add undefined for missing fields)
2215
+ */
2216
+ applySchemaEnforcement(record) {
2217
+ // Only apply schema enforcement if fields configuration is explicitly provided
2218
+ if (!this.opts.fields) {
2219
+ return record // No schema enforcement without explicit fields configuration
2220
+ }
2221
+
2222
+ if (!this.serializer || !this.serializer.schemaManager || !this.serializer.schemaManager.isInitialized) {
2223
+ return record // No schema enforcement if schema not initialized
2224
+ }
2225
+
2226
+ // Convert to array format (enforces schema)
2227
+ const arrayFormat = this.serializer.convertToArrayFormat(record)
2228
+
2229
+ // Convert back to object (only schema fields will be present)
2230
+ const enforcedRecord = this.serializer.convertFromArrayFormat(arrayFormat)
2231
+
2232
+ // Preserve the ID if it exists
2233
+ if (record.id) {
2234
+ enforcedRecord.id = record.id
2235
+ }
2236
+
2237
+ return enforcedRecord
2238
+ }
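  // Enforcement sketch (hypothetical fields config covering name and age):
  //
  //   db.applySchemaEnforcement({ id: 'x1', name: 'Ada', age: 36, extra: true })
  //   // -> { name: 'Ada', age: 36, id: 'x1' }   // 'extra' dropped, id preserved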
2239
+
2240
+ /**
2241
+ * Initialize schema for array-based serialization
2242
+ */
2243
+ initializeSchema() {
2244
+ if (!this.serializer || !this.serializer.schemaManager) {
2245
+ return
2246
+ }
2247
+
2248
+ // Try to get schema from options first
2249
+ if (this.opts.schema && Array.isArray(this.opts.schema)) {
2250
+ this.serializer.initializeSchema(this.opts.schema)
2251
+ if (this.opts.debugMode) {
2252
+ console.log(`🔍 Schema initialized from options: ${this.opts.schema.join(', ')} [${this.instanceId}]`)
2253
+ }
2254
+ return
2255
+ }
2256
+
2257
+ // Try to initialize from fields configuration (new format)
2258
+ if (this.opts.fields && typeof this.opts.fields === 'object') {
2259
+ const fieldNames = Object.keys(this.opts.fields)
2260
+ if (fieldNames.length > 0) {
2261
+ this.serializer.initializeSchema(fieldNames)
2262
+ if (this.opts.debugMode) {
2263
+ console.log(`🔍 Schema initialized from fields: ${fieldNames.join(', ')} [${this.instanceId}]`)
2264
+ }
2265
+ return
2266
+ }
2267
+ }
2268
+
2269
+ // Try to auto-detect schema from existing data
2270
+ if (this.data && this.data.length > 0) {
2271
+ this.serializer.initializeSchema(this.data, true) // autoDetect = true
2272
+ if (this.opts.debugMode) {
2273
+ console.log(`🔍 Schema auto-detected from data: ${this.serializer.getSchema().join(', ')} [${this.instanceId}]`)
2274
+ }
2275
+ return
2276
+ }
2277
+
2278
+ // CRITICAL FIX: Don't initialize schema from indexes
2279
+ // This was causing data loss because only indexed fields were preserved
2280
+ // Let schema be auto-detected from actual data instead
2281
+
2282
+ if (this.opts.debugMode) {
2283
+ console.log(`🔍 No schema initialization possible - will auto-detect on first insert [${this.instanceId}]`)
2284
+ }
2285
+ }
2286
+
2287
+ /**
2288
+ * Get database length (number of records)
2289
+ */
2290
+ get length() {
2291
+ // Return total records: writeBuffer + saved records
2292
+ // writeBuffer contains unsaved records
2293
+ // For saved records, use the length of offsets array (number of saved records)
2294
+ const savedRecords = this.offsets.length
2295
+ const writeBufferRecords = this.writeBuffer.length
2296
+
2297
+ // CRITICAL FIX: Validate that offsets array is consistent with actual data
2298
+ // This prevents the bug where database reassignment causes desynchronization
2299
+ if (this.initialized && savedRecords > 0) {
2300
+ try {
2301
+ // Check if the offsets array is consistent with the actual file
2302
+ // If offsets exist but file is empty or corrupted, reset offsets
2303
+ if (this.fileHandler && this.fileHandler.file) {
2304
+ try {
2305
+ // Use synchronous file stats to check if file is empty
2306
+ const stats = fs.statSync(this.fileHandler.file)
2307
+ if (stats && stats.size === 0 && savedRecords > 0) {
2308
+ // File is empty but offsets array has records - this is the bug condition
2309
+ if (this.opts.debugMode) {
2310
+ console.log(`🔧 LENGTH FIX: Detected desynchronized offsets (${savedRecords} records) with empty file, resetting offsets`)
2311
+ }
2312
+ this.offsets = []
2313
+ return writeBufferRecords // Return only writeBuffer records
2314
+ }
2315
+ } catch (fileError) {
2316
+ // File doesn't exist or can't be read - reset offsets
2317
+ if (savedRecords > 0) {
2318
+ if (this.opts.debugMode) {
2319
+ console.log(`🔧 LENGTH FIX: File doesn't exist but offsets array has ${savedRecords} records, resetting offsets`)
2320
+ }
2321
+ this.offsets = []
2322
+ return writeBufferRecords
2323
+ }
2324
+ }
2325
+ }
2326
+ } catch (error) {
2327
+ // If we can't validate, fall back to the original behavior
2328
+ if (this.opts.debugMode) {
2329
+ console.log(`🔧 LENGTH FIX: Could not validate offsets, using original calculation: ${error.message}`)
2330
+ }
2331
+ }
2332
+ }
2333
+
2334
+ return writeBufferRecords + savedRecords
2335
+ }
2336
+
2337
+
2338
+ /**
2339
+ * Calculate current writeBuffer size in bytes (similar to published v1.1.0)
2340
+ */
2341
+ currentWriteBufferSize() {
2342
+ if (!this.writeBuffer || this.writeBuffer.length === 0) {
2343
+ return 0
2344
+ }
2345
+
2346
+ // Calculate total size of all records in writeBuffer
2347
+ let totalSize = 0
2348
+ for (const record of this.writeBuffer) {
2349
+ if (record) {
2350
+ // SPACE OPTIMIZATION: Remove term IDs before size calculation
2351
+ const cleanRecord = this.removeTermIdsForSerialization(record)
2352
+ const recordJson = JSON.stringify(cleanRecord) + '\n'
2353
+ totalSize += Buffer.byteLength(recordJson, 'utf8')
2354
+ }
2355
+ }
2356
+
2357
+ return totalSize
2358
+ }
2359
+
2360
+ /**
2361
+ * Get database statistics
2362
+ */
2363
+ getStats() {
2364
+ const stats = {
2365
+ records: this.writeBuffer.length,
2366
+ writeBufferSize: this.currentWriteBufferSize(),
2367
+ maxMemoryUsage: this.opts.maxMemoryUsage,
2368
+ performance: this.performanceStats,
2369
+ lastSave: this.lastSaveTime,
2370
+ shouldSave: this.shouldSave,
2371
+ initialized: this.initialized
2372
+ }
2373
+
2374
+ // Add term mapping stats if enabled
2375
+ if (this.opts.termMapping && this.termManager) {
2376
+ stats.termMapping = this.termManager.getStats()
2377
+ }
2378
+
2379
+ return stats
2380
+ }
2381
+
2382
+ /**
2383
+ * Initialize database (alias for initialize for backward compatibility)
2384
+ */
2385
+ async init() {
2386
+ return this.initialize()
2387
+ }
2388
+
2389
+ /**
2390
+ * Destroy database - DESTRUCTIVE MODE
2391
+ * Assumes save() has already been called by user
2392
+ * If anything is still active, it indicates a bug - log error and force cleanup
2393
+ */
2394
+ async destroy() {
2395
+ if (this.destroyed) return
2396
+
2397
+ // Mark as destroying immediately to prevent new operations
2398
+ this.destroying = true
2399
+
2400
+ // Wait for all active insert sessions to complete before destroying
2401
+ if (this.activeInsertSessions.size > 0) {
2402
+ if (this.opts.debugMode) {
2403
+ console.log(`⏳ destroy: Waiting for ${this.activeInsertSessions.size} active insert sessions`)
2404
+ }
2405
+
2406
+ const sessionPromises = Array.from(this.activeInsertSessions).map(session =>
2407
+ session.waitForOperations(null) // Wait indefinitely for sessions to complete
2408
+ )
2409
+
2410
+ try {
2411
+ await Promise.all(sessionPromises)
2412
+ } catch (error) {
2413
+ if (this.opts.debugMode) {
2414
+ console.log(`⚠️ destroy: Error waiting for sessions: ${error.message}`)
2415
+ }
2416
+ // Continue with destruction even if sessions have issues
2417
+ }
2418
+
2419
+ // Destroy all active sessions
2420
+ for (const session of this.activeInsertSessions) {
2421
+ session.destroy()
2422
+ }
2423
+ this.activeInsertSessions.clear()
2424
+ }
2425
+
2426
+ // CRITICAL FIX: Add timeout protection to prevent destroy() from hanging
2427
+ const destroyPromise = this._performDestroy()
2428
+ let timeoutHandle = null
2429
+ const timeoutPromise = new Promise((_, reject) => {
2430
+ timeoutHandle = setTimeout(() => {
2431
+ reject(new Error('Destroy operation timed out after 5 seconds'))
2432
+ }, 5000)
2433
+ })
2434
+
2435
+ try {
2436
+ await Promise.race([destroyPromise, timeoutPromise])
2437
+ } catch (error) {
2438
+ if (error.message === 'Destroy operation timed out after 5 seconds') {
2439
+ console.error('🚨 DESTROY TIMEOUT: Force destroying database after timeout')
2440
+ // Force mark as destroyed even if cleanup failed
2441
+ this.destroyed = true
2442
+ this.destroying = false
2443
+ return
2444
+ }
2445
+ throw error
2446
+ } finally {
2447
+ // Clear the timeout to prevent Jest open handle warning
2448
+ if (timeoutHandle) {
2449
+ clearTimeout(timeoutHandle)
2450
+ }
2451
+ }
2452
+ }
2453
+
2454
+ /**
2455
+ * Internal destroy implementation
2456
+ */
2457
+ async _performDestroy() {
2458
+ try {
2459
+ // CRITICAL: Check for bugs - anything active indicates save() didn't work properly
2460
+ const bugs = []
2461
+
2462
+ // Check for pending data that should have been saved
2463
+ if (this.writeBuffer.length > 0) {
2464
+ const bug = `BUG: writeBuffer has ${this.writeBuffer.length} records - save() should have cleared this`
2465
+ bugs.push(bug)
2466
+ console.error(`🚨 ${bug}`)
2467
+ }
2468
+
2469
+ // Check for pending operations that should have completed
2470
+ if (this.pendingOperations.size > 0) {
2471
+ const bug = `BUG: ${this.pendingOperations.size} pending operations - save() should have completed these`
2472
+ bugs.push(bug)
2473
+ console.error(`🚨 ${bug}`)
2474
+ }
2475
+
2476
+ // Auto-save manager removed - no cleanup needed
2477
+
2478
+ // Check for active save operation
2479
+ if (this.isSaving) {
2480
+ const bug = `BUG: save operation still active - previous save() should have completed`
2481
+ bugs.push(bug)
2482
+ console.error(`🚨 ${bug}`)
2483
+ }
2484
+
2485
+ // If bugs detected, throw error with details
2486
+ if (bugs.length > 0) {
2487
+ const errorMessage = `Database destroy() found ${bugs.length} bug(s) - save() did not complete properly:\n${bugs.join('\n')}`
2488
+ console.error(`🚨 DESTROY ERROR: ${errorMessage}`)
2489
+ throw new Error(errorMessage)
2490
+ }
2491
+
2492
+ // FORCE DESTRUCTIVE CLEANUP - no waiting, no graceful shutdown
2493
+ if (this.opts.debugMode) {
2494
+ console.log('💥 DESTRUCTIVE DESTROY: Force cleaning up all resources')
2495
+ }
2496
+
2497
+ // Cancel all operations immediately
2498
+ this.abortController.abort()
2499
+
2500
+ // Auto-save removed - no cleanup needed
2501
+
2502
+ // Clear all buffers and data structures
2503
+ this.writeBuffer = []
2504
+ this.writeBufferOffsets = []
2505
+ this.writeBufferSizes = []
2506
+ this.deletedIds.clear()
2507
+ this.pendingOperations.clear()
2508
+ this.pendingIndexUpdates = []
2509
+
2510
+ // Force close file handlers
2511
+ if (this.fileHandler) {
2512
+ try {
2513
+ // Force close any open file descriptors
2514
+ await this.fileHandler.close?.()
2515
+ } catch (error) {
2516
+ // Ignore file close errors during destructive cleanup
2517
+ }
2518
+ }
2519
+
2520
+ // Clear all managers
2521
+ if (this.indexManager) {
2522
+ this.indexManager.clear?.()
2523
+ }
2524
+
2525
+ if (this.termManager) {
2526
+ this.termManager.clear?.()
2527
+ }
2528
+
2529
+ if (this.queryManager) {
2530
+ this.queryManager.clear?.()
2531
+ }
2532
+
2533
+ // Clear operation queue
2534
+ if (this.operationQueue) {
2535
+ this.operationQueue.clear?.()
2536
+ this.operationQueue = null
2537
+ }
2538
+
2539
+ // Mark as destroyed
2540
+ this.destroyed = true
2541
+ this.destroying = false
2542
+
2543
+ if (this.opts.debugMode) {
2544
+ console.log('💥 DESTRUCTIVE DESTROY: Database completely destroyed')
2545
+ }
2546
+
2547
+ } catch (error) {
2548
+ // Even if cleanup fails, mark as destroyed
2549
+ this.destroyed = true
2550
+ this.destroying = false
2551
+
2552
+ // Re-throw the error so user knows about the bug
2553
+ throw error
2554
+ }
2555
+ }
2556
+
2557
+ /**
2558
+ * Find one record
2559
+ */
2560
+ async findOne(criteria, options = {}) {
2561
+ this._validateInitialization('findOne')
2562
+
2563
+ const results = await this.find(criteria, { ...options, limit: 1 })
2564
+ return results.length > 0 ? results[0] : null
2565
+ }
2566
+
2567
+ /**
2568
+ * Count records
2569
+ */
2570
+ async count(criteria = {}, options = {}) {
2571
+ this._validateInitialization('count')
2572
+
2573
+ const results = await this.find(criteria, options)
2574
+ return results.length
2575
+ }
2576
+
2577
+ /**
2578
+ * Score records based on weighted terms in an indexed array:string field
2579
+ * @param {string} fieldName - Name of indexed array:string field
2580
+ * @param {object} scores - Map of terms to numeric weights
2581
+ * @param {object} options - Query options
2582
+ * @returns {Promise<Array>} Records with scores, sorted by score
2583
+ */
2584
+ async score(fieldName, scores, options = {}) {
2585
+ // Validate initialization
2586
+ this._validateInitialization('score')
2587
+
2588
+ // Set default options
2589
+ const opts = {
2590
+ limit: options.limit ?? 100,
2591
+ sort: options.sort ?? 'desc',
2592
+ includeScore: options.includeScore !== false
2593
+ }
2594
+
2595
+ // Validate fieldName
2596
+ if (typeof fieldName !== 'string' || !fieldName) {
2597
+ throw new Error('fieldName must be a non-empty string')
2598
+ }
2599
+
2600
+ // Validate scores object
2601
+ if (!scores || typeof scores !== 'object' || Array.isArray(scores)) {
2602
+ throw new Error('scores must be an object')
2603
+ }
2604
+
2605
+ // Handle empty scores - return empty array as specified
2606
+ if (Object.keys(scores).length === 0) {
2607
+ return []
2608
+ }
2609
+
2610
+ // Validate scores values are numeric
2611
+ for (const [term, weight] of Object.entries(scores)) {
2612
+ if (typeof weight !== 'number' || isNaN(weight)) {
2613
+ throw new Error(`Score value for term "${term}" must be a number`)
2614
+ }
2615
+ }
2616
+
2617
+ // Check if field is indexed and is array:string type
2618
+ if (!this.opts.indexes || !this.opts.indexes[fieldName]) {
2619
+ throw new Error(`Field "${fieldName}" is not indexed`)
2620
+ }
2621
+
2622
+ const fieldType = this.opts.indexes[fieldName]
2623
+ if (fieldType !== 'array:string') {
2624
+ throw new Error(`Field "${fieldName}" must be of type "array:string" (found: ${fieldType})`)
2625
+ }
2626
+
2627
+ // Check if this is a term-mapped field
2628
+ const isTermMapped = this.termManager &&
2629
+ this.termManager.termMappingFields &&
2630
+ this.termManager.termMappingFields.includes(fieldName)
2631
+
2632
+ // Access the index for this field
2633
+ const fieldIndex = this.indexManager.index.data[fieldName]
2634
+ if (!fieldIndex) {
2635
+ return []
2636
+ }
2637
+
2638
+ // Accumulate scores for each line number
2639
+ const scoreMap = new Map()
2640
+
2641
+ // Iterate through each term in the scores object
2642
+ for (const [term, weight] of Object.entries(scores)) {
2643
+ // Get term ID if this is a term-mapped field
2644
+ let termKey
2645
+ if (isTermMapped) {
2646
+ // For term-mapped fields, convert term to term ID
2647
+ const termId = this.termManager.getTermIdWithoutIncrement(term)
2648
+ if (termId === null || termId === undefined) {
2649
+ // Term doesn't exist, skip it
2650
+ continue
2651
+ }
2652
+ termKey = String(termId)
2653
+ } else {
2654
+ termKey = String(term)
2655
+ }
2656
+
2657
+ // Look up line numbers for this term
2658
+ const termData = fieldIndex[termKey]
2659
+ if (!termData) {
2660
+ // Term doesn't exist in index, skip
2661
+ continue
2662
+ }
2663
+
2664
+ // Get all line numbers for this term
2665
+ const lineNumbers = this.indexManager._getAllLineNumbers(termData)
2666
+
2667
+ // Add weight to score for each line number
2668
+ for (const lineNumber of lineNumbers) {
2669
+ const currentScore = scoreMap.get(lineNumber) || 0
2670
+ scoreMap.set(lineNumber, currentScore + weight)
2671
+ }
2672
+ }
2673
+
2674
+ // Filter out zero scores and sort by score
2675
+ const scoredEntries = Array.from(scoreMap.entries())
2676
+ .filter(([, score]) => score > 0)
2677
+
2678
+ // Sort by score
2679
+ scoredEntries.sort((a, b) => {
2680
+ return opts.sort === 'asc' ? a[1] - b[1] : b[1] - a[1]
2681
+ })
2682
+
2683
+ // Apply limit
2684
+ const limitedEntries = opts.limit > 0
2685
+ ? scoredEntries.slice(0, opts.limit)
2686
+ : scoredEntries
2687
+
2688
+ if (limitedEntries.length === 0) {
2689
+ return []
2690
+ }
2691
+
2692
+ // Fetch actual records
2693
+ const lineNumbers = limitedEntries.map(([lineNumber]) => lineNumber)
2694
+ const scoresByLineNumber = new Map(limitedEntries)
2695
+
2696
+ // Use getRanges and fileHandler to read records
2697
+ const ranges = this.getRanges(lineNumbers)
2698
+ const groupedRanges = await this.fileHandler.groupedRanges(ranges)
2699
+
2700
+ const fs = await import('fs')
2701
+ const fd = await fs.promises.open(this.fileHandler.file, 'r')
2702
+
2703
+ const results = []
2704
+
2705
+ try {
2706
+ for (const groupedRange of groupedRanges) {
2707
+ for await (const row of this.fileHandler.readGroupedRange(groupedRange, fd)) {
2708
+ try {
2709
+ const record = this.serializer.deserialize(row.line)
2710
+
2711
+ // Get line number from the row
2712
+ const lineNumber = row._ || 0
2713
+
2714
+ // Restore term IDs to terms
2715
+ const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
2716
+
2717
+ // Add line number
2718
+ recordWithTerms._ = lineNumber
2719
+
2720
+ // Add score if includeScore is true
2721
+ if (opts.includeScore) {
2722
+ recordWithTerms.score = scoresByLineNumber.get(lineNumber) || 0
2723
+ }
2724
+
2725
+ results.push(recordWithTerms)
2726
+ } catch (error) {
2727
+ // Skip invalid lines
2728
+ if (this.opts.debugMode) {
2729
+ console.error('Error deserializing record in score():', error)
2730
+ }
2731
+ }
2732
+ }
2733
+ }
2734
+ } finally {
2735
+ await fd.close()
2736
+ }
2737
+
2738
+ // Re-sort results to maintain score order (since reads might be out of order)
2739
+ results.sort((a, b) => {
2740
+ const scoreA = scoresByLineNumber.get(a._) || 0
2741
+ const scoreB = scoresByLineNumber.get(b._) || 0
2742
+ return opts.sort === 'asc' ? scoreA - scoreB : scoreB - scoreA
2743
+ })
2744
+
2745
+ return results
2746
+ }
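  // Scoring sketch (illustrative weights; the field must be indexed as 'array:string'):
  //
  //   const ranked = await db.score('tags', { news: 2, tech: 1 }, { limit: 5 })
  //   // a record tagged ['news', 'tech'] accumulates 2 + 1 = 3 and sorts first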
2747
+
2748
+ /**
2749
+ * Wait for all pending operations to complete
2750
+ */
2751
+ async _waitForPendingOperations() {
2752
+ if (this.operationQueue && this.operationQueue.getQueueLength() > 0) {
2753
+ if (this.opts.debugMode) {
2754
+ console.log('💾 Save: Waiting for pending operations to complete')
2755
+ }
2756
+ // CRITICAL FIX: Wait without timeout to ensure all operations complete
2757
+ // This prevents race conditions and data loss
2758
+ await this.operationQueue.waitForCompletion(null)
2759
+
2760
+ // Verify queue is actually empty
2761
+ if (this.operationQueue.getQueueLength() > 0) {
2762
+ throw new Error('Operation queue not empty after wait')
2763
+ }
2764
+ }
2765
+ }
2766
+
2767
+ /**
2768
+ * Flush write buffer completely with smart detection of ongoing insertions
2769
+ */
2770
+ async _flushWriteBufferCompletely() {
2771
+ // Force complete flush of write buffer with intelligent detection
2772
+ let attempts = 0
2773
+ const maxStuckAttempts = 5 // Maximum attempts with identical data (only protection against infinite loops)
2774
+ let stuckAttempts = 0
2775
+ let lastBufferSample = null
2776
+
2777
+ // CRITICAL FIX: Remove maxAttempts limit - only stop when buffer is empty or truly stuck
2778
+ while (this.writeBuffer.length > 0) {
2779
+ const currentLength = this.writeBuffer.length
2780
+ const currentSample = this._getBufferSample() // Get lightweight sample
2781
+
2782
+ // Process write buffer items
2783
+ await this._processWriteBuffer()
2784
+
2785
+ // Check if buffer is actually stuck (same data) vs new data being added
2786
+ if (this.writeBuffer.length === currentLength) {
2787
+ // Check if the data is identical (stuck) or new data was added
2788
+ if (this._isBufferSampleIdentical(currentSample, lastBufferSample)) {
2789
+ stuckAttempts++
2790
+ if (this.opts.debugMode) {
2791
+ console.log(`💾 Flush: Buffer appears stuck (identical data), attempt ${stuckAttempts}/${maxStuckAttempts}`)
2792
+ }
2793
+
2794
+ if (stuckAttempts >= maxStuckAttempts) {
2795
+ throw new Error(`Write buffer flush stuck - identical data detected after ${maxStuckAttempts} attempts`)
2796
+ }
2797
+ } else {
2798
+ // New data was added, reset stuck counter
2799
+ stuckAttempts = 0
2800
+ if (this.opts.debugMode) {
2801
+ console.log(`💾 Flush: New data detected, continuing flush (${this.writeBuffer.length} items remaining)`)
2802
+ }
2803
+ }
2804
+ lastBufferSample = currentSample
2805
+ } else {
2806
+ // Progress was made, reset stuck counter
2807
+ stuckAttempts = 0
2808
+ lastBufferSample = null
2809
+ if (this.opts.debugMode) {
2810
+ console.log(`💾 Flush: Progress made, ${currentLength - this.writeBuffer.length} items processed, ${this.writeBuffer.length} remaining`)
2811
+ }
2812
+ }
2813
+
2814
+ attempts++
2815
+
2816
+ // Small delay to allow ongoing operations to complete
2817
+ if (this.writeBuffer.length > 0) {
2818
+ await new Promise(resolve => setTimeout(resolve, 10))
2819
+ }
2820
+ }
2821
+
2822
+ // CRITICAL FIX: Remove the artificial limit check - buffer should be empty by now
2823
+ // If we reach here, the buffer is guaranteed to be empty due to the while condition
2824
+
2825
+ if (this.opts.debugMode) {
2826
+ console.log(`💾 Flush completed successfully after ${attempts} attempts`)
2827
+ }
2828
+ }
2829
+
2830
+ /**
2831
+ * Get a lightweight sample of the write buffer for comparison
2832
+ * @returns {Object} - Sample data for comparison
2833
+ */
2834
+ _getBufferSample() {
2835
+ if (!this.writeBuffer || this.writeBuffer.length === 0) {
2836
+ return null
2837
+ }
2838
+
2839
+ // Create a lightweight sample using first few items and their IDs
2840
+ const sampleSize = Math.min(5, this.writeBuffer.length)
2841
+ const sample = {
2842
+ length: this.writeBuffer.length,
2843
+ firstIds: [],
2844
+ lastIds: [],
2845
+ checksum: 0
2846
+ }
2847
+
2848
+ // Sample first few items
2849
+ for (let i = 0; i < sampleSize; i++) {
2850
+ const item = this.writeBuffer[i]
2851
+ if (item && item.id) {
2852
+ sample.firstIds.push(item.id)
2853
+ // Simple checksum using ID hash
2854
+ sample.checksum += item.id.toString().split('').reduce((a, b) => a + b.charCodeAt(0), 0)
2855
+ }
2856
+ }
2857
+
2858
+ // Sample last few items if buffer is large
2859
+ if (this.writeBuffer.length > sampleSize) {
2860
+ for (let i = Math.max(0, this.writeBuffer.length - sampleSize); i < this.writeBuffer.length; i++) {
2861
+ const item = this.writeBuffer[i]
2862
+ if (item && item.id) {
2863
+ sample.lastIds.push(item.id)
2864
+ sample.checksum += item.id.toString().split('').reduce((a, b) => a + b.charCodeAt(0), 0)
2865
+ }
2866
+ }
2867
+ }
2868
+
2869
+ return sample
2870
+ }
2871
+
2872
+ /**
2873
+ * Check if two buffer samples are identical (indicating stuck flush)
2874
+ * @param {Object} sample1 - First sample
2875
+ * @param {Object} sample2 - Second sample
2876
+ * @returns {boolean} - True if samples are identical
2877
+ */
2878
+ _isBufferSampleIdentical(sample1, sample2) {
2879
+ if (!sample1 || !sample2) {
2880
+ return false
2881
+ }
2882
+
2883
+ // Quick checks: different lengths or checksums mean different data
2884
+ if (sample1.length !== sample2.length || sample1.checksum !== sample2.checksum) {
2885
+ return false
2886
+ }
2887
+
2888
+ // Compare first IDs
2889
+ if (sample1.firstIds.length !== sample2.firstIds.length) {
2890
+ return false
2891
+ }
2892
+
2893
+ for (let i = 0; i < sample1.firstIds.length; i++) {
2894
+ if (sample1.firstIds[i] !== sample2.firstIds[i]) {
2895
+ return false
2896
+ }
2897
+ }
2898
+
2899
+ // Compare last IDs
2900
+ if (sample1.lastIds.length !== sample2.lastIds.length) {
2901
+ return false
2902
+ }
2903
+
2904
+ for (let i = 0; i < sample1.lastIds.length; i++) {
2905
+ if (sample1.lastIds[i] !== sample2.lastIds[i]) {
2906
+ return false
2907
+ }
2908
+ }
2909
+
2910
+ return true
2911
+ }
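  // Stuck-detection sketch (illustrative sample values):
  //
  //   const s = { length: 3, firstIds: ['a'], lastIds: ['c'], checksum: 294 }
  //   db._isBufferSampleIdentical(s, { ...s, checksum: 295 })   // -> false (data changed)
  //   db._isBufferSampleIdentical(s, { ...s })                  // -> true  (no progress)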
2912
+
2913
+ /**
2914
+ * Process write buffer items
2915
+ */
2916
+ async _processWriteBuffer() {
2917
+ // Process write buffer items without loading entire file
2918
+ // OPTIMIZATION: Use Set directly for both processing and lookup - single variable, better performance
2919
+ const itemsToProcess = new Set(this.writeBuffer)
2920
+
2921
+ // CRITICAL FIX: Don't clear writeBuffer immediately - wait for processing to complete
2922
+ // This prevents race conditions where new operations arrive while old ones are still processing
2923
+
2924
+ // OPTIMIZATION: Separate buffer items from object items for batch processing
2925
+ const bufferItems = []
2926
+ const objectItems = []
2927
+
2928
+ for (const item of itemsToProcess) {
2929
+ if (Buffer.isBuffer(item)) {
2930
+ bufferItems.push(item)
2931
+ } else if (typeof item === 'object' && item !== null) {
2932
+ objectItems.push(item)
2933
+ }
2934
+ }
2935
+
2936
+ // Process buffer items individually (they're already optimized)
2937
+ for (const buffer of bufferItems) {
2938
+ await this._processBufferItem(buffer)
2939
+ }
2940
+
2941
+ // OPTIMIZATION: Process all object items in a single write operation
2942
+ if (objectItems.length > 0) {
2943
+ await this._processObjectItemsBatch(objectItems)
2944
+ }
2945
+
2946
+ // CRITICAL FIX: Only remove processed items from writeBuffer after all async operations complete
2947
+ // OPTIMIZATION: Use Set.has() for O(1) lookup - same Set used for processing
2948
+ const beforeLength = this.writeBuffer.length
2949
+ this.writeBuffer = this.writeBuffer.filter(item => !itemsToProcess.has(item))
2950
+ const afterLength = this.writeBuffer.length
2951
+
2952
+ if (this.opts.debugMode && beforeLength !== afterLength) {
2953
+ console.log(`💾 _processWriteBuffer: Removed ${beforeLength - afterLength} items from writeBuffer (${beforeLength} -> ${afterLength})`)
2954
+ }
2955
+ }
2956
+
2957
+ /**
2958
+ * Process individual buffer item
2959
+ */
2960
+ async _processBufferItem(buffer) {
2961
+ // Process buffer item without loading entire file
2962
+ // This ensures we don't load the entire data file into memory
2963
+ if (this.fileHandler) {
2964
+ // Use writeDataAsync for non-blocking I/O
2965
+ await this.fileHandler.writeDataAsync(buffer)
2966
+ }
2967
+ }
2968
+
2969
+ /**
2970
+ * Process individual object item
2971
+ */
2972
+ async _processObjectItem(obj) {
2973
+ // Process object item without loading entire file
2974
+ if (this.fileHandler) {
2975
+ // SPACE OPTIMIZATION: Remove term IDs before serialization
2976
+ const cleanRecord = this.removeTermIdsForSerialization(obj)
2977
+ const jsonString = this.serializer.serialize(cleanRecord).toString('utf8')
2978
+ // Use writeDataAsync for non-blocking I/O
2979
+ await this.fileHandler.writeDataAsync(Buffer.from(jsonString, 'utf8'))
2980
+ }
2981
+ }
2982
+
2983
+ /**
2984
+ * Process multiple object items in a single batch write operation
2985
+ */
2986
+ async _processObjectItemsBatch(objects) {
2987
+ if (!this.fileHandler || objects.length === 0) return
2988
+
2989
+ // OPTIMIZATION: Combine all objects into a single buffer for one write operation
2990
+ // SPACE OPTIMIZATION: Remove term IDs before serialization
2991
+ const jsonStrings = objects.map(obj => this.serializer.serialize(this.removeTermIdsForSerialization(obj)).toString('utf8'))
2992
+ const combinedString = jsonStrings.join('')
2993
+
2994
+ // CRITICAL FIX: Validate that the combined string ends with newline
2995
+ const validatedString = combinedString.endsWith('\n') ? combinedString : combinedString + '\n'
2996
+ const buffer = Buffer.from(validatedString, 'utf8')
2997
+
2998
+ // Single write operation for all objects
2999
+ await this.fileHandler.writeDataAsync(buffer)
3000
+ }
3001
+
3002
+ /**
3003
+ * Wait for all I/O operations to complete
3004
+ */
3005
+ async _waitForIOCompletion() {
3006
+ // Wait for all file operations to complete
3007
+ if (this.fileHandler && this.fileHandler.fileMutex) {
3008
+ await this.fileHandler.fileMutex.runExclusive(async () => {
3009
+ // Holding the mutex serializes behind pending writes; the short delay is a safety margin
3010
+ await new Promise(resolve => setTimeout(resolve, 50))
3011
+ })
3012
+ }
3013
+ }
3014
+
3015
+ /**
3016
+ * CRITICAL FIX: Safe fallback method to load existing records when _streamExistingRecords fails
3017
+ * This prevents data loss by attempting alternative methods to preserve existing data
3018
+ */
3019
+ async _loadExistingRecordsFallback(deletedIdsSnapshot, writeBufferSnapshot) {
3020
+ const existingRecords = []
3021
+
3022
+ try {
3023
+ if (this.opts.debugMode) {
3024
+ console.log(`💾 Save: Attempting fallback method to load existing records`)
3025
+ }
3026
+
3027
+ // Method 1: Try to read the entire file and filter
3028
+ if (this.fileHandler.exists()) {
3029
+ const fs = await import('fs')
3030
+ const fileContent = await fs.promises.readFile(this.normalizedFile, 'utf8')
3031
+ const lines = fileContent.split('\n').filter(line => line.trim())
3032
+
3033
+ for (let i = 0; i < lines.length && i < this.offsets.length; i++) {
3034
+ try {
3035
+ const record = this.serializer.deserialize(lines[i])
3036
+ if (record && !deletedIdsSnapshot.has(record.id)) {
3037
+ // Check if this record is not being updated in writeBuffer
3038
+ const updatedRecord = writeBufferSnapshot.find(r => r.id === record.id)
3039
+ if (!updatedRecord) {
3040
+ existingRecords.push(record)
3041
+ }
3042
+ }
3043
+ } catch (error) {
3044
+ // Skip invalid lines
3045
+ if (this.opts.debugMode) {
3046
+ console.log(`💾 Save: Skipping invalid line ${i} in fallback:`, error.message)
3047
+ }
3048
+ }
3049
+ }
3050
+ }
3051
+
3052
+ if (this.opts.debugMode) {
3053
+ console.log(`💾 Save: Fallback method loaded ${existingRecords.length} existing records`)
3054
+ }
3055
+
3056
+ return existingRecords
3057
+
3058
+ } catch (error) {
3059
+ if (this.opts.debugMode) {
3060
+ console.log(`💾 Save: Fallback method failed:`, error.message)
3061
+ }
3062
+ // Return empty array as last resort - better than losing all data
3063
+ return []
3064
+ }
3065
+ }
3066
+
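Stripped of the offset bookkeeping, the fallback boils down to read-everything-and-filter. A sketch under the assumption that records are plain JSON lines (the real code routes through `this.serializer` and honors `this.offsets`):

```js
import { readFile } from 'fs/promises'

// Read the whole JSONL file and keep only records that were neither
// deleted nor superseded by a buffered update.
async function readSurvivors(file, deletedIds, pendingWrites) {
  const lines = (await readFile(file, 'utf8')).split('\n').filter(l => l.trim())
  const pendingIds = new Set(pendingWrites.map(r => r.id))
  const survivors = []
  for (const line of lines) {
    try {
      const record = JSON.parse(line)
      if (!deletedIds.has(record.id) && !pendingIds.has(record.id)) {
        survivors.push(record)
      }
    } catch {
      // Skip corrupt lines instead of failing the whole read
    }
  }
  return survivors
}
```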
3067
+ /**
3068
+ * Stream existing records without loading entire file into memory
3069
+ * Optimized with group ranging and reduced JSON parsing
3070
+ */
3071
+ async _streamExistingRecords(deletedIdsSnapshot, writeBufferSnapshot) {
3072
+ const existingRecords = []
3073
+
3074
+ if (this.offsets.length === 0) {
3075
+ return existingRecords
3076
+ }
3077
+
3078
+ // NOTE: Setting existingRecords.length up front would leave undefined slots, so the array grows as needed
3079
+ // existingRecords.length = this.offsets.length
3080
+
3081
+ // Create a map of updated records for quick lookup
3082
+ const updatedRecordsMap = new Map()
3083
+ writeBufferSnapshot.forEach(record => {
3084
+ updatedRecordsMap.set(record.id, record)
3085
+ })
3086
+
3087
+ // OPTIMIZATION: Cache file stats to avoid repeated stat() calls
3088
+ let fileSize = 0
3089
+ if (this._cachedFileStats && this._cachedFileStats.timestamp > Date.now() - 1000) {
3090
+ // Use cached stats if less than 1 second old
3091
+ fileSize = this._cachedFileStats.size
3092
+ } else {
3093
+ // Get fresh stats and cache them
3094
+ const fileStats = (await this.fileHandler.exists()) ? await fs.promises.stat(this.normalizedFile) : null
3095
+ fileSize = fileStats ? fileStats.size : 0
3096
+ this._cachedFileStats = {
3097
+ size: fileSize,
3098
+ timestamp: Date.now()
3099
+ }
3100
+ }
3101
+
3102
+ // CRITICAL FIX: Ensure indexOffset is consistent with actual file size
3103
+ if (this.indexOffset > fileSize) {
3104
+ if (this.opts.debugMode) {
3105
+ console.log(`💾 Save: Correcting indexOffset from ${this.indexOffset} to ${fileSize} (file size)`)
3106
+ }
3107
+ this.indexOffset = fileSize
3108
+ }
3109
+
3110
+ // Build ranges array for group reading
3111
+ const ranges = []
3112
+ for (let i = 0; i < this.offsets.length; i++) {
3113
+ const offset = this.offsets[i]
3114
+ let nextOffset = i + 1 < this.offsets.length ? this.offsets[i + 1] : this.indexOffset
3115
+
3116
+ if (this.opts.debugMode) {
3117
+ console.log(`💾 Save: Building range for record ${i}: offset=${offset}, nextOffset=${nextOffset}`)
3118
+ }
3119
+
3120
+ // CRITICAL FIX: Handle case where indexOffset is 0 (new database without index)
3121
+ if (nextOffset === 0 && i + 1 >= this.offsets.length) {
3122
+ // For the last record when there's no index yet, we need to find the actual end
3123
+ // Read a bit more data to find the newline character that ends the record
3124
+ const searchEnd = Math.min(offset + 1000, fileSize) // Search up to 1000 bytes ahead
3125
+ if (searchEnd > offset) {
3126
+ try {
3127
+ const searchBuffer = await this.fileHandler.readRange(offset, searchEnd)
3128
+ const searchText = searchBuffer.toString('utf8')
3129
+
3130
+ // Look for the end of the JSON record (closing brace followed by newline or end of data)
3131
+ let recordEnd = -1
3132
+ let braceCount = 0
3133
+ let inString = false
3134
+ let escapeNext = false
3135
+
3136
+ for (let j = 0; j < searchText.length; j++) {
3137
+ const char = searchText[j]
3138
+
3139
+ if (escapeNext) {
3140
+ escapeNext = false
3141
+ continue
3142
+ }
3143
+
3144
+ if (char === '\\') {
3145
+ escapeNext = true
3146
+ continue
3147
+ }
3148
+
3149
+ if (char === '"' && !escapeNext) {
3150
+ inString = !inString
3151
+ continue
3152
+ }
3153
+
3154
+ if (!inString) {
3155
+ if (char === '{') {
3156
+ braceCount++
3157
+ } else if (char === '}') {
3158
+ braceCount--
3159
+ if (braceCount === 0) {
3160
+ // Found the end of the JSON object
3161
+ recordEnd = j + 1
3162
+ break
3163
+ }
3164
+ }
3165
+ }
3166
+ }
3167
+
3168
+ if (recordEnd !== -1) {
3169
+ nextOffset = offset + recordEnd
3170
+ } else {
3171
+ // If we can't find the end, read to end of file
3172
+ nextOffset = fileSize
3173
+ }
3174
+ } catch (error) {
3175
+ // Fallback to end of file if search fails
3176
+ nextOffset = fileSize
3177
+ }
3178
+ } else {
3179
+ nextOffset = fileSize
3180
+ }
3181
+ }
3182
+
3183
+ // Validate offset ranges
3184
+ if (offset < 0) {
3185
+ if (this.opts.debugMode) {
3186
+ console.log(`💾 Save: Skipped negative offset ${offset}`)
3187
+ }
3188
+ continue
3189
+ }
3190
+
3191
+ // CRITICAL FIX: Allow offsets that are at or beyond file size (for new records)
3192
+ if (fileSize > 0 && offset > fileSize) {
3193
+ if (this.opts.debugMode) {
3194
+ console.log(`💾 Save: Skipped offset ${offset} beyond file size ${fileSize}`)
3195
+ }
3196
+ continue
3197
+ }
3198
+
3199
+ if (nextOffset <= offset) {
3200
+ if (this.opts.debugMode) {
3201
+ console.log(`💾 Save: Skipped invalid range [${offset}, ${nextOffset}]`)
3202
+ }
3203
+ continue
3204
+ }
3205
+
3206
+ ranges.push({ start: offset, end: nextOffset, index: i })
3207
+ }
3208
+
3209
+ if (ranges.length === 0) {
3210
+ return existingRecords
3211
+ }
3212
+
3213
+ // Use group ranging for efficient reading
3214
+ const recordLines = await this.fileHandler.readRanges(ranges, async (lineString, range) => {
3215
+ if (!lineString || !lineString.trim()) {
3216
+ return null
3217
+ }
3218
+
3219
+ const trimmedLine = lineString.trim()
3220
+
3221
+ // DEBUG: Log range reads when debugMode is enabled
3222
+ if (this.opts.debugMode) {
3223
+ console.log(`💾 Save: Reading range ${range.start}-${range.end}, length: ${trimmedLine.length}`)
3224
+ console.log(`💾 Save: First 100 chars: ${trimmedLine.substring(0, 100)}`)
3225
+ if (trimmedLine.length > 100) {
3226
+ console.log(`💾 Save: Last 100 chars: ${trimmedLine.substring(trimmedLine.length - 100)}`)
3227
+ }
3228
+ }
3229
+
3230
+ // OPTIMIZATION: Try to extract ID without full JSON parsing
3231
+ let recordId = null
3232
+ let needsFullParse = false
3233
+
3234
+ // For array format, try to extract ID from array position
3235
+ if (trimmedLine.startsWith('[') && trimmedLine.endsWith(']')) {
3236
+ // Array format: try to extract ID from the array
3237
+ try {
3238
+ const arrayData = JSON.parse(trimmedLine)
3239
+ if (Array.isArray(arrayData) && arrayData.length > 0) {
3240
+ // For arrays without explicit ID, use the first element as a fallback
3241
+ // or try to find the ID field if it exists
3242
+ if (arrayData.length > 2) {
3243
+ // ID is typically at position 2 in array format [age, city, id, name]
3244
+ recordId = arrayData[2]
3245
+ } else {
3246
+ // For arrays without ID field, use first element as fallback
3247
+ recordId = arrayData[0]
3248
+ }
3249
+ if (recordId !== undefined && recordId !== null) {
3250
+ recordId = String(recordId)
3251
+ // Check if this record needs full parsing (updated or deleted)
3252
+ needsFullParse = updatedRecordsMap.has(recordId) || deletedIdsSnapshot.has(recordId)
3253
+ } else {
3254
+ needsFullParse = true
3255
+ }
3256
+ } else {
3257
+ needsFullParse = true
3258
+ }
3259
+ } catch (e) {
3260
+ needsFullParse = true
3261
+ }
3262
+ } else {
3263
+ // Object format: use regex for backward compatibility
3264
+ const idMatch = trimmedLine.match(/"id"\s*:\s*"([^"]+)"|"id"\s*:\s*(\d+)/)
3265
+ if (idMatch) {
3266
+ recordId = idMatch[1] || idMatch[2]
3267
+ needsFullParse = updatedRecordsMap.has(recordId) || deletedIdsSnapshot.has(recordId)
3268
+ } else {
3269
+ needsFullParse = true
3270
+ }
3271
+ }
3272
+
3273
+ if (!needsFullParse) {
3274
+ // Record is unchanged - we can avoid parsing entirely
3275
+ // Store the raw line and parse only when needed for the final result
3276
+ return {
3277
+ type: 'unchanged',
3278
+ line: trimmedLine,
3279
+ id: recordId,
3280
+ needsParse: false
3281
+ }
3282
+ }
3283
+
3284
+ // Full parsing needed for updated/deleted records
3285
+ try {
3286
+ // Use serializer to properly deserialize array format
3287
+ const record = this.serializer ? this.serializer.deserialize(trimmedLine) : JSON.parse(trimmedLine)
3288
+
3289
+ // Use record directly (no need to restore term IDs)
3290
+ const recordWithIds = record
3291
+
3292
+ if (updatedRecordsMap.has(recordWithIds.id)) {
3293
+ // Replace with updated version
3294
+ const updatedRecord = updatedRecordsMap.get(recordWithIds.id)
3295
+ if (this.opts.debugMode) {
3296
+ console.log(`💾 Save: Updated record ${recordWithIds.id} (${recordWithIds.name || 'Unnamed'})`)
3297
+ }
3298
+ return {
3299
+ type: 'updated',
3300
+ record: updatedRecord,
3301
+ id: recordWithIds.id,
3302
+ needsParse: false
3303
+ }
3304
+ } else if (!deletedIdsSnapshot.has(recordWithIds.id)) {
3305
+ // Keep existing record if not deleted
3306
+ if (this.opts.debugMode) {
3307
+ console.log(`💾 Save: Kept record ${recordWithIds.id} (${recordWithIds.name || 'Unnamed'})`)
3308
+ }
3309
+ return {
3310
+ type: 'kept',
3311
+ record: recordWithIds,
3312
+ id: recordWithIds.id,
3313
+ needsParse: false
3314
+ }
3315
+ } else {
3316
+ // Skip deleted record
3317
+ if (this.opts.debugMode) {
3318
+ console.log(`💾 Save: Skipped record ${recordWithIds.id} (${recordWithIds.name || 'Unnamed'}) - deleted`)
3319
+ }
3320
+ return {
3321
+ type: 'deleted',
3322
+ id: recordWithIds.id,
3323
+ needsParse: false
3324
+ }
3325
+ }
3326
+ } catch (jsonError) {
3327
+ // RACE CONDITION FIX: Skip records that can't be parsed due to incomplete writes
3328
+ if (this.opts.debugMode) {
3329
+ console.log(`💾 Save: Skipped corrupted record at range ${range.start}-${range.end} - ${jsonError.message}`)
3330
+ // console.log(`💾 Save: Problematic line: ${trimmedLine}`)
3331
+ }
3332
+ return null
3333
+ }
3334
+ })
3335
+
3336
+ // Process results and build final records array
3337
+ // Collect unchanged raw lines and fully parsed records in separate arrays
3338
+ const unchangedLines = []
3339
+ const parsedRecords = []
3340
+
3341
+ // OPTIMIZATION: Use for loop instead of Object.entries().sort() for better performance
3342
+ const sortedEntries = []
3343
+ for (const key in recordLines) {
3344
+ if (recordLines.hasOwnProperty(key)) {
3345
+ sortedEntries.push([key, recordLines[key]])
3346
+ }
3347
+ }
3348
+
3349
+ // OPTIMIZATION: Sort by offset position using numeric comparison
3350
+ sortedEntries.sort(([keyA], [keyB]) => parseInt(keyA) - parseInt(keyB))
3351
+
3352
+ // CRITICAL FIX: Maintain record order by processing in original offset order
3353
+ // and tracking which records are being kept vs deleted
3354
+ const keptRecords = []
3355
+ const deletedOffsets = new Set()
3356
+
3357
+ for (const [rangeKey, result] of sortedEntries) {
3358
+ if (!result) continue
3359
+
3360
+ const offset = parseInt(rangeKey)
3361
+
3362
+ switch (result.type) {
3363
+ case 'unchanged':
3364
+ // Collect unchanged lines for batch processing
3365
+ unchangedLines.push(result.line)
3366
+ keptRecords.push({ offset, type: 'unchanged', line: result.line })
3367
+ break
3368
+
3369
+ case 'updated':
3370
+ case 'kept':
3371
+ parsedRecords.push(result.record)
3372
+ keptRecords.push({ offset, type: 'parsed', record: result.record })
3373
+ break
3374
+
3375
+ case 'deleted':
3376
+ // Track deleted records by their offset
3377
+ deletedOffsets.add(offset)
3378
+ break
3379
+ }
3380
+ }
3381
+
3382
+ // CRITICAL FIX: Build final records array in the correct order
3383
+ // and update offsets array to match the new record order
3384
+ const newOffsets = []
3385
+ let currentOffset = 0
3386
+
3387
+ // OPTIMIZATION: Batch parse unchanged records for better performance
3388
+ if (unchangedLines.length > 0) {
3389
+ const batchParsedRecords = []
3390
+ for (let i = 0; i < unchangedLines.length; i++) {
3391
+ try {
3392
+ // Use serializer to properly deserialize array format
3393
+ const record = this.serializer ? this.serializer.deserialize(unchangedLines[i]) : JSON.parse(unchangedLines[i])
3394
+ batchParsedRecords.push(record)
3395
+ } catch (jsonError) {
3396
+ if (this.opts.debugMode) {
3397
+ console.log(`💾 Save: Failed to parse unchanged record: ${jsonError.message}`)
3398
+ }
3399
+ batchParsedRecords.push(null) // Mark as failed
3400
+ }
3401
+ }
3402
+
3403
+ // Process kept records in their original offset order
3404
+ let batchIndex = 0
3405
+ for (const keptRecord of keptRecords) {
3406
+ let record = null
3407
+
3408
+ if (keptRecord.type === 'unchanged') {
3409
+ record = batchParsedRecords[batchIndex++]
3410
+ if (!record) continue // Skip failed parses
3411
+ } else if (keptRecord.type === 'parsed') {
3412
+ record = keptRecord.record
3413
+ }
3414
+
3415
+ if (record && typeof record === 'object') {
3416
+ existingRecords.push(record)
3417
+ newOffsets.push(currentOffset)
3418
+ // OPTIMIZATION: Use cached string length if available
3419
+ const recordSize = keptRecord.type === 'unchanged'
3420
+ ? keptRecord.line.length + 1 // Use actual line length
3421
+ : JSON.stringify(this.removeTermIdsForSerialization(record)).length + 1
3422
+ currentOffset += recordSize
3423
+ }
3424
+ }
3425
+ } else {
3426
+ // Process kept records in their original offset order (no unchanged records)
3427
+ for (const keptRecord of keptRecords) {
3428
+ if (keptRecord.type === 'parsed') {
3429
+ const record = keptRecord.record
3430
+ if (record && typeof record === 'object' && record.id) {
3431
+ existingRecords.push(record)
3432
+ newOffsets.push(currentOffset)
3433
+ const recordSize = JSON.stringify(this.removeTermIdsForSerialization(record)).length + 1
3434
+ currentOffset += recordSize
3435
+ }
3436
+ }
3437
+ }
3438
+ }
3439
+
3440
+ // Update the offsets array to reflect the new record order
3441
+ this.offsets = newOffsets
3442
+
3443
+ return existingRecords
3444
+ }
3445
+
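The inline scanner used earlier in this method (brace counting with string and escape tracking) is worth isolating. A standalone equivalent with the same semantics:

```js
// Find where a top-level JSON object ends inside a text window,
// respecting string literals and backslash escapes.
// Returns the exclusive end offset, or -1 if no complete object fits.
function findJsonObjectEnd(text) {
  let depth = 0, inString = false, escaped = false
  for (let i = 0; i < text.length; i++) {
    const ch = text[i]
    if (escaped) { escaped = false; continue }
    if (ch === '\\') { escaped = true; continue }
    if (ch === '"') { inString = !inString; continue }
    if (!inString) {
      if (ch === '{') depth++
      else if (ch === '}' && --depth === 0) return i + 1
    }
  }
  return -1
}

// findJsonObjectEnd('{"a":"}"}') === 9  (the "}" inside the string is ignored)
```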
3446
+ /**
3447
+ * Flush write buffer
3448
+ */
3449
+ async flush() {
3450
+ return this.operationQueue.enqueue(async () => {
3451
+ this.isInsideOperationQueue = true
3452
+ try {
3453
+ // CRITICAL FIX: Actually flush the writeBuffer by saving data
3454
+ if (this.writeBuffer.length > 0 || this.shouldSave) {
3455
+ await this._doSave()
3456
+ }
3457
+ return Promise.resolve()
3458
+ } finally {
3459
+ this.isInsideOperationQueue = false
3460
+ }
3461
+ })
3462
+ }
3463
+
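Usage sketch, assuming an initialized Database instance `db` with data added through its insert API. Because flush() goes through the operation queue, it is safe to call while other operations are in flight:

```js
await db.insert({ id: 'a1', name: 'Ada' }) // assumed insert API
await db.flush()                           // resolves once buffered data is persisted

const stats = db.getMemoryUsage()
console.log(stats.writeBufferSize)         // expected to be 0 after a successful flush
```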
3464
+ /**
3465
+ * Flush insertion buffer (backward compatibility)
3466
+ */
3467
+ async flushInsertionBuffer() {
3468
+ // Kept for backward compatibility: delegates to flush(),
3469
+ // which saves any pending data through the operation queue
3470
+ return this.flush()
3471
+ }
3472
+
3473
+ /**
3474
+ * Get memory usage
3475
+ */
3476
+ getMemoryUsage() {
3477
+ return {
3478
+ offsetsCount: this.offsets.length,
3479
+ writeBufferSize: this.writeBuffer ? this.writeBuffer.length : 0,
3480
+ used: this.writeBuffer ? this.writeBuffer.length : 0,
3481
+ total: this.offsets.length + (this.writeBuffer ? this.writeBuffer.length : 0),
3482
+ percentage: 0
3483
+ }
3484
+ }
3485
+
3486
+ _hasActualIndexData() {
3487
+ if (!this.indexManager) return false
3488
+
3489
+ const data = this.indexManager.index.data
3490
+ for (const field in data) {
3491
+ const fieldData = data[field]
3492
+ for (const value in fieldData) {
3493
+ const hybridData = fieldData[value]
3494
+ if (hybridData.set && hybridData.set.size > 0) {
3495
+ return true
3496
+ }
3497
+ }
3498
+ }
3499
+ return false
3500
+ }
3501
+
3502
+ /**
3503
+ * Locate a record by line number and return its byte range
3504
+ * @param {number} n - Line number
3505
+ * @returns {Array} - [start, end] byte range or undefined
3506
+ */
3507
+ locate(n) {
3508
+ if (this.offsets[n] === undefined) {
3509
+ return undefined // Record doesn't exist
3510
+ }
3511
+
3512
+ // CRITICAL FIX: Calculate end offset correctly to prevent cross-line reading
3513
+ let end
3514
+ if (n + 1 < this.offsets.length) {
3515
+ // Use next record's start minus 1 (to exclude newline) as this record's end
3516
+ end = this.offsets[n + 1] - 1
3517
+ } else {
3518
+ // For the last record, use indexOffset (includes the record but not newline)
3519
+ end = this.indexOffset
3520
+ }
3521
+
3522
+ return [this.offsets[n], end]
3523
+ }
3524
+
3525
+ /**
3526
+ * Get ranges for streaming based on line numbers
3527
+ * @param {Array|Set} map - Line numbers to get ranges for
3528
+ * @returns {Array} - Array of range objects {start, end, index}
3529
+ */
3530
+ getRanges(map) {
3531
+ return (map || Array.from(this.offsets.keys())).map(n => {
3532
+ const ret = this.locate(n)
3533
+ if (ret !== undefined) return { start: ret[0], end: ret[1], index: n }
3534
+ }).filter(n => n !== undefined)
3535
+ }
3536
+
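A worked example of the byte arithmetic these two methods perform, assuming three records and an indexOffset at the end of the data section:

```js
// offsets     = [0, 25, 60]
// indexOffset = 90
//
// locate(0) -> [0, 24]   // next record starts at 25; minus 1 drops the newline
// locate(1) -> [25, 59]
// locate(2) -> [60, 90]  // last record runs to indexOffset
//
// getRanges([0, 2]) -> [{ start: 0,  end: 24, index: 0 },
//                       { start: 60, end: 90, index: 2 }]
```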
3537
+ /**
3538
+ * Walk through records using streaming (real implementation)
3539
+ */
3540
+ async *walk(criteria, options = {}) {
3541
+ // CRITICAL FIX: Validate state before walk operation to prevent crashes
3542
+ this.validateState()
3543
+
3544
+ if (!this.initialized) await this.init()
3545
+
3546
+ // If no data at all, return empty
3547
+ if (this.indexOffset === 0 && this.writeBuffer.length === 0) return
3548
+
3549
+ let count = 0
3550
+
3551
+ let map
3552
+ if (!Array.isArray(criteria)) {
3553
+ if (criteria instanceof Set) {
3554
+ map = [...criteria]
3555
+ } else if (criteria && typeof criteria === 'object' && Object.keys(criteria).length > 0) {
3556
+ // Only use indexManager.query if criteria has actual filters
3557
+ map = [...this.indexManager.query(criteria, options)]
3558
+ } else {
3559
+ // For empty criteria {} or null/undefined, get all records
3560
+ // Use writeBuffer length when indexOffset is 0 (data not saved yet)
3561
+ const totalRecords = this.indexOffset > 0 ? this.indexOffset : this.writeBuffer.length
3562
+ map = [...Array(totalRecords).keys()]
3563
+ }
3564
+ } else {
3565
+ map = criteria
3566
+ }
3567
+
3568
+ // Use writeBuffer when available (unsaved data)
3569
+ if (this.writeBuffer.length > 0) {
3570
+ let count = 0
3571
+
3572
+ // If map is empty (no index results) but we have criteria, filter writeBuffer directly
3573
+ if (map.length === 0 && criteria && typeof criteria === 'object' && Object.keys(criteria).length > 0) {
3574
+ for (let i = 0; i < this.writeBuffer.length; i++) {
3575
+ if (options.limit && count >= options.limit) {
3576
+ break
3577
+ }
3578
+ const entry = this.writeBuffer[i]
3579
+ if (entry && this.queryManager.matchesCriteria(entry, criteria, options)) {
3580
+ count++
3581
+ if (options.includeOffsets) {
3582
+ yield { entry, start: 0, _: i }
3583
+ } else {
3584
+ if (this.opts.includeLinePosition) {
3585
+ entry._ = i
3586
+ }
3587
+ yield entry
3588
+ }
3589
+ }
3590
+ }
3591
+ } else {
3592
+ // Use map-based iteration (for all records or indexed results)
3593
+ for (const lineNumber of map) {
3594
+ if (options.limit && count >= options.limit) {
3595
+ break
3596
+ }
3597
+ if (lineNumber < this.writeBuffer.length) {
3598
+ const entry = this.writeBuffer[lineNumber]
3599
+ if (entry) {
3600
+ count++
3601
+ if (options.includeOffsets) {
3602
+ yield { entry, start: 0, _: lineNumber }
3603
+ } else {
3604
+ if (this.opts.includeLinePosition) {
3605
+ entry._ = lineNumber
3606
+ }
3607
+ yield entry
3608
+ }
3609
+ }
3610
+ }
3611
+ }
3612
+ }
3613
+
3614
+ return
3615
+ }
3616
+
3617
+ // If writeBuffer is empty but we have saved data, we need to load it from file
3618
+ if (this.writeBuffer.length === 0 && this.indexOffset > 0) {
3619
+ // Load data from file for querying
3620
+ try {
3621
+ let data
3622
+ let lines
3623
+
3624
+ // Smart threshold: decide between partial reads vs full read
3625
+ const resultPercentage = map ? (map.length / this.indexOffset) * 100 : 100
3626
+ const threshold = this.opts.partialReadThreshold || 60 // Default 60% threshold
3627
+
3628
+ // Use partial reads when:
3629
+ // 1. We have specific line numbers from index
3630
+ // 2. Results are below threshold percentage
3631
+ // 3. Database is large enough to benefit from partial reads
3632
+ const shouldUsePartialReads = map && map.length > 0 &&
3633
+ resultPercentage < threshold &&
3634
+ this.indexOffset > 100 // Only for databases with >100 records
3635
+
3636
+ if (shouldUsePartialReads) {
3637
+ if (this.opts.debugMode) {
3638
+ console.log(`🔍 Using PARTIAL READS: ${map.length}/${this.indexOffset} records (${resultPercentage.toFixed(1)}% < ${threshold}% threshold)`)
3639
+ }
3640
+ // OPTIMIZATION: Use ranges instead of reading entire file
3641
+ const ranges = this.getRanges(map)
3642
+ const groupedRanges = await this.fileHandler.groupedRanges(ranges)
3643
+
3644
+ const fs = await import('fs')
3645
+ const fd = await fs.promises.open(this.fileHandler.file, 'r')
3646
+
3647
+ try {
3648
+ for (const groupedRange of groupedRanges) {
3649
+ for await (const row of this.fileHandler.readGroupedRange(groupedRange, fd)) {
3650
+ if (options.limit && count >= options.limit) {
3651
+ break
3652
+ }
3653
+
3654
+ try {
3655
+ // CRITICAL FIX: Use serializer.deserialize instead of JSON.parse to handle array format
3656
+ const record = this.serializer.deserialize(row.line)
3657
+ // SPACE OPTIMIZATION: Restore term IDs to terms for user
3658
+ const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
3659
+
3660
+ count++
3661
+ if (options.includeOffsets) {
3662
+ yield { entry: recordWithTerms, start: row.start, _: row._ || 0 }
3663
+ } else {
3664
+ if (this.opts.includeLinePosition) {
3665
+ recordWithTerms._ = row._ || 0
3666
+ }
3667
+ yield recordWithTerms
3668
+ }
3669
+ } catch (error) {
3670
+ // Skip invalid lines
3671
+ }
3672
+ }
3673
+ if (options.limit && count >= options.limit) {
3674
+ break
3675
+ }
3676
+ }
3677
+ } finally {
3678
+ await fd.close()
3679
+ }
3680
+ return // Exit early since we processed partial reads
3681
+ } else {
3682
+ if (this.opts.debugMode) {
3683
+ console.log(`🔍 Using STREAMING READ: ${map?.length || 0}/${this.indexOffset} records (${resultPercentage.toFixed(1)}% >= ${threshold}% threshold or small DB)`)
3684
+ }
3685
+ // Use streaming instead of loading all data in memory
3686
+ // This prevents memory issues with large databases
3687
+ const streamingResults = await this.fileHandler.readWithStreaming(
3688
+ criteria,
3689
+ { limit: options.limit, skip: options.skip },
3690
+ matchesCriteria,
3691
+ this.serializer
3692
+ )
3693
+
3694
+ // Process streaming results directly without loading all lines
3695
+ for (const record of streamingResults) {
3696
+ if (options.limit && count >= options.limit) {
3697
+ break
3698
+ }
3699
+ count++
3700
+
3701
+ // SPACE OPTIMIZATION: Restore term IDs to terms for user
3702
+ const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
3703
+
3704
+ if (options.includeOffsets) {
3705
+ yield { entry: recordWithTerms, start: 0, _: 0 }
3706
+ } else {
3707
+ if (this.opts.includeLinePosition) {
3708
+ recordWithTerms._ = 0
3709
+ }
3710
+ yield recordWithTerms
3711
+ }
3712
+ }
3713
+ return // Exit early since we processed streaming results
3714
+ }
3715
+ } catch (error) {
3716
+ // If file reading fails, continue to file-based streaming
3717
+ }
3718
+ }
3719
+
3720
+ // Use file-based streaming for saved data
3721
+ const ranges = this.getRanges(map)
3722
+ const groupedRanges = await this.fileHandler.groupedRanges(ranges)
3723
+ const fd = await fs.promises.open(this.fileHandler.file, 'r')
3724
+
3725
+ try {
3726
+ let count = 0
3727
+ for (const groupedRange of groupedRanges) {
3728
+ if (options.limit && count >= options.limit) {
3729
+ break
3730
+ }
3731
+ for await (const row of this.fileHandler.readGroupedRange(groupedRange, fd)) {
3732
+ if (options.limit && count >= options.limit) {
3733
+ break
3734
+ }
3735
+ const entry = await this.serializer.deserialize(row.line, { compress: this.opts.compress, v8: this.opts.v8 })
3736
+ if (entry === null) continue
3737
+
3738
+ // SPACE OPTIMIZATION: Restore term IDs to terms for user
3739
+ const entryWithTerms = this.restoreTermIdsAfterDeserialization(entry)
3740
+
3741
+ count++
3742
+ if (options.includeOffsets) {
3743
+ yield { entry: entryWithTerms, start: row.start, _: row._ || this.offsets.findIndex(n => n === row.start) }
3744
+ } else {
3745
+ if (this.opts.includeLinePosition) {
3746
+ entryWithTerms._ = row._ || this.offsets.findIndex(n => n === row.start)
3747
+ }
3748
+ yield entryWithTerms
3749
+ }
3750
+ }
3751
+ }
3752
+ } finally {
3753
+ await fd.close()
3754
+ }
3755
+ }
3756
+
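Usage sketch for walk(), assuming `db` is an initialized instance and `city` is an indexed field (field names are illustrative):

```js
// walk() is an async generator, so results stream in without
// materializing the whole result set.
for await (const user of db.walk({ city: 'Lisbon' }, { limit: 10 })) {
  console.log(user.id, user.name)
}

// With includeOffsets, each item wraps the entry with its byte position:
for await (const row of db.walk({ city: 'Lisbon' }, { includeOffsets: true })) {
  console.log(row.start, row.entry.id)
}
```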
3757
+ /**
3758
+ * Iterate through records with bulk update capabilities
3759
+ * Allows in-place modifications and deletions with optimized performance
3760
+ *
3761
+ * @param {Object} criteria - Query criteria
3762
+ * @param {Object} options - Iteration options
3763
+ * @param {number} options.chunkSize - Batch size for processing (default: 1000)
3764
+ * @param {string} options.strategy - Processing strategy: 'streaming' (always uses walk() method)
3765
+ * @param {boolean} options.autoSave - Auto-save after each chunk (default: false)
3766
+ * @param {Function} options.progressCallback - Progress callback function
3767
+ * @param {boolean} options.detectChanges - Auto-detect changes (default: true)
+ * @param {boolean} options.highPerformance - Use the lighter high-performance wrapper with manual change tracking (default: false)
3768
+ * @returns {AsyncGenerator} Generator yielding records for modification
3769
+ */
3770
+ async *iterate(criteria, options = {}) {
3771
+ // CRITICAL FIX: Validate state before iterate operation
3772
+ this.validateState()
3773
+
3774
+ if (!this.initialized) await this.init()
3775
+
3776
+ // Set default options
3777
+ const opts = {
3778
+ chunkSize: 1000,
3779
+ strategy: 'streaming', // Always use walk() method for optimal performance
3780
+ autoSave: false,
3781
+ detectChanges: true,
3782
+ ...options
3783
+ }
3784
+
3785
+ // If no data, return empty
3786
+ if (this.indexOffset === 0 && this.writeBuffer.length === 0) return
3787
+
3788
+ const startTime = Date.now()
3789
+ let processedCount = 0
3790
+ let modifiedCount = 0
3791
+ let deletedCount = 0
3792
+
3793
+ // Buffers for batch processing
3794
+ const updateBuffer = []
3795
+ const deleteBuffer = new Set()
3796
+ const originalRecords = new Map() // Track original records for change detection
3797
+
3798
+ try {
3799
+ // Always use walk() now that the bug is fixed - it works for both small and large datasets
3800
+ for await (const entry of this.walk(criteria, options)) {
3801
+ processedCount++
3802
+
3803
+ // Store original record for change detection BEFORE yielding
3804
+ let originalRecord = null
3805
+ if (opts.detectChanges) {
3806
+ originalRecord = this._createShallowCopy(entry)
3807
+ originalRecords.set(entry.id, originalRecord)
3808
+ }
3809
+
3810
+ // Create wrapper based on performance preference
3811
+ const entryWrapper = opts.highPerformance
3812
+ ? this._createHighPerformanceWrapper(entry, originalRecord)
3813
+ : this._createEntryProxy(entry, originalRecord)
3814
+
3815
+ // Yield the wrapper for user modification
3816
+ yield entryWrapper
3817
+
3818
+ // Check if entry was modified or deleted AFTER yielding
3819
+ if (entryWrapper.isMarkedForDeletion) {
3820
+ // Entry was marked for deletion
3821
+ if (originalRecord) {
3822
+ deleteBuffer.add(originalRecord.id)
3823
+ deletedCount++
3824
+ }
3825
+ } else if (opts.detectChanges && originalRecord) {
3826
+ // Check if entry was modified by comparing with original (optimized comparison)
3827
+ if (this._hasRecordChanged(entry, originalRecord)) {
3828
+ updateBuffer.push(entry)
3829
+ modifiedCount++
3830
+ }
3831
+ } else if (entryWrapper.isModified) {
3832
+ // Manual change detection
3833
+ updateBuffer.push(entry)
3834
+ modifiedCount++
3835
+ }
3836
+
3837
+ // Process batch when chunk size is reached
3838
+ if (updateBuffer.length >= opts.chunkSize || deleteBuffer.size >= opts.chunkSize) {
3839
+ await this._processIterateBatch(updateBuffer, deleteBuffer, opts)
3840
+
3841
+ // Clear buffers
3842
+ updateBuffer.length = 0
3843
+ deleteBuffer.clear()
3844
+ originalRecords.clear()
3845
+
3846
+ // Progress callback
3847
+ if (opts.progressCallback) {
3848
+ opts.progressCallback({
3849
+ processed: processedCount,
3850
+ modified: modifiedCount,
3851
+ deleted: deletedCount,
3852
+ elapsed: Date.now() - startTime
3853
+ })
3854
+ }
3855
+ }
3856
+ }
3857
+
3858
+ // Process remaining records in buffers
3859
+ if (updateBuffer.length > 0 || deleteBuffer.size > 0) {
3860
+ await this._processIterateBatch(updateBuffer, deleteBuffer, opts)
3861
+ }
3862
+
3863
+ // Final progress callback (always called)
3864
+ if (opts.progressCallback) {
3865
+ opts.progressCallback({
3866
+ processed: processedCount,
3867
+ modified: modifiedCount,
3868
+ deleted: deletedCount,
3869
+ elapsed: Date.now() - startTime,
3870
+ completed: true
3871
+ })
3872
+ }
3873
+
3874
+ if (this.opts.debugMode) {
3875
+ console.log(`🔄 ITERATE COMPLETED: ${processedCount} processed, ${modifiedCount} modified, ${deletedCount} deleted in ${Date.now() - startTime}ms`)
3876
+ }
3877
+
3878
+ } catch (error) {
3879
+ console.error('Iterate operation failed:', error)
3880
+ throw error
3881
+ }
3882
+ }
3883
+
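Usage sketch for iterate(). Mutations to a yielded entry are picked up by change detection; the exact deletion helper depends on `_createEntryProxy`, so the `delete()` call below is an assumption standing in for whatever sets `isMarkedForDeletion`:

```js
for await (const user of db.iterate({ status: 'trial' }, {
  chunkSize: 500,
  autoSave: true,
  progressCallback: p => console.log(`processed ${p.processed}, modified ${p.modified}`)
})) {
  if (user.expired) {
    user.delete()          // assumption: wrapper helper that marks the entry for deletion
  } else {
    user.status = 'active' // detected automatically when detectChanges is on
  }
}
```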
3884
+ /**
3885
+ * Process a batch of updates and deletes from iterate operation
3886
+ * @private
3887
+ */
3888
+ async _processIterateBatch(updateBuffer, deleteBuffer, options) {
3889
+ if (updateBuffer.length === 0 && deleteBuffer.size === 0) return
3890
+
3891
+ const startTime = Date.now()
3892
+
3893
+ try {
3894
+ // Process updates
3895
+ if (updateBuffer.length > 0) {
3896
+ for (const record of updateBuffer) {
3897
+ // Remove the _modified flag if it exists
3898
+ delete record._modified
3899
+
3900
+ // Update record in writeBuffer or add to writeBuffer
3901
+ const index = this.writeBuffer.findIndex(r => r.id === record.id)
3902
+ if (index !== -1) {
3903
+ // Record is already in writeBuffer, update it
3904
+ this.writeBuffer[index] = record
3905
+ } else {
3906
+ // Record is in file, add updated version to writeBuffer
3907
+ this.writeBuffer.push(record)
3908
+ }
3909
+
3910
+ // Update index
3911
+ await this.indexManager.update(record, record, this.writeBuffer.length - 1)
3912
+ }
3913
+
3914
+ if (this.opts.debugMode) {
3915
+ console.log(`🔄 ITERATE: Updated ${updateBuffer.length} records in ${Date.now() - startTime}ms`)
3916
+ }
3917
+ }
3918
+
3919
+ // Process deletes
3920
+ if (deleteBuffer.size > 0) {
3921
+ for (const recordId of deleteBuffer) {
3922
+ // Find the record to get its data for term mapping removal
3923
+ const record = this.writeBuffer.find(r => r.id === recordId) ||
3924
+ await this.findOne({ id: recordId })
3925
+
3926
+ if (record) {
3927
+ // Remove term mapping
3928
+ this.removeTermMapping(record)
3929
+
3930
+ // Remove from index
3931
+ await this.indexManager.remove(record)
3932
+
3933
+ // Remove from writeBuffer or mark as deleted
3934
+ const index = this.writeBuffer.findIndex(r => r.id === recordId)
3935
+ if (index !== -1) {
3936
+ this.writeBuffer.splice(index, 1)
3937
+ } else {
3938
+ // Mark as deleted if not in writeBuffer
3939
+ this.deletedIds.add(recordId)
3940
+ }
3941
+ }
3942
+ }
3943
+
3944
+ if (this.opts.debugMode) {
3945
+ console.log(`🗑️ ITERATE: Deleted ${deleteBuffer.size} records in ${Date.now() - startTime}ms`)
3946
+ }
3947
+ }
3948
+
3949
+ // Auto-save if enabled
3950
+ if (options.autoSave) {
3951
+ await this.save()
3952
+ }
3953
+
3954
+ this.shouldSave = true
3955
+ this.performanceStats.operations++
3956
+
3957
+ } catch (error) {
3958
+ console.error('Batch processing failed:', error)
3959
+ throw error
3960
+ }
3961
+ }
3962
+
3963
+ /**
3964
+ * Close the database
3965
+ */
3966
+ async close() {
3967
+ if (this.destroyed || this.closed) return
3968
+
3969
+ try {
3970
+ if (this.opts.debugMode) {
3971
+ console.log(`💾 close(): Saving and closing database (reopenable)`)
3972
+ }
3973
+
3974
+ // 1. Save all pending data and index data to files
3975
+ if (this.writeBuffer.length > 0 || this.shouldSave) {
3976
+ await this.save()
3977
+ // Ensure writeBuffer is cleared after save
3978
+ if (this.writeBuffer.length > 0) {
3979
+ console.warn('⚠️ WriteBuffer not cleared after save() - forcing clear')
3980
+ this.writeBuffer = []
3981
+ this.writeBufferOffsets = []
3982
+ this.writeBufferSizes = []
3983
+ }
3984
+ } else {
3985
+ // Even if no data to save, ensure index data is persisted
3986
+ await this._saveIndexDataToFile()
3987
+ }
3988
+
3989
+ // 2. Mark as closed (but not destroyed) to allow reopening
3990
+ this.closed = true
3991
+ this.initialized = false
3992
+
3993
+ // 3. Clear any remaining state for clean reopening
3994
+ this.writeBuffer = []
3995
+ this.writeBufferOffsets = []
3996
+ this.writeBufferSizes = []
3997
+ this.shouldSave = false
3998
+ this.isSaving = false
3999
+ this.lastSaveTime = null
4000
+
4001
+ if (this.opts.debugMode) {
4002
+ console.log(`💾 Database closed (can be reopened with init())`)
4003
+ }
4004
+
4005
+ } catch (error) {
4006
+ console.error('Failed to close database:', error)
4007
+ // Mark as closed even if save failed
4008
+ this.closed = true
4009
+ this.initialized = false
4010
+ throw error
4011
+ }
4012
+ }
4013
+
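Usage sketch of the close/reopen cycle the debug message describes (`db` assumed initialized; findOne as used elsewhere in this file):

```js
await db.close()  // flushes the writeBuffer and persists the .idx.jdb file
await db.init()   // the same instance can be reopened later

const again = await db.findOne({ id: 'a1' })
console.log(again) // data survives the close/init cycle
```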
4014
+ /**
4015
+ * Save index data to .idx.jdb file
4016
+ * @private
4017
+ */
4018
+ async _saveIndexDataToFile() {
4019
+ if (this.indexManager) {
4020
+ try {
4021
+ const idxPath = this.normalizedFile.replace(/\.jdb$/, '.idx.jdb')
4022
+ const indexData = {
4023
+ index: this.indexManager.indexedFields && this.indexManager.indexedFields.length > 0 ? this.indexManager.toJSON() : {},
4024
+ offsets: this.offsets, // Save actual offsets for efficient file operations
4025
+ indexOffset: this.indexOffset, // Save file size for proper range calculations
4026
+ // Save configuration for reuse when database exists
4027
+ config: {
4028
+ fields: this.opts.fields,
4029
+ indexes: this.opts.indexes,
4030
+ originalIndexes: this.opts.originalIndexes,
4031
+ schema: this.serializer?.getSchema?.() || null
4032
+ }
4033
+ }
4034
+
4035
+ // Include term mapping data in .idx file if term mapping fields exist
4036
+ const termMappingFields = this.getTermMappingFields()
4037
+ if (termMappingFields.length > 0 && this.termManager) {
4038
+ const termData = await this.termManager.saveTerms()
4039
+ indexData.termMapping = termData
4040
+ }
4041
+
4042
+ // Always create .idx file for databases with indexes, even if empty
4043
+ // This ensures the database structure is complete
4044
+ const originalFile = this.fileHandler.file
4045
+ this.fileHandler.file = idxPath
4046
+ await this.fileHandler.writeAll(JSON.stringify(indexData, null, 2))
4047
+ this.fileHandler.file = originalFile
4048
+
4049
+ if (this.opts.debugMode) {
4050
+ console.log(`💾 Index data saved to ${idxPath}`)
4051
+ }
4052
+ } catch (error) {
4053
+ console.warn('Failed to save index data:', error.message)
4054
+ throw error // Re-throw to let caller handle
4055
+ }
4056
+ }
4057
+ }
4058
+
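For reference, the payload written to the .idx.jdb file has roughly this shape (values illustrative; termMapping only appears when term-mapped fields are configured):

```js
const idxPayload = {
  index: { /* serialized IndexManager state (toJSON) */ },
  offsets: [0, 25, 60],  // byte offset of each record in the .jdb file
  indexOffset: 90,       // end of the data section
  config: {
    fields: { /* field definitions from opts.fields */ },
    indexes: ['city', 'name'],
    originalIndexes: ['city', 'name'],
    schema: null         // or the serializer schema when available
  },
  termMapping: { /* output of termManager.saveTerms() */ }
}
```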
4059
+ /**
4060
+ * Get operation queue statistics
4061
+ */
4062
+ getQueueStats() {
4063
+ if (!this.operationQueue) {
4064
+ return {
4065
+ queueLength: 0,
4066
+ isProcessing: false,
4067
+ totalOperations: 0,
4068
+ completedOperations: 0,
4069
+ failedOperations: 0,
4070
+ successRate: 0,
4071
+ averageProcessingTime: 0,
4072
+ maxProcessingTime: 0
4073
+ }
4074
+ }
4075
+ return this.operationQueue.getStats()
4076
+ }
4077
+
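Usage sketch; the zeroed fallback object above means destructuring is always safe, whether or not a queue exists yet:

```js
const { queueLength, isProcessing, successRate } = db.getQueueStats()
if (isProcessing) {
  console.log(`${queueLength} operations still queued (success rate ${successRate})`)
}
```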
4078
+ /**
4079
+ * Wait for all pending operations to complete
4080
+ * This includes operation queue AND active insert sessions
4081
+ * When called without arguments it waits indefinitely.
4082
+ * Pass maxWaitTime (in milliseconds) to bound the wait; the method returns false if the timeout elapses first.
4083
+ */
4084
+ async waitForOperations(maxWaitTime = null) {
4085
+ // A null or omitted maxWaitTime means "wait for all operations"
4086
+ const actualWaitTime = maxWaitTime
4087
+ const startTime = Date.now()
4088
+ const hasTimeout = actualWaitTime !== null && actualWaitTime !== undefined
4089
+
4090
+ // Wait for operation queue
4091
+ if (this.operationQueue) {
4092
+ const queueCompleted = await this.operationQueue.waitForCompletion(actualWaitTime)
4093
+ if (!queueCompleted && hasTimeout) {
4094
+ return false
4095
+ }
4096
+ }
4097
+
4098
+ // Wait for active insert sessions
4099
+ if (this.activeInsertSessions.size > 0) {
4100
+ if (this.opts.debugMode) {
4101
+ console.log(`⏳ waitForOperations: Waiting for ${this.activeInsertSessions.size} active insert sessions`)
4102
+ }
4103
+
4104
+ // Wait for all active sessions to complete
4105
+ const sessionPromises = Array.from(this.activeInsertSessions).map(session =>
4106
+ session.waitForOperations(actualWaitTime)
4107
+ )
4108
+
4109
+ try {
4110
+ const sessionResults = await Promise.all(sessionPromises)
4111
+
4112
+ // Check if any session timed out
4113
+ if (hasTimeout && sessionResults.some(result => !result)) {
4114
+ return false
4115
+ }
4116
+ } catch (error) {
4117
+ if (this.opts.debugMode) {
4118
+ console.log(`⚠️ waitForOperations: Error waiting for sessions: ${error.message}`)
4119
+ }
4120
+ // Continue anyway - don't fail the entire operation
4121
+ }
4122
+ }
4123
+
4124
+ return true
4125
+ }
4126
+ }
4127
+
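Usage sketch for waitForOperations(), combining the timeout behavior with a fallback flush (the 5000 ms budget is arbitrary):

```js
const settled = await db.waitForOperations(5000) // bound the wait to 5s
if (!settled) {
  console.warn('operations still pending after 5s, forcing a flush')
  await db.flush()
}
```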
4128
+ export { Database }
4129
+ export default Database
4130
+