jexidb 2.1.5 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/Database.mjs CHANGED
@@ -4,6 +4,7 @@ import Serializer from './Serializer.mjs'
4
4
  import { Mutex } from 'async-mutex'
5
5
  import fs from 'fs'
6
6
  import readline from 'readline'
7
+ import pRetry from 'p-retry'
7
8
  import { OperationQueue } from './OperationQueue.mjs'
8
9
 
9
10
  /**
@@ -2332,9 +2333,19 @@ class Database extends EventEmitter {
2332
2333
 
2333
2334
  // CRITICAL FIX: Validate state before find operation
2334
2335
  this.validateState()
2335
-
2336
+
2337
+ // Check for index-only optimization
2338
+ if (options.indexOnly && this._canUseIndexOnlyForExists(criteria)) {
2339
+ if (this.opts.debugMode) {
2340
+ console.log(`⚡ find() using INDEX-ONLY optimization for: ${JSON.stringify(criteria)}`)
2341
+ }
2342
+ // Return records using index-only lookup (but we need to fetch actual records)
2343
+ // For now, this just ensures we use optimized path in QueryManager
2344
+ options._forceIndexOnly = true
2345
+ }
2346
+
2336
2347
  // OPTIMIZATION: Find searches writeBuffer directly
2337
-
2348
+
2338
2349
  const startTime = Date.now()
2339
2350
 
2340
2351
  if (this.opts.debugMode) {
@@ -2656,6 +2667,8 @@ class Database extends EventEmitter {
2656
2667
  return Array.from(fields)
2657
2668
  }
2658
2669
 
2670
+
2671
+
2659
2672
  /**
2660
2673
  * Update records matching criteria
2661
2674
  */
@@ -3088,121 +3101,184 @@ class Database extends EventEmitter {
3088
3101
  let count = 0
3089
3102
  const startTime = Date.now()
3090
3103
 
3091
- // Auto-detect schema from first line if not initialized
3092
- if (!this.serializer.schemaManager.isInitialized) {
3093
- const fs = await import('fs')
3094
- const readline = await import('readline')
3095
- const stream = fs.createReadStream(this.fileHandler.file, {
3096
- highWaterMark: 64 * 1024,
3097
- encoding: 'utf8'
3098
- })
3099
- const rl = readline.createInterface({
3100
- input: stream,
3101
- crlfDelay: Infinity
3102
- })
3103
-
3104
- for await (const line of rl) {
3105
- if (line && line.trim()) {
3106
- try {
3107
- const firstRecord = JSON.parse(line)
3108
- if (Array.isArray(firstRecord)) {
3109
- // Try to infer schema from opts.fields if available
3110
- if (this.opts.fields && typeof this.opts.fields === 'object') {
3111
- const fieldNames = Object.keys(this.opts.fields)
3112
- if (fieldNames.length >= firstRecord.length) {
3113
- // Use first N fields from opts.fields to match array length
3114
- const schema = fieldNames.slice(0, firstRecord.length)
3115
- this.serializer.initializeSchema(schema)
3116
- if (this.opts.debugMode) {
3117
- console.log(`🔍 Inferred schema from opts.fields: ${schema.join(', ')}`)
3104
+ // Use retry for the streaming rebuild only if timeout is configured
3105
+ if (this.opts.ioTimeoutMs && this.opts.ioTimeoutMs > 0) {
3106
+ count = await this._rebuildIndexesWithRetry()
3107
+ } else {
3108
+ // Use original logic without retry for backward compatibility
3109
+ count = await this._rebuildIndexesOriginal()
3110
+ }
3111
+
3112
+ // Update indexManager totalLines
3113
+ if (this.indexManager) {
3114
+ this.indexManager.setTotalLines(this.offsets.length)
3115
+ }
3116
+
3117
+ this._indexRebuildNeeded = false
3118
+
3119
+ if (this.opts.debugMode) {
3120
+ console.log(`✅ Index rebuilt from ${count} records in ${Date.now() - startTime}ms`)
3121
+ }
3122
+
3123
+ // Save the rebuilt index
3124
+ await this._saveIndexDataToFile()
3125
+ } catch (error) {
3126
+ if (this.opts.debugMode) {
3127
+ console.error('❌ Failed to rebuild indexes:', error.message)
3128
+ }
3129
+ // Don't throw - queries will fall back to streaming
3130
+ }
3131
+ }
3132
+
3133
+ /**
3134
+ * Rebuild indexes with retry logic to handle I/O hangs
3135
+ * @private
3136
+ */
3137
+ async _rebuildIndexesWithRetry() {
3138
+ // If no timeout configured, use original implementation without retry
3139
+ if (!this.opts.ioTimeoutMs) {
3140
+ return this._rebuildIndexesOriginal();
3141
+ }
3142
+
3143
+ const timeoutMs = this.opts.ioTimeoutMs || 10000; // Longer timeout for rebuild
3144
+ const maxRetries = this.opts.maxRetries || 3;
3145
+
3146
+ let count = 0;
3147
+
3148
+ await pRetry(async (attempt) => {
3149
+ const controller = new AbortController();
3150
+ const timeout = setTimeout(() => controller.abort(), timeoutMs);
3151
+
3152
+ try {
3153
+ // Auto-detect schema from first line if not initialized
3154
+ if (!this.serializer.schemaManager.isInitialized) {
3155
+ const stream = fs.createReadStream(this.fileHandler.file, {
3156
+ highWaterMark: 64 * 1024,
3157
+ encoding: 'utf8'
3158
+ })
3159
+ const rl = readline.createInterface({
3160
+ input: stream,
3161
+ crlfDelay: Infinity
3162
+ })
3163
+
3164
+ // Handle abort
3165
+ controller.signal.addEventListener('abort', () => {
3166
+ stream.destroy(new Error('AbortError'));
3167
+ rl.close();
3168
+ });
3169
+
3170
+ for await (const line of rl) {
3171
+ if (controller.signal.aborted) break;
3172
+ if (line && line.trim()) {
3173
+ try {
3174
+ const firstRecord = JSON.parse(line)
3175
+ if (Array.isArray(firstRecord)) {
3176
+ // Try to infer schema from opts.fields if available
3177
+ if (this.opts.fields && typeof this.opts.fields === 'object') {
3178
+ const fieldNames = Object.keys(this.opts.fields)
3179
+ if (fieldNames.length >= firstRecord.length) {
3180
+ // Use first N fields from opts.fields to match array length
3181
+ const schema = fieldNames.slice(0, firstRecord.length)
3182
+ this.serializer.initializeSchema(schema)
3183
+ if (this.opts.debugMode) {
3184
+ console.log(`🔍 Inferred schema from opts.fields: ${schema.join(', ')}`)
3185
+ }
3186
+ } else {
3187
+ throw new Error(`Cannot rebuild index: array has ${firstRecord.length} elements but opts.fields only defines ${fieldNames.length} fields. Schema must be explicitly provided.`)
3118
3188
  }
3119
3189
  } else {
3120
- throw new Error(`Cannot rebuild index: array has ${firstRecord.length} elements but opts.fields only defines ${fieldNames.length} fields. Schema must be explicitly provided.`)
3190
+ throw new Error('Cannot rebuild index: schema missing, file uses array format, and opts.fields not provided. The .idx.jdb file is corrupted.')
3121
3191
  }
3122
3192
  } else {
3123
- throw new Error('Cannot rebuild index: schema missing, file uses array format, and opts.fields not provided. The .idx.jdb file is corrupted.')
3193
+ // Object format, initialize from object keys
3194
+ this.serializer.initializeSchema(firstRecord, true)
3195
+ if (this.opts.debugMode) {
3196
+ console.log(`🔍 Auto-detected schema from object: ${Object.keys(firstRecord).join(', ')}`)
3197
+ }
3124
3198
  }
3125
- } else {
3126
- // Object format, initialize from object keys
3127
- this.serializer.initializeSchema(firstRecord, true)
3199
+ break
3200
+ } catch (error) {
3128
3201
  if (this.opts.debugMode) {
3129
- console.log(`🔍 Auto-detected schema from object: ${Object.keys(firstRecord).join(', ')}`)
3202
+ console.error('❌ Failed to auto-detect schema:', error.message)
3130
3203
  }
3204
+ throw error
3131
3205
  }
3132
- break
3133
- } catch (error) {
3134
- if (this.opts.debugMode) {
3135
- console.error('❌ Failed to auto-detect schema:', error.message)
3136
- }
3137
- throw error
3138
3206
  }
3139
3207
  }
3208
+ stream.destroy()
3140
3209
  }
3141
- stream.destroy()
3142
- }
3143
-
3144
- // Use streaming to read records without loading everything into memory
3145
- // Also rebuild offsets while we're at it
3146
- const fs = await import('fs')
3147
- const readline = await import('readline')
3148
-
3149
- this.offsets = []
3150
- let currentOffset = 0
3151
-
3152
- const stream = fs.createReadStream(this.fileHandler.file, {
3153
- highWaterMark: 64 * 1024,
3154
- encoding: 'utf8'
3155
- })
3156
-
3157
- const rl = readline.createInterface({
3158
- input: stream,
3159
- crlfDelay: Infinity
3160
- })
3161
-
3162
- try {
3210
+
3211
+ // Use streaming to read records without loading everything into memory
3212
+ // Also rebuild offsets while we're at it
3213
+
3214
+ this.offsets = []
3215
+ let currentOffset = 0
3216
+
3217
+ const stream = fs.createReadStream(this.fileHandler.file, {
3218
+ highWaterMark: 64 * 1024,
3219
+ encoding: 'utf8'
3220
+ })
3221
+
3222
+ const rl = readline.createInterface({
3223
+ input: stream,
3224
+ crlfDelay: Infinity
3225
+ })
3226
+
3227
+ // Handle abort
3228
+ controller.signal.addEventListener('abort', () => {
3229
+ stream.destroy(new Error('AbortError'));
3230
+ rl.close();
3231
+ });
3232
+
3233
+ let localCount = 0;
3163
3234
  for await (const line of rl) {
3235
+ if (controller.signal.aborted) break;
3164
3236
  if (line && line.trim()) {
3165
3237
  try {
3166
3238
  // Record the offset for this line
3167
3239
  this.offsets.push(currentOffset)
3168
-
3240
+
3169
3241
  const record = this.serializer.deserialize(line)
3170
3242
  const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
3171
- await this.indexManager.add(recordWithTerms, count)
3172
- count++
3243
+ await this.indexManager.add(recordWithTerms, count + localCount)
3244
+ localCount++
3173
3245
  } catch (error) {
3174
3246
  // Skip invalid lines
3175
3247
  if (this.opts.debugMode) {
3176
- console.log(`⚠️ Rebuild: Failed to deserialize line ${count}:`, error.message)
3248
+ console.log(`⚠️ Rebuild: Failed to deserialize line ${count + localCount}:`, error.message)
3177
3249
  }
3178
3250
  }
3179
3251
  }
3180
3252
  // Update offset for next line (including newline character)
3181
3253
  currentOffset += Buffer.byteLength(line, 'utf8') + 1
3182
3254
  }
3183
- } finally {
3255
+
3256
+ count += localCount;
3184
3257
  stream.destroy()
3258
+ } catch (error) {
3259
+ if (error.name === 'AbortError' || error.code === 'ETIMEDOUT') {
3260
+ if (this.opts.debugMode) {
3261
+ console.log(`⚠️ Index rebuild attempt ${attempt} timed out, retrying...`);
3262
+ }
3263
+ throw error; // p-retry will retry
3264
+ }
3265
+ // For other errors, don't retry
3266
+ throw new pRetry.AbortError(error);
3267
+ } finally {
3268
+ clearTimeout(timeout);
3185
3269
  }
3186
-
3187
- // Update indexManager totalLines
3188
- if (this.indexManager) {
3189
- this.indexManager.setTotalLines(this.offsets.length)
3190
- }
3191
-
3192
- this._indexRebuildNeeded = false
3193
-
3194
- if (this.opts.debugMode) {
3195
- console.log(`✅ Index rebuilt from ${count} records in ${Date.now() - startTime}ms`)
3196
- }
3197
-
3198
- // Save the rebuilt index
3199
- await this._saveIndexDataToFile()
3200
- } catch (error) {
3201
- if (this.opts.debugMode) {
3202
- console.error('❌ Failed to rebuild indexes:', error.message)
3270
+ }, {
3271
+ retries: maxRetries,
3272
+ minTimeout: 200,
3273
+ maxTimeout: 2000,
3274
+ onFailedAttempt: (error) => {
3275
+ if (this.opts.debugMode) {
3276
+ console.log(`Index rebuild failed (attempt ${error.attemptNumber}), ${error.retriesLeft} retries left`);
3277
+ }
3203
3278
  }
3204
- // Don't throw - queries will fall back to streaming
3205
- }
3279
+ });
3280
+
3281
+ return count;
3206
3282
  }
3207
3283
 
3208
3284
  /**
@@ -3404,35 +3480,258 @@ class Database extends EventEmitter {
3404
3480
  }
3405
3481
 
3406
3482
  /**
3407
- * Check if any records exist for given field and terms (index-only, ultra-fast)
3408
- * Delegates to IndexManager.exists() for maximum performance
3409
- *
3410
- * @param {string} fieldName - Indexed field name
3411
- * @param {string|Array<string>} terms - Single term or array of terms
3483
+ * Check if any records exist matching the given criteria (ultra-fast when using indexed fields)
3484
+ *
3485
+ * @param {string|object} fieldName - Indexed field name (legacy) OR query criteria object (new)
3486
+ * @param {string|Array<string>} terms - Single term or array of terms (when using legacy syntax)
3412
3487
  * @param {Object} options - Options: { $all: true/false, caseInsensitive: true/false, excludes: Array<string> }
3413
3488
  * @returns {Promise<boolean>} - True if at least one match exists
3414
- *
3489
+ *
3415
3490
  * @example
3416
- * // Check if channel exists
3417
- * const exists = await db.exists('nameTerms', ['a', 'e'], { $all: true });
3418
- *
3491
+ * // Legacy syntax - ultra-fast index-only check
3492
+ * const exists = await db.exists('nameTerms', 'tv');
3493
+ * const existsAll = await db.exists('nameTerms', ['tv', 'globo'], { $all: true });
3494
+ *
3419
3495
  * @example
3420
- * // Check if 'tv' exists but not 'globo'
3421
- * const exists = await db.exists('nameTerms', 'tv', { excludes: ['globo'] });
3496
+ * // New syntax - full query criteria support
3497
+ * const exists = await db.exists({ mediaType: 'live', status: 'active' });
3498
+ * const existsOr = await db.exists({ mediaType: ['live', 'vod'] });
3422
3499
  */
3423
- async exists(fieldName, terms, options = {}) {
3500
+ async exists(fieldNameOrCriteria, terms, options = {}) {
3424
3501
  this._validateInitialization('exists')
3425
- return this.indexManager.exists(fieldName, terms, options)
3502
+
3503
+ // Detect syntax: new criteria object vs legacy field/terms
3504
+ if (typeof fieldNameOrCriteria === 'object' && fieldNameOrCriteria !== null && !Array.isArray(fieldNameOrCriteria)) {
3505
+ // New syntax: exists(criteria)
3506
+ const criteria = fieldNameOrCriteria
3507
+ return this._existsWithCriteria(criteria)
3508
+ } else if (typeof fieldNameOrCriteria === 'string' || fieldNameOrCriteria === null || Array.isArray(fieldNameOrCriteria)) {
3509
+ // Legacy syntax: exists(fieldName, terms, options)
3510
+ // Also handle invalid inputs (null, array) for backward compatibility
3511
+ const fieldName = fieldNameOrCriteria
3512
+ return this.indexManager.exists(fieldName, terms, options)
3513
+ } else {
3514
+ // Invalid input type
3515
+ throw new Error('First parameter must be a string (fieldName) or object (criteria)')
3516
+ }
3517
+ }
3518
+
3519
+ /**
3520
+ * Check if any records exist using full query criteria
3521
+ * Uses index intersection when possible for maximum performance
3522
+ * @private
3523
+ * @param {object} criteria - Query criteria object
3524
+ * @returns {Promise<boolean>} - True if at least one match exists
3525
+ */
3526
+
3527
+ async _existsWithCriteria(criteria) {
3528
+ if (criteria === null || criteria === undefined || typeof criteria !== 'object' || Array.isArray(criteria)) {
3529
+ throw new Error('Criteria must be a non-null object')
3530
+ }
3531
+
3532
+ // Check if criteria is empty (should match all records)
3533
+ const criteriaFields = Object.keys(criteria)
3534
+ if (criteriaFields.length === 0) {
3535
+ // Empty criteria matches all records - check if any exist
3536
+ try {
3537
+ const result = await this.find({}, { limit: 1 })
3538
+ return result.length > 0
3539
+ } catch (error) {
3540
+ return false
3541
+ }
3542
+ }
3543
+
3544
+ // 🚀 OPTIMIZATION: Try index-only existence check for simple criteria
3545
+ if (this._canUseIndexOnlyForExists(criteria)) {
3546
+ if (this.opts.debugMode) {
3547
+ console.log(`⚡ exists() using INDEX-ONLY optimization for: ${JSON.stringify(criteria)}`)
3548
+ }
3549
+ return this._existsIndexOnly(criteria)
3550
+ }
3551
+
3552
+ // 🎯 FALLBACK: Use the same find() logic for complex criteria or non-indexed fields
3553
+ // This ensures exists() uses identical logic to find() for all criteria processing
3554
+ try {
3555
+ const result = await this.find(criteria, { limit: 1 })
3556
+ return result.length > 0
3557
+ } catch (error) {
3558
+ // If find() fails (e.g., strict mode violations), no records exist
3559
+ return false
3560
+ }
3561
+ }
3562
+
3563
+ /**
3564
+ * Check if criteria can use index-only existence check
3565
+ * @private
3566
+ * @param {object} criteria - Query criteria
3567
+ * @returns {boolean} - True if can use index-only
3568
+ */
3569
+ _canUseIndexOnlyForExists(criteria) {
3570
+ // Must have indexes configured
3571
+ if (!this.opts.indexes) return false
3572
+
3573
+ // All fields in criteria must be indexed
3574
+ const criteriaFields = Object.keys(criteria)
3575
+ const allFieldsIndexed = criteriaFields.every(field => this.opts.indexes[field])
3576
+
3577
+ if (!allFieldsIndexed) return false
3578
+
3579
+ // No complex operators allowed (only simple equality)
3580
+ for (const [field, value] of Object.entries(criteria)) {
3581
+ // Allow simple values (string, number, boolean)
3582
+ if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
3583
+ continue
3584
+ }
3585
+
3586
+ // Allow arrays (OR logic)
3587
+ if (Array.isArray(value)) {
3588
+ continue
3589
+ }
3590
+
3591
+ // Reject any object (complex operators)
3592
+ if (typeof value === 'object') {
3593
+ return false
3594
+ }
3595
+ }
3596
+
3597
+ return true
3598
+ }
3599
+
3600
+ /**
3601
+ * Perform index-only existence check (ultra-fast, no disk I/O)
3602
+ * @private
3603
+ * @param {object} criteria - Simple criteria with only indexed fields
3604
+ * @returns {boolean} - True if any records match the criteria
3605
+ */
3606
+ _existsIndexOnly(criteria) {
3607
+ const criteriaEntries = Object.entries(criteria)
3608
+
3609
+ // For single field criteria, use direct indexManager.exists()
3610
+ if (criteriaEntries.length === 1) {
3611
+ const [field, value] = criteriaEntries[0]
3612
+ return this.indexManager.exists(field, value)
3613
+ }
3614
+
3615
+ // For multiple field criteria, implement index intersection (AND logic)
3616
+ let intersection = null
3617
+
3618
+ for (const [field, value] of criteriaEntries) {
3619
+ // Get line numbers for this field/value combination
3620
+ const fieldLines = this._getIndexLinesForFieldValue(field, value)
3621
+
3622
+ if (intersection === null) {
3623
+ // First field - start with all its lines
3624
+ intersection = new Set(fieldLines)
3625
+ } else {
3626
+ // Intersect with previous results
3627
+ const currentLines = new Set(fieldLines)
3628
+ for (const line of Array.from(intersection)) {
3629
+ if (!currentLines.has(line)) {
3630
+ intersection.delete(line)
3631
+ }
3632
+ }
3633
+ }
3634
+
3635
+ // Early exit if no matches possible
3636
+ if (intersection.size === 0) {
3637
+ return false
3638
+ }
3639
+ }
3640
+
3641
+ return intersection.size > 0
3642
+ }
3643
+
3644
+ /**
3645
+ * Get line numbers for a field/value combination using index-only lookup
3646
+ * @private
3647
+ * @param {string} field - Field name
3648
+ * @param {*} value - Field value (string, number, or array)
3649
+ * @returns {Array<number>} Array of line numbers
3650
+ */
3651
+ _getIndexLinesForFieldValue(field, value) {
3652
+ // For arrays (OR logic), check each value
3653
+ if (Array.isArray(value)) {
3654
+ const allLines = new Set()
3655
+ for (const singleValue of value) {
3656
+ const lines = this._getSingleFieldLines(field, singleValue)
3657
+ lines.forEach(line => allLines.add(line))
3658
+ }
3659
+ return Array.from(allLines)
3660
+ }
3661
+
3662
+ // For single values
3663
+ return this._getSingleFieldLines(field, value)
3664
+ }
3665
+
3666
+ /**
3667
+ * Get line numbers for a single field/value using index lookup
3668
+ * @private
3669
+ * @param {string} field - Field name
3670
+ * @param {*} value - Single field value
3671
+ * @returns {Array<number>} Array of line numbers
3672
+ */
3673
+ _getSingleFieldLines(field, value) {
3674
+ // Use indexManager.exists() logic but return line numbers instead of boolean
3675
+ const fieldIndex = this.indexManager?.index?.data?.[field]
3676
+ if (!fieldIndex) return []
3677
+
3678
+ const fieldType = this.opts.indexes[field]
3679
+ const isTermMapped = this.termManager &&
3680
+ this.termManager.termMappingFields &&
3681
+ this.termManager.termMappingFields.includes(field)
3682
+
3683
+ let searchKey
3684
+
3685
+ if (fieldType === 'array:string') {
3686
+ // For array:string fields, match records that contain this value
3687
+ searchKey = isTermMapped ?
3688
+ this.termManager.getTermIdWithoutIncrement(String(value)) :
3689
+ String(value)
3690
+ } else {
3691
+ // For simple fields, exact match
3692
+ searchKey = isTermMapped ?
3693
+ this.termManager.getTermIdWithoutIncrement(String(value)) :
3694
+ String(value)
3695
+ }
3696
+
3697
+ if (searchKey === null || searchKey === undefined) {
3698
+ return []
3699
+ }
3700
+
3701
+ const termData = fieldIndex[searchKey]
3702
+ if (!termData) return []
3703
+
3704
+ // Extract all line numbers from termData (similar to indexManager._getAllLineNumbers)
3705
+ const lines = new Set()
3706
+
3707
+ // Check Set (most common case)
3708
+ if (termData.set && termData.set.size > 0) {
3709
+ for (const line of termData.set) {
3710
+ lines.add(line)
3711
+ }
3712
+ }
3713
+
3714
+ // Check ranges
3715
+ if (termData.ranges && termData.ranges.length > 0) {
3716
+ for (const range of termData.ranges) {
3717
+ for (let line = range.start; line <= range.end; line++) {
3718
+ lines.add(line)
3719
+ }
3720
+ }
3721
+ }
3722
+
3723
+ return Array.from(lines)
3426
3724
  }
3427
3725
 
3428
3726
  /**
3429
3727
  * Calculate coverage for grouped include/exclude term sets
3430
3728
  * @param {string} fieldName - Name of the indexed field
3431
3729
  * @param {Array<object>} groups - Array of { terms, excludes } objects
3730
+ * @param {object} filterCriteria - Optional filter criteria
3432
3731
  * @param {object} options - Optional settings
3433
3732
  * @returns {Promise<number>} Coverage percentage between 0 and 100
3434
3733
  */
3435
- async coverage(fieldName, groups, options = {}) {
3734
+ async coverage(fieldName, groups, filterCriteria = null, options = {}) {
3436
3735
  this._validateInitialization('coverage')
3437
3736
 
3438
3737
  if (typeof fieldName !== 'string' || !fieldName.trim()) {
@@ -3462,6 +3761,30 @@ class Database extends EventEmitter {
3462
3761
  return 0
3463
3762
  }
3464
3763
 
3764
+ // Validate filter criteria
3765
+ let filteredLines = null
3766
+ if (filterCriteria && typeof filterCriteria === 'object') {
3767
+ if (Array.isArray(filterCriteria)) {
3768
+ throw new Error('filterCriteria must be an object, not an array')
3769
+ }
3770
+
3771
+ // Get filtered records using QueryManager for consistency
3772
+ try {
3773
+ const filteredRecords = await this.queryManager.find(filterCriteria, {
3774
+ limit: null, // Get all matching records for coverage calculation
3775
+ indexedQueryMode: this.opts.indexedQueryMode,
3776
+ allowNonIndexed: true
3777
+ })
3778
+ filteredLines = new Set(filteredRecords.map(record => record._))
3779
+ if (filteredLines.size === 0) {
3780
+ return 0 // No records match the filter
3781
+ }
3782
+ } catch (error) {
3783
+ // If filtering fails, return 0 (no coverage possible)
3784
+ return 0
3785
+ }
3786
+ }
3787
+
3465
3788
  const isTermMapped = this.termManager &&
3466
3789
  this.termManager.termMappingFields &&
3467
3790
  this.termManager.termMappingFields.includes(fieldName)
@@ -3532,10 +3855,20 @@ class Database extends EventEmitter {
3532
3855
  break
3533
3856
  }
3534
3857
 
3858
+ // Apply filter if specified
3859
+ let validLineNumbers = lineNumbers
3860
+ if (filteredLines) {
3861
+ validLineNumbers = lineNumbers.filter(line => filteredLines.has(line))
3862
+ if (validLineNumbers.length === 0) {
3863
+ groupMatched = false
3864
+ break
3865
+ }
3866
+ }
3867
+
3535
3868
  if (candidateLines === null) {
3536
- candidateLines = new Set(lineNumbers)
3869
+ candidateLines = new Set(validLineNumbers)
3537
3870
  } else {
3538
- const termSet = new Set(lineNumbers)
3871
+ const termSet = new Set(validLineNumbers)
3539
3872
  for (const line of Array.from(candidateLines)) {
3540
3873
  if (!termSet.has(line)) {
3541
3874
  candidateLines.delete(line)
@@ -3569,7 +3902,13 @@ class Database extends EventEmitter {
3569
3902
  continue
3570
3903
  }
3571
3904
 
3572
- for (const line of excludeLines) {
3905
+ // Apply filter to exclude lines if specified
3906
+ let validExcludeLines = excludeLines
3907
+ if (filteredLines) {
3908
+ validExcludeLines = excludeLines.filter(line => filteredLines.has(line))
3909
+ }
3910
+
3911
+ for (const line of validExcludeLines) {
3573
3912
  if (!candidateLines.size) {
3574
3913
  break
3575
3914
  }
@@ -3793,37 +4132,12 @@ class Database extends EventEmitter {
3793
4132
 
3794
4133
  const groupedRanges = await this.fileHandler.groupedRanges(ranges)
3795
4134
 
3796
- const fs = await import('fs')
3797
4135
  const fd = await fs.promises.open(this.fileHandler.file, 'r')
3798
4136
 
3799
4137
  try {
3800
4138
  for (const groupedRange of groupedRanges) {
3801
- for await (const row of this.fileHandler.readGroupedRange(groupedRange, fd)) {
3802
- try {
3803
- const record = this.serializer.deserialize(row.line)
3804
-
3805
- // Get line number from the row, fallback to start offset mapping
3806
- let lineNumber = row._ !== null && row._ !== undefined ? row._ : (startToLineNumber.get(row.start) ?? 0)
3807
-
3808
- // Restore term IDs to terms
3809
- const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
3810
-
3811
- // Add line number
3812
- recordWithTerms._ = lineNumber
3813
-
3814
- // Add score if includeScore is true (default is true)
3815
- if (opts.includeScore !== false) {
3816
- recordWithTerms.score = scoresByLineNumber.get(lineNumber) || 0
3817
- }
3818
-
3819
- results.push(recordWithTerms)
3820
- } catch (error) {
3821
- // Skip invalid lines
3822
- if (this.opts.debugMode) {
3823
- console.error('Error deserializing record in score():', error)
3824
- }
3825
- }
3826
- }
4139
+ const rangeResults = await this._readGroupedRangeWithRetry(groupedRange, fd, startToLineNumber, scoresByLineNumber, opts);
4140
+ results.push(...rangeResults);
3827
4141
  }
3828
4142
  } finally {
3829
4143
  await fd.close()
@@ -3865,6 +4179,234 @@ class Database extends EventEmitter {
3865
4179
  return results
3866
4180
  }
3867
4181
 
4182
+ /**
4183
+ * Read a grouped range with retry logic to handle I/O hangs
4184
+ * @private
4185
+ */
4186
+ async _readGroupedRangeWithRetry(groupedRange, fd, startToLineNumber, scoresByLineNumber, opts) {
4187
+ // If no timeout configured, use original implementation without retry
4188
+ if (!this.opts.ioTimeoutMs) {
4189
+ return this._readGroupedRangeOriginal(groupedRange, fd, startToLineNumber, scoresByLineNumber, opts);
4190
+ }
4191
+
4192
+ const timeoutMs = this.opts.ioTimeoutMs || 3000; // Shorter timeout for range reads
4193
+ const maxRetries = this.opts.maxRetries || 3;
4194
+
4195
+ const results = [];
4196
+
4197
+ await pRetry(async (attempt) => {
4198
+ const controller = new AbortController();
4199
+ const timeout = setTimeout(() => controller.abort(), timeoutMs);
4200
+
4201
+ try {
4202
+ // Collect results from the generator
4203
+ const rangeResults = [];
4204
+ const generator = this.fileHandler.readGroupedRange(groupedRange, fd);
4205
+
4206
+ // Handle abort
4207
+ controller.signal.addEventListener('abort', () => {
4208
+ generator.return(); // Close the generator
4209
+ });
4210
+
4211
+ for await (const row of generator) {
4212
+ if (controller.signal.aborted) break;
4213
+
4214
+ try {
4215
+ const record = this.serializer.deserialize(row.line)
4216
+
4217
+ // Get line number from the row, fallback to start offset mapping
4218
+ let lineNumber = row._ !== null && row._ !== undefined ? row._ : (startToLineNumber.get(row.start) ?? 0)
4219
+
4220
+ // Restore term IDs to terms
4221
+ const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
4222
+
4223
+ // Add line number
4224
+ recordWithTerms._ = lineNumber
4225
+
4226
+ // Add score if includeScore is true (default is true)
4227
+ if (opts.includeScore !== false) {
4228
+ recordWithTerms.score = scoresByLineNumber.get(lineNumber) || 0
4229
+ }
4230
+
4231
+ rangeResults.push(recordWithTerms)
4232
+ } catch (error) {
4233
+ // Skip invalid lines
4234
+ if (this.opts.debugMode) {
4235
+ console.error('Error deserializing record in score():', error)
4236
+ }
4237
+ }
4238
+ }
4239
+
4240
+ results.push(...rangeResults);
4241
+ } catch (error) {
4242
+ if (error.name === 'AbortError' || error.code === 'ETIMEDOUT') {
4243
+ if (this.opts.debugMode) {
4244
+ console.log(`⚠️ Score range read attempt ${attempt} timed out, retrying...`);
4245
+ }
4246
+ throw error; // p-retry will retry
4247
+ }
4248
+ // For other errors, don't retry
4249
+ throw new pRetry.AbortError(error);
4250
+ } finally {
4251
+ clearTimeout(timeout);
4252
+ }
4253
+ }, {
4254
+ retries: maxRetries,
4255
+ minTimeout: 100,
4256
+ maxTimeout: 500,
4257
+ onFailedAttempt: (error) => {
4258
+ if (this.opts.debugMode) {
4259
+ console.log(`Score range read failed (attempt ${error.attemptNumber}), ${error.retriesLeft} retries left`);
4260
+ }
4261
+ }
4262
+ });
4263
+
4264
+ return results;
4265
+ }
4266
+
4267
+ /**
4268
+ * Original read grouped range logic without retry (for backward compatibility)
4269
+ * @private
4270
+ */
4271
+ async _readGroupedRangeOriginal(groupedRange, fd, startToLineNumber, scoresByLineNumber, opts) {
4272
+ const results = [];
4273
+
4274
+ // Collect results from the generator
4275
+ const rangeResults = [];
4276
+ const generator = this.fileHandler.readGroupedRange(groupedRange, fd);
4277
+
4278
+ for await (const row of generator) {
4279
+ try {
4280
+ const record = this.serializer.deserialize(row.line)
4281
+
4282
+ // Get line number from the row, fallback to start offset mapping
4283
+ let lineNumber = row._ !== null && row._ !== undefined ? row._ : (startToLineNumber.get(row.start) ?? 0)
4284
+
4285
+ // Restore term IDs to terms
4286
+ const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
4287
+
4288
+ // Add line number
4289
+ recordWithTerms._ = lineNumber
4290
+
4291
+ // Add score if includeScore is true (default is true)
4292
+ if (opts.includeScore !== false) {
4293
+ recordWithTerms.score = scoresByLineNumber.get(lineNumber) || 0
4294
+ }
4295
+
4296
+ rangeResults.push(recordWithTerms)
4297
+ } catch (error) {
4298
+ // Skip invalid lines
4299
+ if (this.opts.debugMode) {
4300
+ console.error('Error deserializing record in score():', error)
4301
+ }
4302
+ }
4303
+ }
4304
+
4305
+ results.push(...rangeResults);
4306
+ return results;
4307
+ }
4308
+
4309
+ /**
4310
+ * Original rebuild indexes logic without retry (for backward compatibility)
4311
+ * @private
4312
+ */
4313
+ async _rebuildIndexesOriginal() {
4314
+ let count = 0;
4315
+
4316
+ // Auto-detect schema from first line if not initialized
4317
+ if (!this.serializer.schemaManager.isInitialized) {
4318
+ const stream = fs.createReadStream(this.fileHandler.file, {
4319
+ highWaterMark: 64 * 1024,
4320
+ encoding: 'utf8'
4321
+ })
4322
+ const rl = readline.createInterface({
4323
+ input: stream,
4324
+ crlfDelay: Infinity
4325
+ })
4326
+
4327
+ for await (const line of rl) {
4328
+ if (line && line.trim()) {
4329
+ try {
4330
+ const firstRecord = JSON.parse(line)
4331
+ if (Array.isArray(firstRecord)) {
4332
+ // Try to infer schema from opts.fields if available
4333
+ if (this.opts.fields && typeof this.opts.fields === 'object') {
4334
+ const fieldNames = Object.keys(this.opts.fields)
4335
+ if (fieldNames.length >= firstRecord.length) {
4336
+ // Use first N fields from opts.fields to match array length
4337
+ const schema = fieldNames.slice(0, firstRecord.length)
4338
+ this.serializer.initializeSchema(schema)
4339
+ if (this.opts.debugMode) {
4340
+ console.log(`🔍 Inferred schema from opts.fields: ${schema.join(', ')}`)
4341
+ }
4342
+ } else {
4343
+ throw new Error(`Cannot rebuild index: array has ${firstRecord.length} elements but opts.fields only defines ${fieldNames.length} fields. Schema must be explicitly provided.`)
4344
+ }
4345
+ } else {
4346
+ throw new Error('Cannot rebuild index: schema missing, file uses array format, and opts.fields not provided. The .idx.jdb file is corrupted.')
4347
+ }
4348
+ } else {
4349
+ // Object format, initialize from object keys
4350
+ this.serializer.initializeSchema(firstRecord, true)
4351
+ if (this.opts.debugMode) {
4352
+ console.log(`🔍 Auto-detected schema from object: ${Object.keys(firstRecord).join(', ')}`)
4353
+ }
4354
+ }
4355
+ break
4356
+ } catch (error) {
4357
+ if (this.opts.debugMode) {
4358
+ console.error('❌ Failed to auto-detect schema:', error.message)
4359
+ }
4360
+ throw error
4361
+ }
4362
+ }
4363
+ }
4364
+ stream.destroy()
4365
+ }
4366
+
4367
+ // Use streaming to read records without loading everything into memory
4368
+ // Also rebuild offsets while we're at it
4369
+ this.offsets = []
4370
+ let currentOffset = 0
4371
+
4372
+ const stream = fs.createReadStream(this.fileHandler.file, {
4373
+ highWaterMark: 64 * 1024,
4374
+ encoding: 'utf8'
4375
+ })
4376
+
4377
+ const rl = readline.createInterface({
4378
+ input: stream,
4379
+ crlfDelay: Infinity
4380
+ })
4381
+
4382
+ try {
4383
+ for await (const line of rl) {
4384
+ if (line && line.trim()) {
4385
+ try {
4386
+ // Record the offset for this line
4387
+ this.offsets.push(currentOffset)
4388
+
4389
+ const record = this.serializer.deserialize(line)
4390
+ const recordWithTerms = this.restoreTermIdsAfterDeserialization(record)
4391
+ await this.indexManager.add(recordWithTerms, count)
4392
+ count++
4393
+ } catch (error) {
4394
+ // Skip invalid lines
4395
+ if (this.opts.debugMode) {
4396
+ console.log(`⚠️ Rebuild: Failed to deserialize line ${count}:`, error.message)
4397
+ }
4398
+ }
4399
+ }
4400
+ // Update offset for next line (including newline character)
4401
+ currentOffset += Buffer.byteLength(line, 'utf8') + 1
4402
+ }
4403
+ } finally {
4404
+ stream.destroy()
4405
+ }
4406
+
4407
+ return count;
4408
+ }
4409
+
3868
4410
  /**
3869
4411
  * Wait for all pending operations to complete
3870
4412
  */
@@ -4187,7 +4729,6 @@ class Database extends EventEmitter {
4187
4729
 
4188
4730
  // Method 1: Try to read the entire file and filter
4189
4731
  if (this.fileHandler.exists()) {
4190
- const fs = await import('fs')
4191
4732
  const fileContent = await fs.promises.readFile(this.normalizedFile, 'utf8')
4192
4733
  const lines = fileContent.split('\n').filter(line => line.trim())
4193
4734
 
@@ -4894,11 +5435,10 @@ class Database extends EventEmitter {
4894
5435
 
4895
5436
  this._offsetRecoveryInProgress = true
4896
5437
 
4897
- const fsModule = this._fsModule || (this._fsModule = await import('fs'))
4898
5438
  let fd
4899
5439
 
4900
5440
  try {
4901
- fd = await fsModule.promises.open(this.fileHandler.file, 'r')
5441
+ fd = await fs.promises.open(this.fileHandler.file, 'r')
4902
5442
  } catch (error) {
4903
5443
  this._offsetRecoveryInProgress = false
4904
5444
  if (this.opts.debugMode) {
@@ -5224,7 +5764,6 @@ class Database extends EventEmitter {
5224
5764
  const ranges = this.getRanges(map)
5225
5765
  const groupedRanges = await this.fileHandler.groupedRanges(ranges)
5226
5766
 
5227
- const fs = await import('fs')
5228
5767
  const fd = await fs.promises.open(this.fileHandler.file, 'r')
5229
5768
 
5230
5769
  try {
@@ -5770,7 +6309,6 @@ class Database extends EventEmitter {
5770
6309
  // If the .idx.jdb file exists and has data, and we're trying to save empty index,
5771
6310
  // skip the save to prevent corruption
5772
6311
  if (isEmpty && !this.offsets?.length) {
5773
- const fs = await import('fs')
5774
6312
  if (fs.existsSync(idxPath)) {
5775
6313
  try {
5776
6314
  const existingData = JSON.parse(await fs.promises.readFile(idxPath, 'utf8'))