npm - jexidb - Versions diffs - 2.1.0 → 2.1.1 - Mend

jexidb 2.1.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/dist/Database.cjs +1642 -334
package/docs/API.md +1057 -1051
package/package.json +1 -1
package/scripts/benchmark-array-serialization.js +108 -0
package/scripts/score-mode-demo.js +45 -0
package/src/Database.mjs +1362 -167
package/src/FileHandler.mjs +83 -44
package/src/OperationQueue.mjs +23 -23
package/src/Serializer.mjs +214 -23
package/src/managers/IndexManager.mjs +778 -87
package/src/managers/QueryManager.mjs +266 -49
package/src/managers/TermManager.mjs +7 -7
package/src/utils/operatorNormalizer.mjs +116 -0
package/test/coverage-method.test.js +93 -0
package/test/deserialize-corruption-fixes.test.js +296 -0
package/test/exists-method.test.js +318 -0
package/test/explicit-indexes-comparison.test.js +219 -0
package/test/filehandler-non-adjacent-ranges-bug.test.js +175 -0
package/test/index-line-number-regression.test.js +100 -0
package/test/index-missing-index-data.test.js +91 -0
package/test/index-persistence.test.js +205 -20
package/test/insert-session-auto-flush.test.js +353 -0
package/test/legacy-operator-compat.test.js +154 -0
package/test/score-method.test.js +60 -0

package/src/FileHandler.mjs CHANGED

@@ -6,10 +6,12 @@ import pLimit from 'p-limit'
 export default class FileHandler {
   constructor(file, fileMutex = null, opts = {}) {
     this.file = file
-    this.indexFile = file.replace(/\.jdb$/, '.idx.jdb')
+    this.indexFile = file ? file.replace(/\.jdb$/, '.idx.jdb') : null
     this.fileMutex = fileMutex
     this.opts = opts
     this.maxBufferSize = opts.maxBufferSize || 4 * 1024 * 1024 // 4MB default
+    // Global I/O limiter to prevent file descriptor exhaustion in concurrent operations
+    this.readLimiter = pLimit(opts.maxConcurrentReads || 4)
   }
   async truncate(offset) {
@@ -174,7 +176,7 @@ export default class FileHandler {
   }
   async readRanges(ranges, mapper) {
-    const lines = {}, limit = pLimit(4)
+    const lines = {}
     // Check if file exists before trying to read it
     if (!await this.exists()) {
@@ -185,7 +187,7 @@ export default class FileHandler {
     const groupedRanges = await this.groupedRanges(ranges)
     try {
       await Promise.allSettled(groupedRanges.map(async (groupedRange) => {
-        await limit(async () => {
+        await this.readLimiter(async () => {
           for await (const row of this.readGroupedRange(groupedRange, fd)) {
             lines[row.start] = mapper ? (await mapper(row.line, { start: row.start, end: row.start + row.line.length })) : row.line
           }
@@ -255,6 +257,10 @@ export default class FileHandler {
         lineString = actualBuffer.toString('utf8', { replacement: '?' })
       }
+      // CRITICAL FIX: Remove trailing newlines and whitespace for single range too
+      // Optimized: Use trimEnd() which efficiently removes all trailing whitespace (faster than manual checks)
+      lineString = lineString.trimEnd()
       yield {
         line: lineString,
         start: range.start,
@@ -301,14 +307,42 @@ export default class FileHandler {
         }
       }
     } else {
-      // Original logic for non-adjacent ranges
+      // CRITICAL FIX: For non-adjacent ranges, use the range.end directly
+      // because range.end already excludes the newline (calculated as offsets[n+1] - 1)
+      // We just need to find the line start (beginning of the line in the buffer)
       for (let i = 0; i < groupedRange.length; i++) {
         const range = groupedRange[i]
         const relativeStart = range.start - firstRange.start
         const relativeEnd = range.end - firstRange.start
-        // Extract the specific range content
-        const rangeContent = content.substring(relativeStart, relativeEnd)
+        // OPTIMIZATION 2: Find line start only if necessary
+        // Check if we're already at a line boundary to avoid unnecessary backwards search
+        let lineStart = relativeStart
+        if (relativeStart > 0 && content[relativeStart - 1] !== '\n') {
+          // Only search backwards if we're not already at a line boundary
+          while (lineStart > 0 && content[lineStart - 1] !== '\n') {
+            lineStart--
+          }
+        }
+        // OPTIMIZATION 3: Use slice() instead of substring() for better performance
+        // CRITICAL FIX: range.end = offsets[n+1] - 1 points to the newline character
+        // slice(start, end) includes characters from start to end-1 (end is exclusive)
+        // So if relativeEnd points to the newline, slice will include it
+        let rangeContent = content.slice(lineStart, relativeEnd)
+        // OPTIMIZATION 4: Direct character check instead of regex/trimEnd
+        // Remove trailing newlines and whitespace efficiently
+        // trimEnd() is actually optimized in V8, but we can check if there's anything to trim first
+        const len = rangeContent.length
+        if (len > 0) {
+          // Quick check: if last char is not whitespace, skip trimEnd
+          const lastChar = rangeContent[len - 1]
+          if (lastChar === '\n' || lastChar === '\r' || lastChar === ' ' || lastChar === '\t') {
+            // Only call trimEnd if we detected trailing whitespace
+            rangeContent = rangeContent.trimEnd()
+          }
+        }
         if (rangeContent.length === 0) continue
@@ -543,45 +577,48 @@ export default class FileHandler {
   }
   async readLastLine() {
-    // Check if file exists before trying to read it
-    if (!await this.exists()) {
-      return null // Return null if file doesn't exist
-    }
-    const reader = await fs.promises.open(this.file, 'r')
-    try {
-      const { size } = await reader.stat()
-      if (size < 1) throw 'empty file'
-      this.size = size
-      const bufferSize = 16384
-      let buffer, isFirstRead = true, lastReadSize, readPosition = Math.max(size - bufferSize, 0)
-      while (readPosition >= 0) {
-        const readSize = Math.min(bufferSize, size - readPosition)
-        if (readSize !== lastReadSize) {
-          lastReadSize = readSize
-          buffer = Buffer.alloc(readSize)
-        }
-        const { bytesRead } = await reader.read(buffer, 0, isFirstRead ? (readSize - 1) : readSize, readPosition)
-        if (isFirstRead) isFirstRead = false
-        if (bytesRead === 0) break
-        const newlineIndex = buffer.lastIndexOf(10)
-        const start = readPosition + newlineIndex + 1
-        if (newlineIndex !== -1) {
-          const lastLine = Buffer.alloc(size - start)
-          await reader.read(lastLine, 0, size - start, start)
-          if (!lastLine || !lastLine.length) {
-            throw 'no metadata or empty file'
+    // Use global read limiter to prevent file descriptor exhaustion
+    return this.readLimiter(async () => {
+      // Check if file exists before trying to read it
+      if (!await this.exists()) {
+        return null // Return null if file doesn't exist
+      }
+      const reader = await fs.promises.open(this.file, 'r')
+      try {
+        const { size } = await reader.stat()
+        if (size < 1) throw 'empty file'
+        this.size = size
+        const bufferSize = 16384
+        let buffer, isFirstRead = true, lastReadSize, readPosition = Math.max(size - bufferSize, 0)
+        while (readPosition >= 0) {
+          const readSize = Math.min(bufferSize, size - readPosition)
+          if (readSize !== lastReadSize) {
+            lastReadSize = readSize
+            buffer = Buffer.alloc(readSize)
+          }
+          const { bytesRead } = await reader.read(buffer, 0, isFirstRead ? (readSize - 1) : readSize, readPosition)
+          if (isFirstRead) isFirstRead = false
+          if (bytesRead === 0) break
+          const newlineIndex = buffer.lastIndexOf(10)
+          const start = readPosition + newlineIndex + 1
+          if (newlineIndex !== -1) {
+            const lastLine = Buffer.alloc(size - start)
+            await reader.read(lastLine, 0, size - start, start)
+            if (!lastLine || !lastLine.length) {
+              throw 'no metadata or empty file'
+            }
+            return lastLine
+          } else {
+            readPosition -= bufferSize
           }
-          return lastLine
-        } else {
-          readPosition -= bufferSize
         }
+      } catch (e) {
+        String(e).includes('empty file') || console.error('Error reading last line:', e)
+      } finally {
+        reader.close()
       }
-    } catch (e) {
-      String(e).includes('empty file') || console.error('Error reading last line:', e)
-    } finally {
-      reader.close()
-    }
+    })
   }
   /**
@@ -597,10 +634,12 @@ export default class FileHandler {
       return this.fileMutex.runExclusive(async () => {
         // Add a small delay to ensure any pending operations complete
         await new Promise(resolve => setTimeout(resolve, 5));
-        return this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer);
+        // Use global read limiter to prevent file descriptor exhaustion
+        return this.readLimiter(() => this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer));
       });
     } else {
-      return this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer);
+      // Use global read limiter to prevent file descriptor exhaustion
+      return this.readLimiter(() => this._readWithStreamingInternal(criteria, options, matchesCriteria, serializer));
     }
   }

package/src/OperationQueue.mjs CHANGED

@@ -1,6 +1,6 @@
 /**
- * OperationQueue - Sistema de fila para operações do banco de dados
- * Resolve race conditions entre operações concorrentes
+ * OperationQueue - Queue system for database operations
+ * Resolves race conditions between concurrent operations
  */
 export class OperationQueue {
@@ -20,9 +20,9 @@ export class OperationQueue {
   }
   /**
-   * Adiciona uma operação à fila
-   * @param {Function} operation - Função assíncrona a ser executada
-   * @returns {Promise} - Promise que resolve quando a operação é concluída
+   * Adds an operation to the queue
+   * @param {Function} operation - Asynchronous function to be executed
+   * @returns {Promise} - Promise that resolves when the operation is completed
    */
   async enqueue(operation) {
     const id = ++this.operationId
@@ -48,13 +48,13 @@ export class OperationQueue {
         startTime: Date.now()
       })
-      // Processar imediatamente se não estiver processando
+      // Process immediately if not already processing
       this.process().catch(reject)
     })
   }
   /**
-   * Processa todas as operações na fila sequencialmente
+   * Processes all operations in the queue sequentially
    */
   async process() {
     if (this.processing || this.queue.length === 0) {
@@ -116,9 +116,9 @@ export class OperationQueue {
   }
   /**
-   * Aguarda todas as operações pendentes serem processadas
-   * @param {number|null} maxWaitTime - Tempo máximo de espera em ms (null = wait indefinitely)
-   * @returns {Promise<boolean>} - true se todas foram processadas, false se timeout
+   * Waits for all pending operations to be processed
+   * @param {number|null} maxWaitTime - Maximum wait time in ms (null = wait indefinitely)
+   * @returns {Promise<boolean>} - true if all operations were processed, false if a timeout occurred
    */
   async waitForCompletion(maxWaitTime = 5000) {
     const startTime = Date.now()
@@ -167,21 +167,21 @@ export class OperationQueue {
   }
   /**
-   * Retorna o tamanho atual da fila
+   * Returns the current queue length
    */
   getQueueLength() {
     return this.queue.length
   }
   /**
-   * Verifica se está processando operações
+   * Checks whether operations are currently being processed
    */
   isProcessing() {
     return this.processing
   }
   /**
-   * Retorna estatísticas da fila
+   * Returns queue statistics
    */
   getStats() {
     return {
@@ -194,7 +194,7 @@ export class OperationQueue {
   }
   /**
-   * Limpa a fila (para casos de emergência)
+   * Clears the queue (for emergency situations)
    */
   clear() {
     const clearedCount = this.queue.length
@@ -208,9 +208,9 @@ export class OperationQueue {
   }
   /**
-   * Detecta operações travadas e retorna informações detalhadas
-   * @param {number} stuckThreshold - Tempo em ms para considerar uma operação travada
-   * @returns {Array} - Lista de operações travadas com stack traces
+   * Detects stuck operations and returns detailed information
+   * @param {number} stuckThreshold - Time in ms to consider an operation stuck
+   * @returns {Array} - List of stuck operations with stack traces
    */
   detectStuckOperations(stuckThreshold = 10000) {
     const now = Date.now()
@@ -225,15 +225,15 @@ export class OperationQueue {
   }
   /**
-   * Força a limpeza de operações travadas (último recurso)
-   * @param {number} stuckThreshold - Tempo em ms para considerar uma operação travada
-   * @returns {number} - Número de operações removidas
+   * Force-cleans stuck operations (last resort)
+   * @param {number} stuckThreshold - Time in ms to consider an operation stuck
+   * @returns {number} - Number of operations removed
    */
   forceCleanupStuckOperations(stuckThreshold = 10000) {
     const stuckOps = this.detectStuckOperations(stuckThreshold)
     if (stuckOps.length > 0) {
-      // Rejeitar todas as operações travadas
+      // Reject all stuck operations
       stuckOps.forEach(stuckOp => {
         const opIndex = this.queue.findIndex(op => op.id === stuckOp.id)
         if (opIndex !== -1) {
@@ -255,14 +255,14 @@ export class OperationQueue {
   }
   /**
-   * Verifica se a fila está vazia
+   * Checks whether the queue is empty
    */
   isEmpty() {
     return this.queue.length === 0
   }
   /**
-   * Retorna informações sobre a próxima operação na fila
+   * Returns information about the next operation in the queue
    */
   peekNext() {
     if (this.queue.length === 0) {

package/src/Serializer.mjs CHANGED

@@ -259,42 +259,69 @@ export default class Serializer {
   optimizedStringify(obj) {
     // CRITICAL: Normalize encoding for all string fields before stringify
     const normalizedObj = this.deepNormalizeEncoding(obj)
+    return this._stringifyNormalizedValue(normalizedObj)
+  }
+  _stringifyNormalizedValue(value) {
     // Fast path for null and undefined
-    if (normalizedObj === null) return 'null'
-    if (normalizedObj === undefined) return 'null'
+    if (value === null || value === undefined) {
+      return 'null'
+    }
+    const type = typeof value
     // Fast path for primitives
-    if (typeof normalizedObj === 'boolean') return normalizedObj ? 'true' : 'false'
-    if (typeof normalizedObj === 'number') return normalizedObj.toString()
-    if (typeof normalizedObj === 'string') {
+    if (type === 'boolean') {
+      return value ? 'true' : 'false'
+    }
+    if (type === 'number') {
+      return Number.isFinite(value) ? value.toString() : 'null'
+    }
+    if (type === 'string') {
       // Fast path for simple strings (no escaping needed)
-      if (!/[\\"\u0000-\u001f]/.test(normalizedObj)) {
-        return '"' + normalizedObj + '"'
+      if (!/[\\"\u0000-\u001f]/.test(value)) {
+        return '"' + value + '"'
       }
       // Fall back to JSON.stringify for complex strings
-      return JSON.stringify(normalizedObj)
+      return JSON.stringify(value)
     }
-    // Fast path for arrays
-    if (Array.isArray(normalizedObj)) {
-      if (normalizedObj.length === 0) return '[]'
-      // For arrays, always use JSON.stringify to avoid concatenation issues
-      return JSON.stringify(normalizedObj)
+    if (Array.isArray(value)) {
+      return this._stringifyNormalizedArray(value)
     }
-    // Fast path for objects
-    if (typeof normalizedObj === 'object') {
-      const keys = Object.keys(normalizedObj)
+    if (type === 'object') {
+      const keys = Object.keys(value)
       if (keys.length === 0) return '{}'
-      // For objects, always use JSON.stringify to avoid concatenation issues
-      return JSON.stringify(normalizedObj)
+      // Use native stringify for object to leverage stable handling of undefined, Dates, etc.
+      return JSON.stringify(value)
     }
-    // Fallback to JSON.stringify for unknown types
-    return JSON.stringify(normalizedObj)
+    // Fallback to JSON.stringify for unknown types (BigInt, symbols, etc.)
+    return JSON.stringify(value)
+  }
+  _stringifyNormalizedArray(arr) {
+    const length = arr.length
+    if (length === 0) return '[]'
+    let result = '['
+    for (let i = 0; i < length; i++) {
+      if (i > 0) result += ','
+      const element = arr[i]
+      // JSON spec: undefined, functions, and symbols are serialized as null within arrays
+      if (element === undefined || typeof element === 'function' || typeof element === 'symbol') {
+        result += 'null'
+        continue
+      }
+      result += this._stringifyNormalizedValue(element)
+    }
+    result += ']'
+    return result
   }
   /**
@@ -350,12 +377,176 @@ export default class Serializer {
       // Fast path for empty strings
       if (strLength === 0) return null
+      // CRITICAL FIX: Detect and handle multiple JSON objects in the same line
+      // This can happen if data was corrupted during concurrent writes or offset calculation errors
+      const firstBrace = str.indexOf('{')
+      const firstBracket = str.indexOf('[')
+      // Helper function to extract first complete JSON object/array from a string
+      // CRITICAL FIX: Must handle strings and escaped characters correctly
+      // to avoid counting braces/brackets that are inside string values
+      const extractFirstJson = (jsonStr, startChar) => {
+        if (startChar === '{') {
+          let braceCount = 0
+          let endPos = -1
+          let inString = false
+          let escapeNext = false
+          for (let i = 0; i < jsonStr.length; i++) {
+            const char = jsonStr[i]
+            if (escapeNext) {
+              escapeNext = false
+              continue
+            }
+            if (char === '\\') {
+              escapeNext = true
+              continue
+            }
+            if (char === '"' && !escapeNext) {
+              inString = !inString
+              continue
+            }
+            if (!inString) {
+              if (char === '{') braceCount++
+              if (char === '}') {
+                braceCount--
+                if (braceCount === 0) {
+                  endPos = i + 1
+                  break
+                }
+              }
+            }
+          }
+          return endPos > 0 ? jsonStr.substring(0, endPos) : null
+        } else if (startChar === '[') {
+          let bracketCount = 0
+          let endPos = -1
+          let inString = false
+          let escapeNext = false
+          for (let i = 0; i < jsonStr.length; i++) {
+            const char = jsonStr[i]
+            if (escapeNext) {
+              escapeNext = false
+              continue
+            }
+            if (char === '\\') {
+              escapeNext = true
+              continue
+            }
+            if (char === '"' && !escapeNext) {
+              inString = !inString
+              continue
+            }
+            if (!inString) {
+              if (char === '[') bracketCount++
+              if (char === ']') {
+                bracketCount--
+                if (bracketCount === 0) {
+                  endPos = i + 1
+                  break
+                }
+              }
+            }
+          }
+          return endPos > 0 ? jsonStr.substring(0, endPos) : null
+        }
+        return null
+      }
+      // Check if JSON starts at the beginning of the string
+      const jsonStartsAtZero = (firstBrace === 0) || (firstBracket === 0)
+      let hasValidJson = false
+      if (jsonStartsAtZero) {
+        // JSON starts at beginning - check for multiple JSON objects/arrays
+        if (firstBrace === 0) {
+          const secondBrace = str.indexOf('{', 1)
+          if (secondBrace !== -1) {
+            // Multiple objects detected - extract first
+            const extracted = extractFirstJson(str, '{')
+            if (extracted) {
+              str = extracted
+              hasValidJson = true
+              if (this.opts && this.opts.debugMode) {
+                console.warn(`⚠️ Deserialize: Multiple JSON objects detected, using first object only`)
+              }
+            }
+          } else {
+            hasValidJson = true // Single valid object starting at 0
+          }
+        } else if (firstBracket === 0) {
+          const secondBracket = str.indexOf('[', 1)
+          if (secondBracket !== -1) {
+            // Multiple arrays detected - extract first
+            const extracted = extractFirstJson(str, '[')
+            if (extracted) {
+              str = extracted
+              hasValidJson = true
+              if (this.opts && this.opts.debugMode) {
+                console.warn(`⚠️ Deserialize: Multiple JSON arrays detected, using first array only`)
+              }
+            }
+          } else {
+            hasValidJson = true // Single valid array starting at 0
+          }
+        }
+      } else {
+        // JSON doesn't start at beginning - try to find and extract first valid JSON
+        const jsonStart = firstBrace !== -1 ? (firstBracket !== -1 ? Math.min(firstBrace, firstBracket) : firstBrace) : firstBracket
+        if (jsonStart !== -1 && jsonStart > 0) {
+          // Found JSON but not at start - extract from that position
+          const jsonStr = str.substring(jsonStart)
+          const startChar = jsonStr[0]
+          const extracted = extractFirstJson(jsonStr, startChar)
+          if (extracted) {
+            str = extracted
+            hasValidJson = true
+            if (this.opts && this.opts.debugMode) {
+              console.warn(`⚠️ Deserialize: Found JSON after ${jsonStart} chars of invalid text, extracted first ${startChar === '{' ? 'object' : 'array'}`)
+            }
+          }
+        }
+      }
+      // CRITICAL FIX: If no valid JSON structure found, throw error before attempting parse
+      // This allows walk() and other callers to catch and skip invalid lines
+      if (!hasValidJson && firstBrace === -1 && firstBracket === -1) {
+        const errorStr = Buffer.isBuffer(data) ? data.toString('utf8').trim() : data.trim()
+        const error = new Error(`Failed to deserialize JSON data: No valid JSON structure found in "${errorStr.substring(0, 100)}..."`)
+        // Mark this as a "no valid JSON" error so it can be handled appropriately
+        error.noValidJson = true
+        throw error
+      }
+      // If we tried to extract but got nothing valid, also throw error
+      if (hasValidJson && (!str || str.trim().length === 0)) {
+        const error = new Error(`Failed to deserialize JSON data: Extracted JSON is empty`)
+        error.noValidJson = true
+        throw error
+      }
       // Parse JSON data
       const parsedData = JSON.parse(str)
       // Convert from array format back to object if needed
       return this.convertFromArrayFormat(parsedData)
     } catch (e) {
+      // If error was already formatted with noValidJson flag, re-throw as-is
+      if (e.noValidJson) {
+        throw e
+      }
+      // Otherwise, format the error message
       const str = Buffer.isBuffer(data) ? data.toString('utf8').trim() : data.trim()
       throw new Error(`Failed to deserialize JSON data: "${str.substring(0, 100)}..." - ${e.message}`)
     }