npm - jexidb - Versions diffs - 2.0.2 → 2.1.0 - Mend

jexidb 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

package/.babelrc +13 -0
package/.gitattributes +2 -0
package/CHANGELOG.md +140 -0
package/LICENSE +21 -21
package/README.md +301 -527
package/babel.config.json +5 -0
package/dist/Database.cjs +3896 -0
package/docs/API.md +1051 -0
package/docs/EXAMPLES.md +701 -0
package/docs/README.md +194 -0
package/examples/iterate-usage-example.js +157 -0
package/examples/simple-iterate-example.js +115 -0
package/jest.config.js +24 -0
package/package.json +63 -51
package/scripts/README.md +47 -0
package/scripts/clean-test-files.js +75 -0
package/scripts/prepare.js +31 -0
package/scripts/run-tests.js +80 -0
package/src/Database.mjs +4130 -0
package/src/FileHandler.mjs +1101 -0
package/src/OperationQueue.mjs +279 -0
package/src/SchemaManager.mjs +268 -0
package/src/Serializer.mjs +511 -0
package/src/managers/ConcurrencyManager.mjs +257 -0
package/src/managers/IndexManager.mjs +1403 -0
package/src/managers/QueryManager.mjs +1273 -0
package/src/managers/StatisticsManager.mjs +262 -0
package/src/managers/StreamingProcessor.mjs +429 -0
package/src/managers/TermManager.mjs +278 -0
package/test/$not-operator-with-and.test.js +282 -0
package/test/README.md +8 -0
package/test/close-init-cycle.test.js +256 -0
package/test/critical-bugs-fixes.test.js +1069 -0
package/test/index-persistence.test.js +306 -0
package/test/index-serialization.test.js +314 -0
package/test/indexed-query-mode.test.js +360 -0
package/test/iterate-method.test.js +272 -0
package/test/query-operators.test.js +238 -0
package/test/regex-array-fields.test.js +129 -0
package/test/score-method.test.js +238 -0
package/test/setup.js +17 -0
package/test/term-mapping-minimal.test.js +154 -0
package/test/term-mapping-simple.test.js +257 -0
package/test/term-mapping.test.js +514 -0
package/test/writebuffer-flush-resilience.test.js +204 -0
package/dist/FileHandler.js +0 -688
package/dist/IndexManager.js +0 -353
package/dist/IntegrityChecker.js +0 -364
package/dist/JSONLDatabase.js +0 -1194
package/dist/index.js +0 -617
package/src/FileHandler.js +0 -674
package/src/IndexManager.js +0 -363
package/src/IntegrityChecker.js +0 -379
package/src/JSONLDatabase.js +0 -1248
package/src/index.js +0 -608

package/src/managers/TermManager.mjs ADDED Viewed

@@ -0,0 +1,278 @@
+/**
+ * TermManager - Manages term-to-ID mapping for efficient storage
+ *
+ * Responsibilities:
+ * - Map terms to numeric IDs for space efficiency
+ * - Track term usage counts for cleanup
+ * - Load/save terms from/to index file
+ * - Clean up orphaned terms
+ */
+export default class TermManager {
+  constructor() {
+    this.termToId = new Map()     // "bra" -> 1
+    this.idToTerm = new Map()     // 1 -> "bra"
+    this.termCounts = new Map()   // 1 -> 1500 (how many times used)
+    this.nextId = 1
+  }
+  /**
+   * Get ID for a term (create if doesn't exist)
+   * @param {string} term - Term to get ID for
+   * @returns {number} - Numeric ID for the term
+   */
+  getTermId(term) {
+    if (this.termToId.has(term)) {
+      const id = this.termToId.get(term)
+      this.termCounts.set(id, (this.termCounts.get(id) || 0) + 1)
+      return id
+    }
+    const id = this.nextId++
+    this.termToId.set(term, id)
+    this.idToTerm.set(id, term)
+    this.termCounts.set(id, 1)
+    return id
+  }
+  /**
+   * Get term ID without incrementing count (for IndexManager use)
+   * @param {string} term - Term to get ID for
+   * @returns {number} - Numeric ID for the term
+   */
+  getTermIdWithoutIncrement(term) {
+    if (this.termToId.has(term)) {
+      return this.termToId.get(term)
+    }
+    const id = this.nextId++
+    this.termToId.set(term, id)
+    this.idToTerm.set(id, term)
+    this.termCounts.set(id, 0) // Start with 0 count
+    return id
+  }
+  /**
+   * Get term by ID
+   * @param {number} id - Numeric ID
+   * @returns {string|null} - Term or null if not found
+   */
+  getTerm(id) {
+    return this.idToTerm.get(id) || null
+  }
+  /**
+   * Bulk get term IDs for multiple terms (optimized for performance)
+   * @param {string[]} terms - Array of terms to get IDs for
+   * @returns {number[]} - Array of term IDs in the same order
+   */
+  bulkGetTermIds(terms) {
+    if (!Array.isArray(terms) || terms.length === 0) {
+      return []
+    }
+    const termIds = new Array(terms.length)
+    // Process all terms in a single pass
+    for (let i = 0; i < terms.length; i++) {
+      const term = terms[i]
+      if (this.termToId.has(term)) {
+        const id = this.termToId.get(term)
+        this.termCounts.set(id, (this.termCounts.get(id) || 0) + 1)
+        termIds[i] = id
+      } else {
+        const id = this.nextId++
+        this.termToId.set(term, id)
+        this.idToTerm.set(id, term)
+        this.termCounts.set(id, 1)
+        termIds[i] = id
+      }
+    }
+    return termIds
+  }
+  /**
+   * Load terms from file data
+   * @param {Object} termsData - Terms data from file
+   */
+  loadTerms(termsData) {
+    if (!termsData || typeof termsData !== 'object') {
+      return
+    }
+    for (const [id, term] of Object.entries(termsData)) {
+      const numericId = parseInt(id)
+      if (!isNaN(numericId) && term) {
+        this.termToId.set(term, numericId)
+        this.idToTerm.set(numericId, term)
+        this.nextId = Math.max(this.nextId, numericId + 1)
+        // Initialize count to 0 - will be updated as terms are used
+        this.termCounts.set(numericId, 0)
+      }
+    }
+  }
+  /**
+   * Save terms to file format
+   * @returns {Object} - Terms data for file
+   */
+  saveTerms() {
+    const termsData = {}
+    for (const [id, term] of this.idToTerm) {
+      termsData[id] = term
+    }
+    return termsData
+  }
+  /**
+   * Clean up orphaned terms (terms with count 0)
+   * @param {boolean} forceCleanup - Force cleanup even if conditions not met
+   * @param {Object} options - Cleanup options
+   * @returns {number} - Number of orphaned terms removed
+   */
+  cleanupOrphanedTerms(forceCleanup = false, options = {}) {
+    const {
+      intelligentCleanup = true,
+      minOrphanCount = 10,
+      orphanPercentage = 0.15,
+      checkSystemState = true
+    } = options
+    // INTELLIGENT CLEANUP: Check if cleanup should be performed
+    if (!forceCleanup && intelligentCleanup) {
+      const stats = this.getStats()
+      const orphanedCount = stats.orphanedTerms
+      const totalTerms = stats.totalTerms
+      // Only cleanup if conditions are met
+      const shouldCleanup = (
+        orphanedCount >= minOrphanCount &&           // Minimum orphan count
+        orphanedCount > totalTerms * orphanPercentage && // Orphans > percentage of total
+        (!checkSystemState || this.isSystemSafe())   // System is safe (if check enabled)
+      )
+      if (!shouldCleanup) {
+        return 0 // Don't cleanup if conditions not met
+      }
+    } else if (!forceCleanup) {
+      return 0 // Don't remove anything during normal operations
+    }
+    // PERFORM CLEANUP: Remove orphaned terms
+    const orphanedIds = []
+    for (const [id, count] of this.termCounts) {
+      if (count === 0) {
+        orphanedIds.push(id)
+      }
+    }
+    // Remove orphaned terms with additional safety checks
+    for (const id of orphanedIds) {
+      const term = this.idToTerm.get(id)
+      if (term && typeof term === 'string') { // Extra safety: only remove string terms
+        this.termToId.delete(term)
+        this.idToTerm.delete(id)
+        this.termCounts.delete(id)
+      }
+    }
+    return orphanedIds.length
+  }
+  /**
+   * Check if system is safe for cleanup operations
+   * @returns {boolean} - True if system is safe for cleanup
+   */
+  isSystemSafe() {
+    // This method should be overridden by the database instance
+    // to provide system state information
+    return true // Default to safe for backward compatibility
+  }
+  /**
+   * Perform intelligent automatic cleanup
+   * @param {Object} options - Cleanup options
+   * @returns {number} - Number of orphaned terms removed
+   */
+  performIntelligentCleanup(options = {}) {
+    return this.cleanupOrphanedTerms(false, {
+      intelligentCleanup: true,
+      minOrphanCount: 5,        // Lower threshold for automatic cleanup
+      orphanPercentage: 0.1,    // 10% of total terms
+      checkSystemState: true,
+      ...options
+    })
+  }
+  /**
+   * Decrement term count (when term is removed from index)
+   * @param {number} termId - Term ID to decrement
+   */
+  decrementTermCount(termId) {
+    const count = this.termCounts.get(termId) || 0
+    this.termCounts.set(termId, Math.max(0, count - 1))
+  }
+  /**
+   * Increment term count (when term is added to index)
+   * @param {number} termId - Term ID to increment
+   */
+  incrementTermCount(termId) {
+    const count = this.termCounts.get(termId) || 0
+    this.termCounts.set(termId, count + 1)
+  }
+  /**
+   * Get statistics about terms
+   * @returns {Object} - Term statistics
+   */
+  getStats() {
+    return {
+      totalTerms: this.termToId.size,
+      nextId: this.nextId,
+      orphanedTerms: Array.from(this.termCounts.entries()).filter(([_, count]) => count === 0).length
+    }
+  }
+  /**
+   * Check if a term exists
+   * @param {string} term - Term to check
+   * @returns {boolean} - True if term exists
+   */
+  hasTerm(term) {
+    return this.termToId.has(term)
+  }
+  /**
+   * Get all terms
+   * @returns {Array} - Array of all terms
+   */
+  getAllTerms() {
+    return Array.from(this.termToId.keys())
+  }
+  /**
+   * Get all term IDs
+   * @returns {Array} - Array of all term IDs
+   */
+  getAllTermIds() {
+    return Array.from(this.idToTerm.keys())
+  }
+  /**
+   * Get statistics about term mapping
+   * @returns {Object} - Statistics object
+   */
+  getStatistics() {
+    return {
+      totalTerms: this.termToId.size,
+      nextId: this.nextId,
+      termCounts: Object.fromEntries(this.termCounts),
+      sampleTerms: Array.from(this.termToId.entries()).slice(0, 5)
+    }
+  }
+}

package/test/$not-operator-with-and.test.js ADDED Viewed

@@ -0,0 +1,282 @@
+/**
+ * $not Operator with $and on Array Fields Test
+ *
+ * Bug Report: https://github.com/yourrepo/jexidb/issues/XXX
+ *
+ * Issue: When using $not with $and on array fields in strict mode,
+ * queries return empty results even when matching documents exist.
+ *
+ * Root Cause: IndexManager.query() did not handle the $not operator,
+ * treating it as an unknown field and returning an empty set.
+ * Additionally, when fields existed at both root level and inside $and,
+ * only the $and conditions were being processed.
+ *
+ * Fix: Added proper $not handling in IndexManager.query() that:
+ * 1. Gets all possible line numbers from database offsets
+ * 2. Queries for the $not condition
+ * 3. Returns the complement (all lines except those matching $not)
+ * 4. Intersects with other root-level conditions if present
+ * Also fixed $and to properly intersect with root-level fields.
+ */
+import { Database } from '../src/Database.mjs'
+import { describe, it, expect, beforeEach, afterEach } from '@jest/globals'
+import fs from 'fs'
+describe('$not Operator with $and on Array Fields', () => {
+  let db
+  const testFile = './test-files/not-operator-test.jdb'
+  const testIdxFile = './test-files/not-operator-test.idx.jdb'
+  beforeEach(async () => {
+    // Clean up test files
+    try {
+      if (fs.existsSync(testFile)) fs.unlinkSync(testFile)
+      if (fs.existsSync(testIdxFile)) fs.unlinkSync(testIdxFile)
+    } catch (err) {
+      // Ignore cleanup errors
+    }
+    // Create database with array field
+    db = new Database(testFile, {
+      clear: true,
+      create: true,
+      integrityCheck: 'none',
+      indexedQueryMode: 'strict',
+      fields: {
+        name: 'string',
+        nameTerms: 'array:string',
+      },
+      indexes: ['name', 'nameTerms']
+    })
+    await db.init()
+    // Insert test data
+    const testData = [
+      { name: 'SBT Nacional', nameTerms: ['sbt'] },
+      { name: 'SBT HD', nameTerms: ['sbt'] },
+      { name: 'SBT Radio', nameTerms: ['sbt', 'radio'] },
+      { name: 'SBT FM', nameTerms: ['sbt', 'fm'] },
+      { name: 'Radio FM', nameTerms: ['radio', 'fm'] },
+      { name: 'Globo', nameTerms: ['globo'] },
+    ]
+    for (const doc of testData) {
+      await db.insert(doc)
+    }
+    await db.flush()
+    await db.close()
+    // Re-open database
+    db = new Database(testFile, {
+      create: false,
+      integrityCheck: 'none',
+      indexedQueryMode: 'strict',
+      fields: {
+        name: 'string',
+        nameTerms: 'array:string',
+      },
+      indexes: ['name', 'nameTerms']
+    })
+    await db.init()
+  })
+  afterEach(async () => {
+    if (db && !db.destroyed) {
+      try {
+        await db.destroy()
+      } catch (err) {
+        // Ignore destroy errors
+      }
+    }
+    // Clean up test files
+    try {
+      if (fs.existsSync(testFile)) fs.unlinkSync(testFile)
+      if (fs.existsSync(testIdxFile)) fs.unlinkSync(testIdxFile)
+    } catch (err) {
+      // Ignore cleanup errors
+    }
+  })
+  it('should handle $not with $and (positive condition first)', async () => {
+    const query = {
+      $and: [
+        { nameTerms: { $in: ['sbt'] } },
+        { $not: { nameTerms: { $in: ['radio', 'fm'] } } }
+      ]
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(2)
+    expect(results[0].name).toBe('SBT Nacional')
+    expect(results[1].name).toBe('SBT HD')
+  })
+  it('should handle $not with $and (negative condition first)', async () => {
+    const query = {
+      $and: [
+        { $not: { nameTerms: { $in: ['radio', 'fm'] } } },
+        { nameTerms: { $in: ['sbt'] } }
+      ]
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(2)
+    expect(results[0].name).toBe('SBT Nacional')
+    expect(results[1].name).toBe('SBT HD')
+  })
+  it('should handle $not WITHOUT $and (root level)', async () => {
+    const query = {
+      nameTerms: { $in: ['sbt'] },
+      $not: { nameTerms: { $in: ['radio', 'fm'] } }
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(2)
+    expect(results[0].name).toBe('SBT Nacional')
+    expect(results[1].name).toBe('SBT HD')
+  })
+  it('should handle multiple $not in $and with root-level field', async () => {
+    const query = {
+      nameTerms: { $in: ['sbt'] },
+      $and: [
+        { $not: { nameTerms: 'radio' } },
+        { $not: { nameTerms: 'fm' } }
+      ]
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(2)
+    expect(results[0].name).toBe('SBT Nacional')
+    expect(results[1].name).toBe('SBT HD')
+  })
+  it('should handle $not with single value', async () => {
+    const query = {
+      $and: [
+        { nameTerms: { $in: ['sbt'] } },
+        { $not: { nameTerms: 'radio' } }
+      ]
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(3)
+    const names = results.map(r => r.name).sort()
+    expect(names).toEqual(['SBT FM', 'SBT HD', 'SBT Nacional'])
+  })
+  it('should handle complex $not queries with multiple conditions', async () => {
+    const query = {
+      $and: [
+        { nameTerms: { $in: ['sbt', 'globo'] } },
+        { $not: { nameTerms: { $in: ['radio', 'fm'] } } }
+      ]
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(3)
+    const names = results.map(r => r.name).sort()
+    expect(names).toEqual(['Globo', 'SBT HD', 'SBT Nacional'])
+  })
+  it('should handle $not that excludes all results', async () => {
+    const query = {
+      $and: [
+        { nameTerms: { $in: ['sbt'] } },
+        { $not: { nameTerms: 'sbt' } }
+      ]
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(0)
+  })
+  it('should handle $not with non-existent values', async () => {
+    const query = {
+      $and: [
+        { nameTerms: { $in: ['sbt'] } },
+        { $not: { nameTerms: { $in: ['nonexistent', 'invalid'] } } }
+      ]
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(4)
+    const names = results.map(r => r.name).sort()
+    expect(names).toEqual(['SBT FM', 'SBT HD', 'SBT Nacional', 'SBT Radio'])
+  })
+  it('should handle $nin operator in strict mode', async () => {
+    const query = {
+      nameTerms: { $nin: ['radio', 'fm'] }
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(3)
+    const names = results.map(r => r.name).sort()
+    expect(names).toEqual(['Globo', 'SBT HD', 'SBT Nacional'])
+  })
+  it('should handle $nin with $in in strict mode', async () => {
+    const query = {
+      $and: [
+        { nameTerms: { $in: ['sbt'] } },
+        { nameTerms: { $nin: ['radio', 'fm'] } }
+      ]
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(2)
+    expect(results[0].name).toBe('SBT Nacional')
+    expect(results[1].name).toBe('SBT HD')
+  })
+  it('should handle $nin with single value', async () => {
+    const query = {
+      nameTerms: { $nin: ['radio'] }
+    }
+    const results = await db.find(query)
+    expect(results).toHaveLength(4)
+    const names = results.map(r => r.name).sort()
+    expect(names).toEqual(['Globo', 'SBT FM', 'SBT HD', 'SBT Nacional'])
+  })
+  it('should produce same results for $nin and $not+$in', async () => {
+    // Query with $nin
+    const ninQuery = {
+      nameTerms: { $nin: ['radio', 'fm'] }
+    }
+    // Equivalent query with $not + $in
+    const notQuery = {
+      $not: { nameTerms: { $in: ['radio', 'fm'] } }
+    }
+    const ninResults = await db.find(ninQuery)
+    const notResults = await db.find(notQuery)
+    expect(ninResults).toHaveLength(notResults.length)
+    const ninNames = ninResults.map(r => r.name).sort()
+    const notNames = notResults.map(r => r.name).sort()
+    expect(ninNames).toEqual(notNames)
+  })
+})

package/test/README.md ADDED Viewed

@@ -0,0 +1,8 @@
+## Test Results
+The following are the results of the automated tests conducted on my PC for the JSON format.
+| Format                         | Size (bytes) | Time elapsed (ms) |
+|-------------------------------|--------------|--------------------|
+| JSON                          | 1117         | 21                 |
+JSON format provides universal compatibility across all environments and Node.js versions.