jexidb 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.babelrc +13 -0
  2. package/.gitattributes +2 -0
  3. package/CHANGELOG.md +140 -0
  4. package/LICENSE +21 -21
  5. package/README.md +301 -527
  6. package/babel.config.json +5 -0
  7. package/dist/Database.cjs +3896 -0
  8. package/docs/API.md +1051 -0
  9. package/docs/EXAMPLES.md +701 -0
  10. package/docs/README.md +194 -0
  11. package/examples/iterate-usage-example.js +157 -0
  12. package/examples/simple-iterate-example.js +115 -0
  13. package/jest.config.js +24 -0
  14. package/package.json +63 -51
  15. package/scripts/README.md +47 -0
  16. package/scripts/clean-test-files.js +75 -0
  17. package/scripts/prepare.js +31 -0
  18. package/scripts/run-tests.js +80 -0
  19. package/src/Database.mjs +4130 -0
  20. package/src/FileHandler.mjs +1101 -0
  21. package/src/OperationQueue.mjs +279 -0
  22. package/src/SchemaManager.mjs +268 -0
  23. package/src/Serializer.mjs +511 -0
  24. package/src/managers/ConcurrencyManager.mjs +257 -0
  25. package/src/managers/IndexManager.mjs +1403 -0
  26. package/src/managers/QueryManager.mjs +1273 -0
  27. package/src/managers/StatisticsManager.mjs +262 -0
  28. package/src/managers/StreamingProcessor.mjs +429 -0
  29. package/src/managers/TermManager.mjs +278 -0
  30. package/test/$not-operator-with-and.test.js +282 -0
  31. package/test/README.md +8 -0
  32. package/test/close-init-cycle.test.js +256 -0
  33. package/test/critical-bugs-fixes.test.js +1069 -0
  34. package/test/index-persistence.test.js +306 -0
  35. package/test/index-serialization.test.js +314 -0
  36. package/test/indexed-query-mode.test.js +360 -0
  37. package/test/iterate-method.test.js +272 -0
  38. package/test/query-operators.test.js +238 -0
  39. package/test/regex-array-fields.test.js +129 -0
  40. package/test/score-method.test.js +238 -0
  41. package/test/setup.js +17 -0
  42. package/test/term-mapping-minimal.test.js +154 -0
  43. package/test/term-mapping-simple.test.js +257 -0
  44. package/test/term-mapping.test.js +514 -0
  45. package/test/writebuffer-flush-resilience.test.js +204 -0
  46. package/dist/FileHandler.js +0 -688
  47. package/dist/IndexManager.js +0 -353
  48. package/dist/IntegrityChecker.js +0 -364
  49. package/dist/JSONLDatabase.js +0 -1194
  50. package/dist/index.js +0 -617
  51. package/src/FileHandler.js +0 -674
  52. package/src/IndexManager.js +0 -363
  53. package/src/IntegrityChecker.js +0 -379
  54. package/src/JSONLDatabase.js +0 -1248
  55. package/src/index.js +0 -608
@@ -0,0 +1,278 @@
1
/**
 * TermManager - Manages term-to-ID mapping for efficient storage
 *
 * Responsibilities:
 * - Map terms to numeric IDs for space efficiency
 * - Track term usage counts for cleanup
 * - Load/save terms from/to index file
 * - Clean up orphaned terms
 */
export default class TermManager {
  constructor() {
    this.termToId = new Map() // "bra" -> 1
    this.idToTerm = new Map() // 1 -> "bra"
    this.termCounts = new Map() // 1 -> 1500 (how many times used)
    this.nextId = 1
  }

  /**
   * Register a term that is not yet known and give it a fresh ID.
   * Internal helper shared by all lookup methods so the three maps are
   * always updated together.
   * @param {string} term - Term to register (must not already be mapped)
   * @param {number} initialCount - Usage count to start with
   * @returns {number} - Newly assigned numeric ID
   */
  _registerTerm(term, initialCount) {
    const id = this.nextId++
    this.termToId.set(term, id)
    this.idToTerm.set(id, term)
    this.termCounts.set(id, initialCount)
    return id
  }

  /**
   * Get ID for a term (create if doesn't exist).
   * Every call counts as one usage of the term.
   * @param {string} term - Term to get ID for
   * @returns {number} - Numeric ID for the term
   */
  getTermId(term) {
    if (!this.termToId.has(term)) {
      return this._registerTerm(term, 1)
    }

    const id = this.termToId.get(term)
    this.termCounts.set(id, (this.termCounts.get(id) || 0) + 1)
    return id
  }

  /**
   * Get term ID without incrementing count (for IndexManager use).
   * An unknown term is still registered, but with a usage count of 0.
   * @param {string} term - Term to get ID for
   * @returns {number} - Numeric ID for the term
   */
  getTermIdWithoutIncrement(term) {
    if (this.termToId.has(term)) {
      return this.termToId.get(term)
    }

    return this._registerTerm(term, 0) // Start with 0 count
  }

  /**
   * Get term by ID
   * @param {number} id - Numeric ID
   * @returns {string|null} - Term or null if not found
   */
  getTerm(id) {
    // Use has() rather than a truthiness fallback so that a registered
    // empty-string term is returned as '' instead of null.
    return this.idToTerm.has(id) ? this.idToTerm.get(id) : null
  }

  /**
   * Bulk get term IDs for multiple terms.
   * Each occurrence of a term counts as one usage, exactly as if
   * getTermId() had been called once per element.
   * @param {string[]} terms - Array of terms to get IDs for
   * @returns {number[]} - Array of term IDs in the same order
   */
  bulkGetTermIds(terms) {
    if (!Array.isArray(terms) || terms.length === 0) {
      return []
    }

    // Array.from visits every index (including holes), like an index loop.
    return Array.from(terms, (term) => this.getTermId(term))
  }

  /**
   * Load terms from file data.
   * Entries whose key is not a decimal integer or whose term is falsy are
   * skipped. Loaded terms start with a usage count of 0.
   * @param {Object} termsData - Terms data from file ({ id: term })
   */
  loadTerms(termsData) {
    if (!termsData || typeof termsData !== 'object') {
      return
    }

    for (const [key, term] of Object.entries(termsData)) {
      const numericId = Number.parseInt(key, 10)
      if (Number.isNaN(numericId) || !term) {
        continue
      }

      this.termToId.set(term, numericId)
      this.idToTerm.set(numericId, term)
      // Keep nextId ahead of every loaded ID so new terms never collide
      this.nextId = Math.max(this.nextId, numericId + 1)
      // Initialize count to 0 - will be updated as terms are used
      this.termCounts.set(numericId, 0)
    }
  }

  /**
   * Save terms to file format
   * @returns {Object} - Terms data for file ({ id: term })
   */
  saveTerms() {
    const termsData = {}
    for (const [id, term] of this.idToTerm) {
      termsData[id] = term
    }
    return termsData
  }

  /**
   * Clean up orphaned terms (terms with count 0)
   * @param {boolean} forceCleanup - Force cleanup even if conditions not met
   * @param {Object} options - Cleanup options
   * @param {boolean} [options.intelligentCleanup=true] - Evaluate thresholds before cleaning
   * @param {number} [options.minOrphanCount=10] - Minimum number of orphans required
   * @param {number} [options.orphanPercentage=0.15] - Orphans must exceed this fraction of all terms
   * @param {boolean} [options.checkSystemState=true] - Also require isSystemSafe() to be true
   * @returns {number} - Number of orphaned terms actually removed
   */
  cleanupOrphanedTerms(forceCleanup = false, options = {}) {
    const {
      intelligentCleanup = true,
      minOrphanCount = 10,
      orphanPercentage = 0.15,
      checkSystemState = true
    } = options

    if (!forceCleanup) {
      if (!intelligentCleanup) {
        return 0 // Don't remove anything during normal operations
      }

      // INTELLIGENT CLEANUP: Check if cleanup should be performed
      const { orphanedTerms, totalTerms } = this.getStats()
      const shouldCleanup = (
        orphanedTerms >= minOrphanCount && // Minimum orphan count
        orphanedTerms > totalTerms * orphanPercentage && // Orphans > percentage of total
        (!checkSystemState || this.isSystemSafe()) // System is safe (if check enabled)
      )

      if (!shouldCleanup) {
        return 0 // Don't cleanup if conditions not met
      }
    }

    // PERFORM CLEANUP: collect first, then remove
    const orphanedIds = []
    for (const [id, count] of this.termCounts) {
      if (count === 0) {
        orphanedIds.push(id)
      }
    }

    let removedCount = 0
    for (const id of orphanedIds) {
      if (!this.idToTerm.has(id)) {
        // Dangling counter with no term behind it: drop it so it stops
        // being reported as an orphan, but do not count it as a removed term.
        this.termCounts.delete(id)
        continue
      }

      const term = this.idToTerm.get(id)
      if (typeof term !== 'string') {
        continue // Extra safety: only remove string terms
      }

      this.termToId.delete(term)
      this.idToTerm.delete(id)
      this.termCounts.delete(id)
      removedCount++
    }

    return removedCount
  }

  /**
   * Check if system is safe for cleanup operations
   * @returns {boolean} - True if system is safe for cleanup
   */
  isSystemSafe() {
    // This method should be overridden by the database instance
    // to provide system state information
    return true // Default to safe for backward compatibility
  }

  /**
   * Perform intelligent automatic cleanup.
   * Uses lower thresholds than cleanupOrphanedTerms(); any value supplied
   * in options overrides the defaults below.
   * @param {Object} options - Cleanup options
   * @returns {number} - Number of orphaned terms removed
   */
  performIntelligentCleanup(options = {}) {
    return this.cleanupOrphanedTerms(false, {
      intelligentCleanup: true,
      minOrphanCount: 5, // Lower threshold for automatic cleanup
      orphanPercentage: 0.1, // 10% of total terms
      checkSystemState: true,
      ...options
    })
  }

  /**
   * Decrement term count (when term is removed from index).
   * The count never drops below 0. IDs without a counter are ignored so
   * that no phantom orphan entry is created for them.
   * @param {number} termId - Term ID to decrement
   */
  decrementTermCount(termId) {
    if (!this.termCounts.has(termId)) {
      return
    }

    const count = this.termCounts.get(termId) || 0
    this.termCounts.set(termId, Math.max(0, count - 1))
  }

  /**
   * Increment term count (when term is added to index)
   * @param {number} termId - Term ID to increment
   */
  incrementTermCount(termId) {
    const count = this.termCounts.get(termId) || 0
    this.termCounts.set(termId, count + 1)
  }

  /**
   * Get statistics about terms
   * @returns {Object} - { totalTerms, nextId, orphanedTerms }
   */
  getStats() {
    let orphanedTerms = 0
    for (const count of this.termCounts.values()) {
      if (count === 0) {
        orphanedTerms++
      }
    }

    return {
      totalTerms: this.termToId.size,
      nextId: this.nextId,
      orphanedTerms
    }
  }

  /**
   * Check if a term exists
   * @param {string} term - Term to check
   * @returns {boolean} - True if term exists
   */
  hasTerm(term) {
    return this.termToId.has(term)
  }

  /**
   * Get all terms
   * @returns {Array} - Array of all terms
   */
  getAllTerms() {
    return Array.from(this.termToId.keys())
  }

  /**
   * Get all term IDs
   * @returns {Array} - Array of all term IDs
   */
  getAllTermIds() {
    return Array.from(this.idToTerm.keys())
  }

  /**
   * Get statistics about term mapping
   * @returns {Object} - { totalTerms, nextId, termCounts, sampleTerms }
   *   where sampleTerms holds at most the first five [term, id] pairs
   */
  getStatistics() {
    return {
      totalTerms: this.termToId.size,
      nextId: this.nextId,
      termCounts: Object.fromEntries(this.termCounts),
      sampleTerms: Array.from(this.termToId.entries()).slice(0, 5)
    }
  }

}
@@ -0,0 +1,282 @@
1
+ /**
2
+ * $not Operator with $and on Array Fields Test
3
+ *
4
+ * Bug Report: https://github.com/yourrepo/jexidb/issues/XXX
5
+ *
6
+ * Issue: When using $not with $and on array fields in strict mode,
7
+ * queries return empty results even when matching documents exist.
8
+ *
9
+ * Root Cause: IndexManager.query() did not handle the $not operator,
10
+ * treating it as an unknown field and returning an empty set.
11
+ * Additionally, when fields existed at both root level and inside $and,
12
+ * only the $and conditions were being processed.
13
+ *
14
+ * Fix: Added proper $not handling in IndexManager.query() that:
15
+ * 1. Gets all possible line numbers from database offsets
16
+ * 2. Queries for the $not condition
17
+ * 3. Returns the complement (all lines except those matching $not)
18
+ * 4. Intersects with other root-level conditions if present
19
+ * Also fixed $and to properly intersect with root-level fields.
20
+ */
21
+
22
+ import { Database } from '../src/Database.mjs'
23
+ import { describe, it, expect, beforeEach, afterEach } from '@jest/globals'
24
+ import fs from 'fs'
25
+
26
describe('$not Operator with $and on Array Fields', () => {
  const testFile = './test-files/not-operator-test.jdb'
  const testIdxFile = './test-files/not-operator-test.idx.jdb'
  let db

  // Best-effort removal of the data and index files; failures are ignored.
  const removeTestFiles = () => {
    try {
      for (const file of [testFile, testIdxFile]) {
        if (fs.existsSync(file)) fs.unlinkSync(file)
      }
    } catch (err) {
      // Ignore cleanup errors
    }
  }

  // Builds a fresh options object per Database instance so nothing is shared.
  const buildOptions = (overrides) => ({
    ...overrides,
    integrityCheck: 'none',
    indexedQueryMode: 'strict',
    fields: {
      name: 'string',
      nameTerms: 'array:string',
    },
    indexes: ['name', 'nameTerms']
  })

  // Asserts the exact length and the name at every position (order matters).
  const expectNamesInOrder = (results, expectedNames) => {
    expect(results).toHaveLength(expectedNames.length)
    expectedNames.forEach((expectedName, position) => {
      expect(results[position].name).toBe(expectedName)
    })
  }

  // Asserts the exact length and the sorted list of names (order-insensitive).
  const expectNamesSorted = (results, expectedSortedNames) => {
    expect(results).toHaveLength(expectedSortedNames.length)
    const names = results.map(r => r.name).sort()
    expect(names).toEqual(expectedSortedNames)
  }

  beforeEach(async () => {
    removeTestFiles()

    // Create database with array field
    db = new Database(testFile, buildOptions({ clear: true, create: true }))
    await db.init()

    // Insert test data
    const seedDocuments = [
      { name: 'SBT Nacional', nameTerms: ['sbt'] },
      { name: 'SBT HD', nameTerms: ['sbt'] },
      { name: 'SBT Radio', nameTerms: ['sbt', 'radio'] },
      { name: 'SBT FM', nameTerms: ['sbt', 'fm'] },
      { name: 'Radio FM', nameTerms: ['radio', 'fm'] },
      { name: 'Globo', nameTerms: ['globo'] },
    ]
    for (const document of seedDocuments) {
      await db.insert(document)
    }

    await db.flush()
    await db.close()

    // Re-open database so queries run against the persisted data and index
    db = new Database(testFile, buildOptions({ create: false }))
    await db.init()
  })

  afterEach(async () => {
    if (db && !db.destroyed) {
      try {
        await db.destroy()
      } catch (err) {
        // Ignore destroy errors
      }
    }

    removeTestFiles()
  })

  it('should handle $not with $and (positive condition first)', async () => {
    const results = await db.find({
      $and: [
        { nameTerms: { $in: ['sbt'] } },
        { $not: { nameTerms: { $in: ['radio', 'fm'] } } }
      ]
    })

    expectNamesInOrder(results, ['SBT Nacional', 'SBT HD'])
  })

  it('should handle $not with $and (negative condition first)', async () => {
    const results = await db.find({
      $and: [
        { $not: { nameTerms: { $in: ['radio', 'fm'] } } },
        { nameTerms: { $in: ['sbt'] } }
      ]
    })

    expectNamesInOrder(results, ['SBT Nacional', 'SBT HD'])
  })

  it('should handle $not WITHOUT $and (root level)', async () => {
    const results = await db.find({
      nameTerms: { $in: ['sbt'] },
      $not: { nameTerms: { $in: ['radio', 'fm'] } }
    })

    expectNamesInOrder(results, ['SBT Nacional', 'SBT HD'])
  })

  it('should handle multiple $not in $and with root-level field', async () => {
    const results = await db.find({
      nameTerms: { $in: ['sbt'] },
      $and: [
        { $not: { nameTerms: 'radio' } },
        { $not: { nameTerms: 'fm' } }
      ]
    })

    expectNamesInOrder(results, ['SBT Nacional', 'SBT HD'])
  })

  it('should handle $not with single value', async () => {
    const results = await db.find({
      $and: [
        { nameTerms: { $in: ['sbt'] } },
        { $not: { nameTerms: 'radio' } }
      ]
    })

    expectNamesSorted(results, ['SBT FM', 'SBT HD', 'SBT Nacional'])
  })

  it('should handle complex $not queries with multiple conditions', async () => {
    const results = await db.find({
      $and: [
        { nameTerms: { $in: ['sbt', 'globo'] } },
        { $not: { nameTerms: { $in: ['radio', 'fm'] } } }
      ]
    })

    expectNamesSorted(results, ['Globo', 'SBT HD', 'SBT Nacional'])
  })

  it('should handle $not that excludes all results', async () => {
    const results = await db.find({
      $and: [
        { nameTerms: { $in: ['sbt'] } },
        { $not: { nameTerms: 'sbt' } }
      ]
    })

    expect(results).toHaveLength(0)
  })

  it('should handle $not with non-existent values', async () => {
    const results = await db.find({
      $and: [
        { nameTerms: { $in: ['sbt'] } },
        { $not: { nameTerms: { $in: ['nonexistent', 'invalid'] } } }
      ]
    })

    expectNamesSorted(results, ['SBT FM', 'SBT HD', 'SBT Nacional', 'SBT Radio'])
  })

  it('should handle $nin operator in strict mode', async () => {
    const results = await db.find({
      nameTerms: { $nin: ['radio', 'fm'] }
    })

    expectNamesSorted(results, ['Globo', 'SBT HD', 'SBT Nacional'])
  })

  it('should handle $nin with $in in strict mode', async () => {
    const results = await db.find({
      $and: [
        { nameTerms: { $in: ['sbt'] } },
        { nameTerms: { $nin: ['radio', 'fm'] } }
      ]
    })

    expectNamesInOrder(results, ['SBT Nacional', 'SBT HD'])
  })

  it('should handle $nin with single value', async () => {
    const results = await db.find({
      nameTerms: { $nin: ['radio'] }
    })

    expectNamesSorted(results, ['Globo', 'SBT FM', 'SBT HD', 'SBT Nacional'])
  })

  it('should produce same results for $nin and $not+$in', async () => {
    // $nin and $not + $in are expected to be interchangeable
    const ninResults = await db.find({
      nameTerms: { $nin: ['radio', 'fm'] }
    })
    const notResults = await db.find({
      $not: { nameTerms: { $in: ['radio', 'fm'] } }
    })

    expect(ninResults).toHaveLength(notResults.length)

    const ninNames = ninResults.map(r => r.name).sort()
    const notNames = notResults.map(r => r.name).sort()
    expect(ninNames).toEqual(notNames)
  })
})
282
+
package/test/README.md ADDED
@@ -0,0 +1,8 @@
1
+ ## Test Results
2
+ The table below shows the results of the automated tests run on the author's PC using the JSON format.
3
+
4
+ | Format | Size (bytes) | Time elapsed (ms) |
5
+ |-------------------------------|--------------|--------------------|
6
+ | JSON | 1117 | 21 |
7
+
8
+ The JSON format provides universal compatibility across all environments and Node.js versions.