jexidb 2.1.8 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,285 +1,285 @@
/**
 * TermManager - Manages term-to-ID mapping for efficient storage
 *
 * Responsibilities:
 * - Map terms to numeric IDs for space efficiency
 * - Track term usage counts for cleanup
 * - Load/save terms from/to index file
 * - Clean up orphaned terms
 */
export default class TermManager {
  constructor() {
    this.termToId = new Map() // "bra" -> 1
    this.idToTerm = new Map() // 1 -> "bra"
    this.termCounts = new Map() // 1 -> 1500 (how many times used)
    this.nextId = 1
  }

  /**
   * Get ID for a term (create if doesn't exist).
   * Every call counts as one usage of the term.
   * @param {string} term - Term to get ID for
   * @returns {number} - Numeric ID for the term
   */
  getTermId(term) {
    if (this.termToId.has(term)) {
      const existingId = this.termToId.get(term)
      this.incrementTermCount(existingId)
      return existingId
    }

    const newId = this.nextId++
    this.termToId.set(term, newId)
    this.idToTerm.set(newId, term)
    this.termCounts.set(newId, 1)

    return newId
  }

  /**
   * Get term ID without incrementing count (for IndexManager use)
   * @param {string} term - Term to get ID for
   * @returns {number|undefined} - Numeric ID for the term, or undefined if not found
   * CRITICAL: Does NOT create new IDs - only returns existing ones.
   * This prevents creating invalid term IDs during queries when terms haven't been loaded yet
   */
  getTermIdWithoutIncrement(term) {
    // Map#get already yields undefined for unknown terms, so nothing is created here
    return this.termToId.get(term)
  }

  /**
   * Get term by ID
   * @param {number} id - Numeric ID
   * @returns {string|null} - Term or null if not found
   */
  getTerm(id) {
    // Check membership explicitly: a falsy term such as '' is still a valid term
    return this.idToTerm.has(id) ? this.idToTerm.get(id) : null
  }

  /**
   * Bulk get term IDs for multiple terms.
   * Behaves like calling getTermId() on each element in order, so unknown
   * terms are created and every occurrence increments the usage count.
   * @param {string[]} terms - Array of terms to get IDs for
   * @returns {number[]} - Array of term IDs in the same order (empty for non-array input)
   */
  bulkGetTermIds(terms) {
    if (!Array.isArray(terms) || terms.length === 0) {
      return []
    }

    // Array.from visits every index (including holes), matching an index loop
    return Array.from(terms, (term) => this.getTermId(term))
  }

  /**
   * Load terms from file data
   * @param {Object} termsData - Terms data from file, keyed by ID (same shape saveTerms() returns)
   */
  loadTerms(termsData) {
    if (!termsData || typeof termsData !== 'object') {
      return
    }

    for (const [key, term] of Object.entries(termsData)) {
      const numericId = Number.parseInt(key, 10)

      // Skip unparsable IDs and missing terms. Falsy-but-present terms ('' or 0)
      // are kept: dropping them would let nextId re-issue their ID to another term.
      if (Number.isNaN(numericId) || term === undefined || term === null) {
        continue
      }

      this.termToId.set(term, numericId)
      this.idToTerm.set(numericId, term)
      this.nextId = Math.max(this.nextId, numericId + 1)
      // Initialize count to 0 - will be updated as terms are used
      this.termCounts.set(numericId, 0)
    }
  }

  /**
   * Save terms to file format
   * @returns {Object} - Terms data for file (plain object: ID -> term)
   */
  saveTerms() {
    return Object.fromEntries(this.idToTerm)
  }

  /**
   * List the IDs that are currently orphaned: usage count is 0 and the ID
   * still maps to a term. Count entries without a term mapping are ignored
   * so they cannot distort cleanup decisions.
   * @returns {number[]} - Orphaned term IDs
   */
  getOrphanedTermIds() {
    const orphanedIds = []
    for (const [id, count] of this.termCounts) {
      if (count === 0 && this.idToTerm.has(id)) {
        orphanedIds.push(id)
      }
    }
    return orphanedIds
  }

  /**
   * Clean up orphaned terms (terms with count 0)
   * @param {boolean} forceCleanup - Force cleanup even if conditions not met
   * @param {Object} options - Cleanup options
   * @param {boolean} [options.intelligentCleanup=true] - Evaluate thresholds before cleaning
   * @param {number} [options.minOrphanCount=10] - Minimum number of orphans required
   * @param {number} [options.orphanPercentage=0.15] - Orphans must exceed this fraction of all terms
   * @param {boolean} [options.checkSystemState=true] - Also require isSystemSafe()
   * @returns {number} - Number of orphaned terms actually removed
   */
  cleanupOrphanedTerms(forceCleanup = false, options = {}) {
    const {
      intelligentCleanup = true,
      minOrphanCount = 10,
      orphanPercentage = 0.15,
      checkSystemState = true
    } = options || {} // tolerate an explicit null

    if (!forceCleanup) {
      if (!intelligentCleanup) {
        return 0 // Don't remove anything during normal operations
      }

      // INTELLIGENT CLEANUP: Check if cleanup should be performed
      const { orphanedTerms: orphanedCount, totalTerms } = this.getStats()

      // SAFETY: If all terms are marked as orphaned, it likely means counts
      // haven't been rebuilt after loading from disk. Skip cleanup to avoid
      // wiping valid term mappings.
      if (totalTerms > 0 && orphanedCount === totalTerms) {
        return 0
      }

      const shouldCleanup = (
        orphanedCount >= minOrphanCount && // Minimum orphan count
        orphanedCount > totalTerms * orphanPercentage && // Orphans > percentage of total
        (!checkSystemState || this.isSystemSafe()) // System is safe (if check enabled)
      )

      if (!shouldCleanup) {
        return 0 // Don't cleanup if conditions not met
      }
    }

    // PERFORM CLEANUP
    // Snapshot the zero-count IDs first so the map is not mutated while iterated
    const zeroCountIds = []
    for (const [id, count] of this.termCounts) {
      if (count === 0) {
        zeroCountIds.push(id)
      }
    }

    let removedCount = 0
    for (const id of zeroCountIds) {
      if (!this.idToTerm.has(id)) {
        // Count entry without a term: nothing to unmap, just drop the stray counter
        this.termCounts.delete(id)
        continue
      }

      const term = this.idToTerm.get(id)
      if (typeof term !== 'string') {
        continue // Extra safety: only remove string terms
      }

      // Only unmap the term if it really points at this ID, so a term that
      // is also reachable through another (live) ID is not lost
      if (this.termToId.get(term) === id) {
        this.termToId.delete(term)
      }
      this.idToTerm.delete(id)
      this.termCounts.delete(id)
      removedCount++
    }

    return removedCount
  }

  /**
   * Check if system is safe for cleanup operations
   * @returns {boolean} - True if system is safe for cleanup
   */
  isSystemSafe() {
    // This method should be overridden by the database instance
    // to provide system state information
    return true // Default to safe for backward compatibility
  }

  /**
   * Perform intelligent automatic cleanup
   * @param {Object} options - Cleanup options (override the defaults below)
   * @returns {number} - Number of orphaned terms removed
   */
  performIntelligentCleanup(options = {}) {
    return this.cleanupOrphanedTerms(false, {
      intelligentCleanup: true,
      minOrphanCount: 5, // Lower threshold for automatic cleanup
      orphanPercentage: 0.1, // 10% of total terms
      checkSystemState: true,
      ...options
    })
  }

  /**
   * Decrement term count (when term is removed from index).
   * The count never drops below 0. IDs that have no count entry are ignored,
   * so no zero-count entry is created for an ID this manager never tracked.
   * @param {number} termId - Term ID to decrement
   */
  decrementTermCount(termId) {
    if (!this.termCounts.has(termId)) {
      return
    }
    const current = this.termCounts.get(termId) || 0
    this.termCounts.set(termId, Math.max(0, current - 1))
  }

  /**
   * Increment term count (when term is added to index)
   * @param {number} termId - Term ID to increment
   */
  incrementTermCount(termId) {
    const current = this.termCounts.get(termId) || 0
    this.termCounts.set(termId, current + 1)
  }

  /**
   * Get statistics about terms
   * @returns {Object} - Term statistics: totalTerms, nextId, orphanedTerms
   */
  getStats() {
    return {
      totalTerms: this.termToId.size,
      nextId: this.nextId,
      orphanedTerms: this.getOrphanedTermIds().length
    }
  }

  /**
   * Check if a term exists
   * @param {string} term - Term to check
   * @returns {boolean} - True if term exists
   */
  hasTerm(term) {
    return this.termToId.has(term)
  }

  /**
   * Get all terms
   * @returns {Array} - Array of all terms
   */
  getAllTerms() {
    return Array.from(this.termToId.keys())
  }

  /**
   * Get all term IDs
   * @returns {Array} - Array of all term IDs
   */
  getAllTermIds() {
    return Array.from(this.idToTerm.keys())
  }

  /**
   * Get statistics about term mapping
   * @returns {Object} - Statistics object: totalTerms, nextId, termCounts
   *   (plain object: ID -> count) and sampleTerms (first five [term, id] pairs)
   */
  getStatistics() {
    return {
      totalTerms: this.termToId.size,
      nextId: this.nextId,
      termCounts: Object.fromEntries(this.termCounts),
      sampleTerms: Array.from(this.termToId.entries()).slice(0, 5)
    }
  }
}