jexidb 2.0.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/.babelrc +13 -0
  2. package/.gitattributes +2 -0
  3. package/CHANGELOG.md +132 -101
  4. package/LICENSE +21 -21
  5. package/README.md +301 -639
  6. package/babel.config.json +5 -0
  7. package/dist/Database.cjs +3896 -0
  8. package/docs/API.md +1051 -390
  9. package/docs/EXAMPLES.md +701 -177
  10. package/docs/README.md +194 -184
  11. package/examples/iterate-usage-example.js +157 -0
  12. package/examples/simple-iterate-example.js +115 -0
  13. package/jest.config.js +24 -0
  14. package/package.json +63 -54
  15. package/scripts/README.md +47 -0
  16. package/scripts/clean-test-files.js +75 -0
  17. package/scripts/prepare.js +31 -0
  18. package/scripts/run-tests.js +80 -0
  19. package/src/Database.mjs +4130 -0
  20. package/src/FileHandler.mjs +1101 -0
  21. package/src/OperationQueue.mjs +279 -0
  22. package/src/SchemaManager.mjs +268 -0
  23. package/src/Serializer.mjs +511 -0
  24. package/src/managers/ConcurrencyManager.mjs +257 -0
  25. package/src/managers/IndexManager.mjs +1403 -0
  26. package/src/managers/QueryManager.mjs +1273 -0
  27. package/src/managers/StatisticsManager.mjs +262 -0
  28. package/src/managers/StreamingProcessor.mjs +429 -0
  29. package/src/managers/TermManager.mjs +278 -0
  30. package/test/$not-operator-with-and.test.js +282 -0
  31. package/test/README.md +8 -0
  32. package/test/close-init-cycle.test.js +256 -0
  33. package/test/critical-bugs-fixes.test.js +1069 -0
  34. package/test/index-persistence.test.js +306 -0
  35. package/test/index-serialization.test.js +314 -0
  36. package/test/indexed-query-mode.test.js +360 -0
  37. package/test/iterate-method.test.js +272 -0
  38. package/test/query-operators.test.js +238 -0
  39. package/test/regex-array-fields.test.js +129 -0
  40. package/test/score-method.test.js +238 -0
  41. package/test/setup.js +17 -0
  42. package/test/term-mapping-minimal.test.js +154 -0
  43. package/test/term-mapping-simple.test.js +257 -0
  44. package/test/term-mapping.test.js +514 -0
  45. package/test/writebuffer-flush-resilience.test.js +204 -0
  46. package/dist/FileHandler.js +0 -688
  47. package/dist/IndexManager.js +0 -353
  48. package/dist/IntegrityChecker.js +0 -364
  49. package/dist/JSONLDatabase.js +0 -1333
  50. package/dist/index.js +0 -617
  51. package/docs/MIGRATION.md +0 -295
  52. package/examples/auto-save-example.js +0 -158
  53. package/examples/cjs-usage.cjs +0 -82
  54. package/examples/close-vs-delete-example.js +0 -71
  55. package/examples/esm-usage.js +0 -113
  56. package/examples/example-columns.idx.jdb +0 -0
  57. package/examples/example-columns.jdb +0 -9
  58. package/examples/example-options.idx.jdb +0 -0
  59. package/examples/example-options.jdb +0 -0
  60. package/examples/example-users.idx.jdb +0 -0
  61. package/examples/example-users.jdb +0 -5
  62. package/examples/simple-test.js +0 -55
  63. package/src/FileHandler.js +0 -674
  64. package/src/IndexManager.js +0 -363
  65. package/src/IntegrityChecker.js +0 -379
  66. package/src/JSONLDatabase.js +0 -1391
  67. package/src/index.js +0 -608
@@ -0,0 +1,511 @@
1
+ // NOTE: Buffer pool was removed due to complexity with low performance gain
2
+ // It was causing serialization issues and data corruption in batch operations
3
+ // If reintroducing buffer pooling in the future, ensure proper buffer management
4
+ // and avoid reusing buffers that may contain stale data
5
+
6
+ import SchemaManager from './SchemaManager.mjs'
7
+
8
// Characters that force JSON escaping: backslash, double quote and C0 controls.
const NEEDS_JSON_ESCAPING = /[\\"\u0000-\u001f]/

/**
 * Build the zeroed counter object used by the constructor and by cleanup().
 * @returns {object} Fresh statistics counters, all set to 0.
 */
function createEmptyStats() {
  return {
    totalSerializations: 0,
    totalDeserializations: 0,
    jsonSerializations: 0,
    arraySerializations: 0,
    objectSerializations: 0
  }
}

/**
 * Encode JSON text as a UTF-8 Buffer, optionally terminated by a single "\n".
 * @param {string} text - JSON text (one line, or several lines already joined).
 * @param {boolean} addLinebreak - Append a trailing 0x0A byte when true.
 * @returns {Buffer}
 */
function encodeLine(text, addLinebreak) {
  return Buffer.from(addLinebreak ? `${text}\n` : text, 'utf8')
}

/**
 * JSON / JSONL serializer.
 *
 * Records are optionally converted to a positional array form by the schema
 * manager before being stringified, and converted back after parsing.
 * Buffers are created directly; no buffer pool is used.
 */
export default class Serializer {
  /**
   * @param {object} [opts]
   * @param {boolean} [opts.enableAdvancedSerialization=true] - Use serializeAdvanced() instead of serializeStandard().
   * @param {boolean} [opts.enableArraySerialization=true] - Convert records through the schema manager.
   * @param {boolean} [opts.debugMode] - Forwarded to the schema manager.
   */
  constructor(opts = {}) {
    this.opts = Object.assign({
      enableAdvancedSerialization: true,
      enableArraySerialization: true
    }, opts)

    // Schema manager drives the object <-> array conversion
    this.schemaManager = new SchemaManager({
      enableArraySerialization: this.opts.enableArraySerialization,
      strictSchema: true,
      debugMode: this.opts.debugMode || false
    })

    this.serializationStats = createEmptyStats()
  }

  /**
   * Initialize the schema used for array-based serialization.
   * Both arguments are forwarded untouched to the schema manager.
   */
  initializeSchema(schemaOrData, autoDetect = false) {
    this.schemaManager.initializeSchema(schemaOrData, autoDetect)
  }

  /**
   * Get the current schema as reported by the schema manager.
   */
  getSchema() {
    return this.schemaManager.getSchema()
  }

  /**
   * Convert a record to the schema manager's array form.
   * Returns the input unchanged when array serialization is disabled.
   */
  convertToArrayFormat(obj) {
    if (!this.opts.enableArraySerialization) {
      return obj
    }
    return this.schemaManager.objectToArray(obj)
  }

  /**
   * Convert the array form back to a record.
   * Returns the input unchanged when array serialization is disabled.
   */
  convertFromArrayFormat(arr) {
    if (!this.opts.enableArraySerialization) {
      return arr
    }
    return this.schemaManager.arrayToObject(arr)
  }

  /**
   * Serialize one record to a UTF-8 Buffer containing a single JSON line.
   * @param {*} data - Record to serialize.
   * @param {object} [opts]
   * @param {boolean} [opts.linebreak=true] - Pass false to omit the trailing "\n".
   * @returns {Buffer}
   */
  serialize(data, opts = {}) {
    const stats = this.serializationStats
    stats.totalSerializations++
    const addLinebreak = opts.linebreak !== false

    const serializationData = this.convertToArrayFormat(data)

    // Count as "array" only when an object was actually turned into an array
    if (Array.isArray(serializationData) && typeof data === 'object' && data !== null) {
      stats.arraySerializations++
    } else {
      stats.objectSerializations++
    }

    // Both code paths emit JSON, so the JSON counter always advances
    stats.jsonSerializations++
    return this.opts.enableAdvancedSerialization
      ? this.serializeAdvanced(serializationData, addLinebreak)
      : this.serializeStandard(serializationData, addLinebreak)
  }

  /**
   * Serialize already-converted data through optimizedStringify().
   * @param {*} data - Value to stringify.
   * @param {boolean} addLinebreak - Append a trailing "\n" when true.
   * @returns {Buffer}
   */
  serializeAdvanced(data, addLinebreak) {
    // Only reports problems (console.warn); it never blocks serialization
    this.validateEncodingBeforeSerialization(data)

    const json = this.optimizedStringify(data)
    return encodeLine(this.normalizeEncoding(json), addLinebreak)
  }

  /**
   * Return a string that survives a UTF-8 encode/decode round trip.
   * Non-string input is returned unchanged.
   */
  normalizeEncoding(str) {
    if (typeof str !== 'string') return str

    // Common case: nothing to repair
    if (this.isValidUTF8(str)) return str

    return this.safeConvertToUTF8(str)
  }

  /**
   * Check whether a string survives a UTF-8 encode/decode round trip unchanged.
   * Strings containing unpaired surrogates fail this check because Node
   * encodes them as U+FFFD.
   * @returns {boolean} false for lossy strings and for input Buffer.from rejects.
   */
  isValidUTF8(str) {
    try {
      return Buffer.from(str, 'utf8').toString('utf8') === str
    } catch (error) {
      // Buffer.from throws for unsupported input types
      return false
    }
  }

  /**
   * Re-encode a string so that it becomes round-trip safe.
   * Each candidate encoding is tried in order and the first result that
   * passes isValidUTF8() wins.
   * NOTE(review): the first candidate ('utf8') appears to always succeed,
   * replacing unpaired surrogates with U+FFFD, so the remaining candidates
   * and the warning fallback look unreachable - confirm before relying on them.
   */
  safeConvertToUTF8(str) {
    const encodings = ['utf8', 'latin1', 'utf16le', 'ascii']

    for (const encoding of encodings) {
      try {
        const converted = Buffer.from(str, encoding).toString('utf8')
        if (this.isValidUTF8(converted)) {
          return converted
        }
      } catch (error) {
        // Try next encoding
        continue
      }
    }

    // Fallback: return original string (preserve data)
    console.warn('JexiDB: Could not normalize encoding, preserving original string')
    return str
  }

  /**
   * Normalize every string value at any depth of arrays and objects.
   *
   * The input is never mutated. A container is copied only when something
   * inside it changed, so clean data is returned by reference. Changed
   * objects are rebuilt as plain objects from their own enumerable keys.
   * Cyclic structures are not supported (the recursion does not track
   * visited nodes).
   */
  deepNormalizeEncoding(obj) {
    if (obj === null || obj === undefined) return obj

    if (typeof obj === 'string') {
      return this.normalizeEncoding(obj)
    }

    if (Array.isArray(obj)) {
      let normalized = null
      for (let i = 0; i < obj.length; i++) {
        const next = this.deepNormalizeEncoding(obj[i])
        // Object.is keeps NaN from being reported as a change
        if (!Object.is(next, obj[i])) {
          if (normalized === null) normalized = obj.slice()
          normalized[i] = next
        }
      }
      return normalized === null ? obj : normalized
    }

    if (typeof obj === 'object') {
      let normalized = null
      for (const key of Object.keys(obj)) {
        const value = obj[key]
        const next = this.deepNormalizeEncoding(value)
        if (!Object.is(next, value)) {
          if (normalized === null) normalized = Object.assign({}, obj)
          normalized[key] = next
        }
      }
      return normalized === null ? obj : normalized
    }

    return obj
  }

  /**
   * Walk the data and warn (console.warn) about strings that are not
   * round-trip safe. Purely diagnostic: nothing is modified or thrown.
   * @returns {boolean} true when no issue was found.
   */
  validateEncodingBeforeSerialization(data) {
    const issues = []

    const traverse = (value, path) => {
      if (typeof value === 'string') {
        if (!this.isValidUTF8(value)) {
          issues.push(`Invalid encoding at ${path}: "${value.substring(0, 50)}..."`)
        }
      } else if (Array.isArray(value)) {
        value.forEach((item, index) => {
          traverse(item, `${path}[${index}]`)
        })
      } else if (value && typeof value === 'object') {
        for (const [key, child] of Object.entries(value)) {
          traverse(child, path ? `${path}.${key}` : key)
        }
      }
    }

    traverse(data, '')

    if (issues.length > 0) {
      console.warn('JexiDB: Encoding issues detected:', issues)
    }

    return issues.length === 0
  }

  /**
   * Stringify a value as JSON after deep encoding normalization.
   *
   * Always returns a string: anything JSON cannot represent at the top level
   * (undefined, functions, symbols, NaN, +/-Infinity) becomes "null", which
   * is also how JSON.stringify treats those values inside arrays.
   * @returns {string} Valid JSON text.
   */
  optimizedStringify(obj) {
    const value = this.deepNormalizeEncoding(obj)

    if (value === null || value === undefined) return 'null'

    switch (typeof value) {
      case 'boolean':
        return value ? 'true' : 'false'
      case 'number':
        // "NaN" / "Infinity" are not valid JSON
        return Number.isFinite(value) ? String(value) : 'null'
      case 'string':
        // Strings without quotes, backslashes or control characters need no escaping
        return NEEDS_JSON_ESCAPING.test(value) ? JSON.stringify(value) : '"' + value + '"'
      default: {
        // Arrays, objects (including ones with toJSON, e.g. Date) and everything else
        const json = JSON.stringify(value)
        return json === undefined ? 'null' : json
      }
    }
  }

  /**
   * Serialize already-converted data with plain JSON.stringify.
   * Used when advanced serialization is disabled.
   * @param {*} data - Value to stringify.
   * @param {boolean} addLinebreak - Append a trailing "\n" when true.
   * @returns {Buffer}
   */
  serializeStandard(data, addLinebreak) {
    // Only reports problems (console.warn); it never blocks serialization
    this.validateEncodingBeforeSerialization(data)

    const json = JSON.stringify(this.deepNormalizeEncoding(data))
    // JSON.stringify yields undefined for undefined/functions/symbols;
    // emit "null" like optimizedStringify() does
    const safeJson = json === undefined ? 'null' : json
    return encodeLine(this.normalizeEncoding(safeJson), addLinebreak)
  }

  /**
   * Parse one serialized record.
   * @param {Buffer|string} data - One JSON line; surrounding whitespace is ignored.
   * @returns {*} The parsed record, or null for empty / whitespace-only input.
   * @throws {TypeError} When data is neither a Buffer nor a string.
   * @throws {Error} When parsing or the array-to-object conversion fails.
   */
  deserialize(data) {
    this.serializationStats.totalDeserializations++

    // Anything empty (string, Buffer, ...) means "no record"
    if (data !== null && data !== undefined && data.length === 0) return null

    let text
    if (Buffer.isBuffer(data)) {
      // A lone newline is an empty line; skip the string conversion
      if (data.length === 1 && data[0] === 0x0A) return null
      text = data.toString('utf8').trim()
    } else if (typeof data === 'string') {
      text = data.trim()
    } else {
      const received = data === null ? 'null' : typeof data
      throw new TypeError(`Invalid data type for deserialization: expected Buffer or string, received ${received}`)
    }

    if (text.length === 0) return null

    try {
      return this.convertFromArrayFormat(JSON.parse(text))
    } catch (e) {
      throw new Error(`Failed to deserialize JSON data: "${text.substring(0, 100)}..." - ${e.message}`)
    }
  }

  /**
   * Serialize many records as JSONL (one JSON document per line).
   * @param {Array} dataArray - Records to serialize.
   * @param {object} [opts]
   * @param {boolean} [opts.linebreak=true] - Pass false to omit the final "\n".
   * @returns {Buffer|Buffer[]} One Buffer holding every line. If building the
   *   combined Buffer throws, records are serialized one by one and an array
   *   of per-record Buffers is returned instead.
   * @throws {TypeError} When dataArray is not an array.
   */
  serializeBatch(dataArray, opts = {}) {
    if (!Array.isArray(dataArray)) {
      throw new TypeError('serializeBatch expects an array of records')
    }

    // Only reports problems (console.warn); it never blocks serialization
    this.validateEncodingBeforeSerialization(dataArray)

    const convertedData = dataArray.map(data => this.convertToArrayFormat(data))

    // Statistics are computed from this batch only, never from running totals
    const arrayCount = convertedData.filter((item, index) =>
      Array.isArray(item) && typeof dataArray[index] === 'object' && dataArray[index] !== null
    ).length
    const stats = this.serializationStats
    stats.arraySerializations += arrayCount
    stats.objectSerializations += dataArray.length - arrayCount
    stats.totalSerializations += dataArray.length
    stats.jsonSerializations += dataArray.length

    const addLinebreak = opts.linebreak !== false

    try {
      const lines = convertedData.map(record =>
        this.normalizeEncoding(this.optimizedStringify(record))
      )
      return encodeLine(lines.join('\n'), addLinebreak)
    } catch (e) {
      // Fallback: one Buffer per record. The low-level serializers are used
      // directly so records are neither converted nor counted a second time.
      // NOTE(review): this path returns an array rather than a single Buffer;
      // confirm that callers handle both shapes.
      return convertedData.map(record =>
        this.opts.enableAdvancedSerialization
          ? this.serializeAdvanced(record, addLinebreak)
          : this.serializeStandard(record, addLinebreak)
      )
    }
  }

  /**
   * Parse many serialized records.
   *
   * Fast path: all entries are joined into one JSON array and parsed with a
   * single JSON.parse call. The result is accepted only when it has exactly
   * one element per input entry and every converted element is an object.
   * Otherwise each entry is parsed on its own with deserialize().
   * @param {Array<Buffer|string>} dataArray - One serialized record per entry.
   * @returns {Array} Parsed records (null for empty entries).
   * @throws {TypeError} When dataArray is not an array, or an entry is neither Buffer nor string.
   * @throws {Error} When an entry cannot be parsed.
   */
  deserializeBatch(dataArray) {
    if (!Array.isArray(dataArray)) {
      throw new TypeError('deserializeBatch expects an array of entries')
    }

    try {
      const entriesJson = '[' + dataArray.map(data => {
        if (Buffer.isBuffer(data)) {
          return data.toString('utf8').trim()
        } else if (typeof data === 'string') {
          return data.trim()
        } else {
          throw new Error('Invalid data type for batch deserialization')
        }
      }).join(',') + ']'
      const parsedResults = JSON.parse(entriesJson)

      // An entry holding several comma-separated documents would change the count
      if (parsedResults.length === dataArray.length) {
        const results = parsedResults.map(data => this.convertFromArrayFormat(data))
        if (results.every(item => item && typeof item === 'object')) {
          this.serializationStats.totalDeserializations += results.length
          return results
        }
      }
    } catch (e) {
      // Deliberately ignored: the per-entry path below reports the precise error
    }

    return dataArray.map(data => this.deserialize(data))
  }

  /**
   * Check if data appears to be binary (always false since we only use JSON now)
   */
  isBinaryData(data) {
    return false
  }

  /**
   * Snapshot of the counters, the active configuration and the schema
   * manager's own statistics.
   */
  getStats() {
    const stats = this.serializationStats
    return {
      // Serialization stats
      totalSerializations: stats.totalSerializations,
      totalDeserializations: stats.totalDeserializations,
      jsonSerializations: stats.jsonSerializations,
      arraySerializations: stats.arraySerializations,
      objectSerializations: stats.objectSerializations,

      // Configuration
      enableAdvancedSerialization: this.opts.enableAdvancedSerialization,
      enableArraySerialization: this.opts.enableArraySerialization,

      // Schema stats
      schemaStats: this.schemaManager.getStats()
    }
  }

  /**
   * Reset all counters and the schema manager.
   */
  cleanup() {
    this.serializationStats = createEmptyStats()

    if (this.schemaManager) {
      this.schemaManager.reset()
    }
  }
}