jexidb 2.0.3 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +13 -0
- package/.gitattributes +2 -0
- package/CHANGELOG.md +132 -101
- package/LICENSE +21 -21
- package/README.md +301 -639
- package/babel.config.json +5 -0
- package/dist/Database.cjs +5204 -0
- package/docs/API.md +908 -241
- package/docs/EXAMPLES.md +701 -177
- package/docs/README.md +194 -184
- package/examples/iterate-usage-example.js +157 -0
- package/examples/simple-iterate-example.js +115 -0
- package/jest.config.js +24 -0
- package/package.json +63 -54
- package/scripts/README.md +47 -0
- package/scripts/benchmark-array-serialization.js +108 -0
- package/scripts/clean-test-files.js +75 -0
- package/scripts/prepare.js +31 -0
- package/scripts/run-tests.js +80 -0
- package/scripts/score-mode-demo.js +45 -0
- package/src/Database.mjs +5325 -0
- package/src/FileHandler.mjs +1140 -0
- package/src/OperationQueue.mjs +279 -0
- package/src/SchemaManager.mjs +268 -0
- package/src/Serializer.mjs +702 -0
- package/src/managers/ConcurrencyManager.mjs +257 -0
- package/src/managers/IndexManager.mjs +2094 -0
- package/src/managers/QueryManager.mjs +1490 -0
- package/src/managers/StatisticsManager.mjs +262 -0
- package/src/managers/StreamingProcessor.mjs +429 -0
- package/src/managers/TermManager.mjs +278 -0
- package/src/utils/operatorNormalizer.mjs +116 -0
- package/test/$not-operator-with-and.test.js +282 -0
- package/test/README.md +8 -0
- package/test/close-init-cycle.test.js +256 -0
- package/test/coverage-method.test.js +93 -0
- package/test/critical-bugs-fixes.test.js +1069 -0
- package/test/deserialize-corruption-fixes.test.js +296 -0
- package/test/exists-method.test.js +318 -0
- package/test/explicit-indexes-comparison.test.js +219 -0
- package/test/filehandler-non-adjacent-ranges-bug.test.js +175 -0
- package/test/index-line-number-regression.test.js +100 -0
- package/test/index-missing-index-data.test.js +91 -0
- package/test/index-persistence.test.js +491 -0
- package/test/index-serialization.test.js +314 -0
- package/test/indexed-query-mode.test.js +360 -0
- package/test/insert-session-auto-flush.test.js +353 -0
- package/test/iterate-method.test.js +272 -0
- package/test/legacy-operator-compat.test.js +154 -0
- package/test/query-operators.test.js +238 -0
- package/test/regex-array-fields.test.js +129 -0
- package/test/score-method.test.js +298 -0
- package/test/setup.js +17 -0
- package/test/term-mapping-minimal.test.js +154 -0
- package/test/term-mapping-simple.test.js +257 -0
- package/test/term-mapping.test.js +514 -0
- package/test/writebuffer-flush-resilience.test.js +204 -0
- package/dist/FileHandler.js +0 -688
- package/dist/IndexManager.js +0 -353
- package/dist/IntegrityChecker.js +0 -364
- package/dist/JSONLDatabase.js +0 -1333
- package/dist/index.js +0 -617
- package/docs/MIGRATION.md +0 -295
- package/examples/auto-save-example.js +0 -158
- package/examples/cjs-usage.cjs +0 -82
- package/examples/close-vs-delete-example.js +0 -71
- package/examples/esm-usage.js +0 -113
- package/examples/example-columns.idx.jdb +0 -0
- package/examples/example-columns.jdb +0 -9
- package/examples/example-options.idx.jdb +0 -0
- package/examples/example-options.jdb +0 -0
- package/examples/example-users.idx.jdb +0 -0
- package/examples/example-users.jdb +0 -5
- package/examples/simple-test.js +0 -55
- package/src/FileHandler.js +0 -674
- package/src/IndexManager.js +0 -363
- package/src/IntegrityChecker.js +0 -379
- package/src/JSONLDatabase.js +0 -1391
- package/src/index.js +0 -608
|
@@ -0,0 +1,702 @@
|
|
|
1
|
+
// NOTE: Buffer pool was removed due to complexity with low performance gain
|
|
2
|
+
// It was causing serialization issues and data corruption in batch operations
|
|
3
|
+
// If reintroducing buffer pooling in the future, ensure proper buffer management
|
|
4
|
+
// and avoid reusing buffers that may contain stale data
|
|
5
|
+
|
|
6
|
+
import SchemaManager from './SchemaManager.mjs'
|
|
7
|
+
|
|
8
|
+
/**
 * Serializes records to newline-terminated JSON buffers (JSONL) and parses them back.
 *
 * Records may optionally be converted to a schema-ordered array form by the
 * SchemaManager before being written (`enableArraySerialization`), and converted
 * back to objects after being read.
 */
export default class Serializer {
  /**
   * @param {object} [opts] - Options merged over the defaults.
   * @param {boolean} [opts.enableAdvancedSerialization=true] - Use the hand-rolled
   *   stringifier (`serializeAdvanced`) instead of plain `JSON.stringify`.
   * @param {boolean} [opts.enableArraySerialization=true] - Let the SchemaManager
   *   convert objects to arrays (and back).
   * @param {boolean} [opts.debugMode] - Emit extra warnings while deserializing;
   *   also forwarded to the SchemaManager.
   */
  constructor(opts = {}) {
    // NOTE: bufferPoolSize, adaptivePooling, memoryPressureThreshold were removed;
    // the buffer pool was causing more problems than benefits.
    this.opts = Object.assign({
      enableAdvancedSerialization: true,
      enableArraySerialization: true
    }, opts)

    // Schema manager backing the array-based record format
    this.schemaManager = new SchemaManager({
      enableArraySerialization: this.opts.enableArraySerialization,
      strictSchema: true,
      debugMode: this.opts.debugMode || false
    })

    this.serializationStats = this._emptyStats()
  }

  /**
   * Build a zeroed statistics record.
   * @returns {object} Fresh counters, all set to 0.
   */
  _emptyStats() {
    return {
      totalSerializations: 0,
      totalDeserializations: 0,
      jsonSerializations: 0,
      arraySerializations: 0,
      objectSerializations: 0
    }
  }

  /**
   * Initialize the schema used for array-based serialization.
   * Both arguments are forwarded unchanged to the SchemaManager.
   */
  initializeSchema(schemaOrData, autoDetect = false) {
    this.schemaManager.initializeSchema(schemaOrData, autoDetect)
  }

  /**
   * Get the current schema as reported by the SchemaManager.
   */
  getSchema() {
    return this.schemaManager.getSchema()
  }

  /**
   * Convert an object to array format for optimized serialization.
   * Returns the input untouched when array serialization is disabled.
   */
  convertToArrayFormat(obj) {
    if (!this.opts.enableArraySerialization) {
      return obj
    }
    return this.schemaManager.objectToArray(obj)
  }

  /**
   * Convert array format back to an object.
   * Returns the input untouched when array serialization is disabled.
   */
  convertFromArrayFormat(arr) {
    if (!this.opts.enableArraySerialization) {
      return arr
    }
    return this.schemaManager.arrayToObject(arr)
  }

  /**
   * Serialize one record to a UTF-8 Buffer.
   *
   * @param {*} data - Record to serialize.
   * @param {object} [opts]
   * @param {boolean} [opts.linebreak] - Pass `false` to omit the trailing "\n".
   * @returns {Buffer} JSON bytes, newline-terminated unless disabled.
   */
  serialize(data, opts = {}) {
    this.serializationStats.totalSerializations++
    const addLinebreak = opts.linebreak !== false

    // Convert to array format if enabled
    const serializationData = this.convertToArrayFormat(data)

    // Counted as an "array" serialization when an object-typed input ends up as an array
    if (Array.isArray(serializationData) && typeof data === 'object' && data !== null) {
      this.serializationStats.arraySerializations++
    } else {
      this.serializationStats.objectSerializations++
    }

    // Both strategies emit JSON, so the counter is bumped either way
    this.serializationStats.jsonSerializations++
    return this.opts.enableAdvancedSerialization
      ? this.serializeAdvanced(serializationData, addLinebreak)
      : this.serializeStandard(serializationData, addLinebreak)
  }

  /**
   * Serialize using the optimized stringifier (no buffer pooling).
   *
   * @param {*} data - Value to serialize (already converted to its storage form).
   * @param {boolean} addLinebreak - Append "\n" when true.
   * @returns {Buffer}
   */
  serializeAdvanced(data, addLinebreak) {
    // Only reports problems (console.warn); it does not modify the data
    this.validateEncodingBeforeSerialization(data)

    const json = this.optimizedStringify(data)

    // Normalize the final text once more before turning it into bytes
    return this._toLineBuffer(this.normalizeEncoding(json), addLinebreak)
  }

  /**
   * Encode JSON text as UTF-8, optionally followed by a single newline byte.
   *
   * @param {string} json - JSON text.
   * @param {boolean} addLinebreak - Append "\n" when true.
   * @returns {Buffer}
   * @throws {TypeError} When `json` is not a string (e.g. `JSON.stringify`
   *   produced `undefined`), so that "undefined" is never written as data.
   */
  _toLineBuffer(json, addLinebreak) {
    if (typeof json !== 'string') {
      throw new TypeError('Cannot serialize value: JSON output is not a string')
    }
    return Buffer.from(addLinebreak ? json + '\n' : json, 'utf8')
  }

  /**
   * Return `str` unchanged when it survives a UTF-8 round trip, otherwise a
   * converted copy. Non-string input is returned as-is.
   */
  normalizeEncoding(str) {
    if (typeof str !== 'string') return str

    // Skip if already valid UTF-8 (the common case)
    if (this.isValidUTF8(str)) return str

    return this.safeConvertToUTF8(str)
  }

  /**
   * Check whether a string can be encoded to UTF-8 and decoded back without change.
   * @returns {boolean} false when the round trip alters the string or throws.
   */
  isValidUTF8(str) {
    try {
      const encoded = Buffer.from(str, 'utf8')
      const decoded = encoded.toString('utf8')
      return decoded === str
    } catch (error) {
      return false
    }
  }

  /**
   * Re-encode a string through a list of candidate encodings and return the
   * first result that passes `isValidUTF8`.
   *
   * NOTE(review): the 'utf8' attempt appears to always succeed (invalid sequences
   * presumably become U+FFFD), which would make the later encodings unreachable -
   * confirm before relying on the latin1/utf16le/ascii branches.
   */
  safeConvertToUTF8(str) {
    // Try common encodings in order of likelihood
    const encodings = ['utf8', 'latin1', 'utf16le', 'ascii']

    for (const encoding of encodings) {
      try {
        const converted = Buffer.from(str, encoding).toString('utf8')

        if (this.isValidUTF8(converted)) {
          return converted
        }
      } catch (error) {
        // Try next encoding
        continue
      }
    }

    // Fallback: return original string (preserve data)
    console.warn('JexiDB: Could not normalize encoding, preserving original string')
    return str
  }

  /**
   * Recursively normalize every string found in a value.
   *
   * Strings at any depth are passed through `normalizeEncoding`. Containers are
   * copied only when one of their descendants actually changed, so untouched
   * input is returned by identity and the caller's data is never mutated.
   *
   * @param {*} obj - Any value.
   * @returns {*} The same value, or a shallow-copied structure with fixed strings.
   */
  deepNormalizeEncoding(obj) {
    if (obj === null || obj === undefined) return obj

    if (typeof obj === 'string') {
      return this.normalizeEncoding(obj)
    }

    if (Array.isArray(obj)) {
      let result = obj
      for (let i = 0; i < obj.length; i++) {
        const original = obj[i]
        const normalized = this.deepNormalizeEncoding(original)
        // Object.is keeps NaN from looking like a change
        if (!Object.is(normalized, original)) {
          if (result === obj) result = obj.slice() // copy on first change
          result[i] = normalized
        }
      }
      return result
    }

    if (typeof obj === 'object') {
      let result = obj
      for (const key of Object.keys(obj)) {
        const original = obj[key]
        const normalized = this.deepNormalizeEncoding(original)
        if (!Object.is(normalized, original)) {
          if (result === obj) result = Object.assign({}, obj) // copy on first change
          result[key] = normalized
        }
      }
      return result
    }

    return obj
  }

  /**
   * Walk a value and warn (console.warn) about strings that fail `isValidUTF8`.
   * The data itself is not modified.
   *
   * @param {*} data - Value to inspect.
   * @returns {boolean} true when no problem was found.
   */
  validateEncodingBeforeSerialization(data) {
    const issues = []

    const traverse = (value, path) => {
      if (typeof value === 'string') {
        if (!this.isValidUTF8(value)) {
          issues.push(`Invalid encoding at ${path}: "${value.substring(0, 50)}..."`)
        }
        return
      }

      if (Array.isArray(value)) {
        value.forEach((item, index) => {
          traverse(item, `${path}[${index}]`)
        })
        return
      }

      if (value && typeof value === 'object') {
        for (const [key, child] of Object.entries(value)) {
          traverse(child, path ? `${path}.${key}` : key)
        }
      }
    }

    traverse(data, '')

    if (issues.length > 0) {
      console.warn('JexiDB: Encoding issues detected:', issues)
    }

    return issues.length === 0
  }

  /**
   * Stringify a value with fast paths for primitives and arrays, after
   * normalizing the encoding of every string it contains.
   *
   * @param {*} obj - Value to stringify.
   * @returns {string} JSON text (`'null'` for values JSON cannot represent).
   */
  optimizedStringify(obj) {
    const normalizedObj = this.deepNormalizeEncoding(obj)
    return this._stringifyNormalizedValue(normalizedObj)
  }

  /**
   * Stringify a single, already-normalized value.
   *
   * @param {*} value
   * @returns {string} JSON text; never `undefined`.
   */
  _stringifyNormalizedValue(value) {
    if (value === null || value === undefined) {
      return 'null'
    }

    const type = typeof value

    if (type === 'boolean') {
      return value ? 'true' : 'false'
    }

    if (type === 'number') {
      // NaN and +/-Infinity have no JSON representation
      return Number.isFinite(value) ? value.toString() : 'null'
    }

    if (type === 'string') {
      // Fast path: nothing to escape (no backslash, quote or control character)
      if (!/[\\"\u0000-\u001f]/.test(value)) {
        return '"' + value + '"'
      }
      return JSON.stringify(value)
    }

    if (Array.isArray(value)) {
      return this._stringifyNormalizedArray(value)
    }

    // Objects and everything else go through the native stringifier, so toJSON
    // (Dates etc.) is honoured even when the object has no own enumerable keys.
    // JSON.stringify returns undefined for functions/symbols (and for a toJSON
    // that returns undefined); map that to 'null' like a plain undefined value.
    const json = JSON.stringify(value)
    return json === undefined ? 'null' : json
  }

  /**
   * Stringify an already-normalized array element by element.
   * Holes, undefined, functions and symbols are written as `null`.
   *
   * @param {Array} arr
   * @returns {string}
   */
  _stringifyNormalizedArray(arr) {
    if (arr.length === 0) return '[]'

    // Array.from visits holes as undefined, which the value stringifier maps to 'null'
    const parts = Array.from(arr, element => this._stringifyNormalizedValue(element))
    return '[' + parts.join(',') + ']'
  }

  /**
   * Serialize with plain `JSON.stringify` (used when advanced serialization is off).
   *
   * @param {*} data - Value to serialize (already converted to its storage form).
   * @param {boolean} addLinebreak - Append "\n" when true.
   * @returns {Buffer}
   * @throws {TypeError} When the value has no JSON representation.
   */
  serializeStandard(data, addLinebreak) {
    // Only reports problems (console.warn); it does not modify the data
    this.validateEncodingBeforeSerialization(data)

    // Normalize encoding for all string fields before stringify
    const normalizedData = this.deepNormalizeEncoding(data)
    const json = JSON.stringify(normalizedData)

    return this._toLineBuffer(this.normalizeEncoding(json), addLinebreak)
  }

  /**
   * Parse one stored line back into a record.
   *
   * Besides plain parsing, this tolerates two kinds of damage: text before the
   * first `{`/`[`, and several JSON values glued together on one line. In both
   * cases only the first complete object/array is used.
   *
   * @param {Buffer|string} data - One serialized line.
   * @returns {*} The record, or null for empty / newline-only input.
   * @throws {Error} With `noValidJson === true` when the text contains no `{` or `[`;
   *   otherwise an Error whose message starts with "Failed to deserialize JSON data".
   */
  deserialize(data) {
    this.serializationStats.totalDeserializations++

    // null/undefined fall through to the type check below instead of crashing here
    if (data !== null && data !== undefined && data.length === 0) return null

    try {
      let str
      if (Buffer.isBuffer(data)) {
        // Fast path: avoid toString() for a line that is just a newline
        if (data.length === 1 && data[0] === 0x0A) return null
        str = data.toString('utf8').trim()
      } else if (typeof data === 'string') {
        str = data.trim()
      } else {
        throw new Error('Invalid data type for deserialization')
      }

      if (str.length === 0) return null

      const firstBrace = str.indexOf('{')
      const firstBracket = str.indexOf('[')

      // No object/array opener at all: flag the error so callers can skip the line
      if (firstBrace === -1 && firstBracket === -1) {
        const error = new Error(`Failed to deserialize JSON data: No valid JSON structure found in "${str.substring(0, 100)}..."`)
        error.noValidJson = true
        throw error
      }

      const debug = Boolean(this.opts && this.opts.debugMode)

      if (firstBrace === 0 || firstBracket === 0) {
        // JSON starts at the beginning. A second opener of the same kind may mean
        // several values share the line, so isolate the first complete one.
        const startChar = str[0]
        if (str.indexOf(startChar, 1) !== -1) {
          const extracted = this._extractFirstJson(str, startChar)
          if (extracted) {
            // Warn only when something was actually cut off (not for mere nesting)
            if (debug && extracted.length < str.length) {
              const kind = startChar === '{' ? 'object' : 'array'
              console.warn(`⚠️ Deserialize: Multiple JSON ${kind}s detected, using first ${kind} only`)
            }
            str = extracted
          }
        }
      } else {
        // Leading non-JSON text: start from whichever opener comes first
        const jsonStart = firstBrace !== -1
          ? (firstBracket !== -1 ? Math.min(firstBrace, firstBracket) : firstBrace)
          : firstBracket
        const candidate = str.substring(jsonStart)
        const startChar = candidate[0]
        const extracted = this._extractFirstJson(candidate, startChar)

        if (extracted) {
          str = extracted
          if (debug) {
            console.warn(`⚠️ Deserialize: Found JSON after ${jsonStart} chars of invalid text, extracted first ${startChar === '{' ? 'object' : 'array'}`)
          }
        }
        // When extraction fails, JSON.parse below reports the malformed text
      }

      const parsedData = JSON.parse(str)

      // Convert from array format back to object if needed
      return this.convertFromArrayFormat(parsedData)
    } catch (e) {
      // Errors already flagged as "no valid JSON" are re-thrown untouched
      if (e.noValidJson) {
        throw e
      }
      const preview = this._previewInput(data)
      throw new Error(`Failed to deserialize JSON data: "${preview.substring(0, 100)}..." - ${e.message}`, { cause: e })
    }
  }

  /**
   * Return the first balanced `{...}` or `[...]` found when scanning `jsonStr`
   * from its start. Only the delimiter pair selected by `startChar` is counted,
   * and delimiters inside double-quoted strings are ignored.
   *
   * @param {string} jsonStr - Text expected to begin with `startChar`.
   * @param {string} startChar - '{' or '['.
   * @returns {string|null} The balanced prefix, or null when none closes.
   */
  _extractFirstJson(jsonStr, startChar) {
    let closeChar
    if (startChar === '{') {
      closeChar = '}'
    } else if (startChar === '[') {
      closeChar = ']'
    } else {
      return null
    }

    let depth = 0
    let inString = false
    let escapeNext = false

    for (let i = 0; i < jsonStr.length; i++) {
      const char = jsonStr[i]

      if (escapeNext) {
        // Character after a backslash is taken literally
        escapeNext = false
        continue
      }

      if (char === '\\') {
        escapeNext = true
        continue
      }

      if (char === '"') {
        inString = !inString
        continue
      }

      if (inString) continue

      if (char === startChar) {
        depth++
      } else if (char === closeChar) {
        depth--
        if (depth === 0) {
          return jsonStr.substring(0, i + 1)
        }
      }
    }

    return null
  }

  /**
   * Produce a printable form of deserializer input for error messages.
   * Never throws, whatever the input type.
   *
   * @param {*} data
   * @returns {string}
   */
  _previewInput(data) {
    if (Buffer.isBuffer(data)) return data.toString('utf8').trim()
    if (typeof data === 'string') return data.trim()
    try {
      return String(data)
    } catch (error) {
      // e.g. objects without a usable toString
      return Object.prototype.toString.call(data)
    }
  }

  /**
   * Serialize several records as JSONL: one JSON value per line.
   *
   * @param {Array} dataArray - Records to serialize.
   * @param {object} [opts]
   * @param {boolean} [opts.linebreak] - Pass `false` to omit the final "\n".
   * @returns {Buffer} All lines joined by "\n".
   *   NOTE(review): when the batch path throws, the fallback returns an array
   *   with one Buffer per record instead - confirm callers handle both shapes.
   */
  serializeBatch(dataArray, opts = {}) {
    // Only reports problems (console.warn); it does not modify the data
    this.validateEncodingBeforeSerialization(dataArray)

    // Convert all objects to array format for optimization
    const convertedData = dataArray.map(data => this.convertToArrayFormat(data))

    // Count per batch; subtracting the running total would skew (even negate)
    // objectSerializations
    const arrayCount = convertedData.filter((item, index) =>
      Array.isArray(item) && typeof dataArray[index] === 'object' && dataArray[index] !== null
    ).length
    this.serializationStats.arraySerializations += arrayCount
    this.serializationStats.objectSerializations += dataArray.length - arrayCount

    try {
      const lines = convertedData.map(record =>
        this.normalizeEncoding(this.optimizedStringify(record))
      )
      return this._toLineBuffer(lines.join('\n'), opts.linebreak !== false)
    } catch (e) {
      // Fallback to individual serialization if batch serialization fails
      return convertedData.map(data => this.serialize(data, opts))
    }
  }

  /**
   * Parse several stored lines at once.
   *
   * The fast path joins all entries into a single JSON array and parses it in
   * one call. It is used only when it yields exactly one object per entry;
   * otherwise every entry is parsed individually with `deserialize`.
   *
   * @param {Array<Buffer|string>} dataArray - Serialized lines.
   * @returns {Array} One result per entry.
   * @throws {Error} Whatever `deserialize` throws for an entry in the fallback path.
   */
  deserializeBatch(dataArray) {
    try {
      const entries = dataArray.map(data => {
        if (Buffer.isBuffer(data)) {
          return data.toString('utf8').trim()
        }
        if (typeof data === 'string') {
          return data.trim()
        }
        throw new Error('Invalid data type for batch deserialization')
      })

      const parsedResults = JSON.parse('[' + entries.join(',') + ']')

      // An entry such as `{..},{..}` parses fine here but yields extra results;
      // hand those cases to the per-entry parser, which keeps only the first value
      if (parsedResults.length !== dataArray.length) {
        throw new Error('Validation failed - entry count mismatch')
      }

      // Convert arrays back to objects if needed
      const results = parsedResults.map(data => this.convertFromArrayFormat(data))

      // Validate that all results are objects (JexiDB requirement)
      if (results.every(item => item && typeof item === 'object')) {
        return results
      }

      throw new Error('Validation failed - not all entries are objects')
    } catch (e) {
      // Fallback to individual deserialization if batch parsing fails
      return dataArray.map(data => this.deserialize(data))
    }
  }

  /**
   * Check if data appears to be binary (always false since only JSON is used now).
   */
  isBinaryData(data) {
    return false
  }

  /**
   * Get serialization counters, the active configuration flags and the
   * SchemaManager's own statistics.
   */
  getStats() {
    // NOTE: Buffer pool stats removed together with the buffer pool
    return {
      // Serialization stats
      totalSerializations: this.serializationStats.totalSerializations,
      totalDeserializations: this.serializationStats.totalDeserializations,
      jsonSerializations: this.serializationStats.jsonSerializations,
      arraySerializations: this.serializationStats.arraySerializations,
      objectSerializations: this.serializationStats.objectSerializations,

      // Configuration
      enableAdvancedSerialization: this.opts.enableAdvancedSerialization,
      enableArraySerialization: this.opts.enableArraySerialization,

      // Schema stats
      schemaStats: this.schemaManager.getStats()
    }
  }

  /**
   * Reset all counters and the schema manager.
   */
  cleanup() {
    this.serializationStats = this._emptyStats()

    if (this.schemaManager) {
      this.schemaManager.reset()
    }
  }
}
|