jexidb 1.0.8 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +2 -2
- package/README.md +556 -127
- package/dist/FileHandler.js +688 -0
- package/dist/IndexManager.js +353 -0
- package/dist/IntegrityChecker.js +364 -0
- package/dist/JSONLDatabase.js +1132 -0
- package/dist/index.js +598 -0
- package/package.json +65 -59
- package/src/FileHandler.js +674 -0
- package/src/IndexManager.js +363 -0
- package/src/IntegrityChecker.js +379 -0
- package/src/JSONLDatabase.js +1189 -0
- package/src/index.js +594 -0
- package/.gitattributes +0 -2
- package/babel.config.json +0 -5
- package/dist/Database.cjs +0 -1085
- package/src/Database.mjs +0 -376
- package/src/FileHandler.mjs +0 -202
- package/src/IndexManager.mjs +0 -230
- package/src/Serializer.mjs +0 -120
- package/test/README.md +0 -13
- package/test/test-json-compressed.jdb +0 -0
- package/test/test-json.jdb +0 -0
- package/test/test-v8-compressed.jdb +0 -0
- package/test/test-v8.jdb +0 -0
- package/test/test.mjs +0 -168
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.default = void 0;
|
|
7
|
+
/**
 * IndexManager - In-memory index management.
 *
 * For every configured field it keeps a Map from each distinct field value to
 * the Set of record slots ("offset indexes") that hold that value. A parallel
 * `offsets` array stores one number per slot; `null` marks a removed record
 * and a missing entry means no record was ever stored in that slot.
 */
class IndexManager {
  /**
   * @param {Object<string, string>} [indexes] - Field path -> declared type
   *   (the type is only interpreted by `deserialize`).
   */
  constructor(indexes = {}) {
    this.indexes = {};
    this.offsets = [];
    this.recordCount = 0;

    // Initialize indexes based on configuration
    for (const [field, type] of Object.entries(indexes)) {
      this.indexes[field] = {
        type,
        values: new Map() // Map<value, Set<offsetIndex>>
      };
    }
  }

  /**
   * Registers `offsetIndex` under the record's value for every indexed field.
   * Fields whose value resolves to `undefined` are skipped.
   * (Private helper shared by addRecord and updateRecord.)
   */
  _indexValues(record, offsetIndex) {
    for (const [field, index] of Object.entries(this.indexes)) {
      const value = this.getNestedValue(record, field);
      if (value === undefined) continue;
      let slots = index.values.get(value);
      if (!slots) {
        slots = new Set();
        index.values.set(value, slots);
      }
      slots.add(offsetIndex);
    }
  }

  /**
   * Removes `offsetIndex` from every value set of every index and drops value
   * entries that become empty. (Private helper.)
   */
  _purgeOffsetIndex(offsetIndex) {
    for (const index of Object.values(this.indexes)) {
      // Deleting the current entry while iterating a Map is well-defined.
      for (const [value, slots] of index.values) {
        if (slots.delete(offsetIndex) && slots.size === 0) {
          index.values.delete(value);
        }
      }
    }
  }

  /**
   * Adds a record to the index.
   *
   * @param {Object} record - Record to index; `record._offset` (or 0 when
   *   falsy) is stored in `offsets[offsetIndex]`.
   * @param {number} offsetIndex - Slot of the record.
   */
  addRecord(record, offsetIndex) {
    this.offsets[offsetIndex] = record._offset || 0;
    this.recordCount = Math.max(this.recordCount, offsetIndex + 1);
    this._indexValues(record, offsetIndex);
  }

  /**
   * Removes a record from the index and marks its slot as removed (`null`).
   *
   * @param {number} offsetIndex - Slot of the record.
   */
  removeRecord(offsetIndex) {
    this._purgeOffsetIndex(offsetIndex);

    // Mark as removed in the offsets array
    this.offsets[offsetIndex] = null;
  }

  /**
   * Updates a record in the index.
   *
   * @param {Object} record - New version of the record.
   * @param {number} offsetIndex - Slot of the record.
   * @param {Object|null} [oldRecord] - Previous version. When given, only its
   *   values are un-indexed (cheap lookup). When omitted, the slot is purged
   *   from every value set so no stale entries survive the update.
   */
  updateRecord(record, offsetIndex, oldRecord = null) {
    if (oldRecord) {
      for (const [field, index] of Object.entries(this.indexes)) {
        const oldValue = this.getNestedValue(oldRecord, field);
        if (oldValue === undefined) continue;
        const slots = index.values.get(oldValue);
        if (!slots) continue;
        slots.delete(offsetIndex);
        if (slots.size === 0) {
          index.values.delete(oldValue);
        }
      }
    } else {
      // Without the old record we cannot know which values to drop, so scan.
      this._purgeOffsetIndex(offsetIndex);
    }

    // Add new values to indexes
    this._indexValues(record, offsetIndex);

    // Update offset
    this.offsets[offsetIndex] = record._offset || 0;
  }

  /**
   * Searches records based on criteria.
   *
   * @param {Object} criteria - Field path -> value, array of values, or
   *   operator object (see `matchesOperator`). Empty/missing criteria returns
   *   every live slot.
   * @param {Object} [options]
   * @param {boolean} [options.caseInsensitive=false] - Case-insensitive string matching.
   * @param {boolean} [options.matchAny=false] - Union (OR) instead of intersection (AND).
   * @returns {number[]} Matching slots. Returns `[]` as soon as a criteria
   *   field without an index is encountered.
   */
  findRecords(criteria, options = {}) {
    const {
      caseInsensitive = false,
      matchAny = false
    } = options;

    if (!criteria || Object.keys(criteria).length === 0) {
      // Returns all valid records. forEach skips holes of the sparse array and
      // `!= null` skips removed slots, so only slots with a record remain.
      const live = [];
      this.offsets.forEach((offset, i) => {
        if (offset != null) live.push(i);
      });
      return live;
    }

    let matchingOffsets = null;
    for (const [field, criteriaValue] of Object.entries(criteria)) {
      if (!this.indexes[field]) {
        // No index for this field would require a full scan, which this class
        // cannot do; report no matches.
        return [];
      }
      const fieldOffsets = this.findFieldMatches(field, criteriaValue, caseInsensitive);
      if (matchingOffsets === null) {
        matchingOffsets = fieldOffsets;
      } else if (matchAny) {
        // Union (OR)
        matchingOffsets = new Set([...matchingOffsets, ...fieldOffsets]);
      } else {
        // Intersection (AND)
        matchingOffsets = new Set([...matchingOffsets].filter(x => fieldOffsets.has(x)));
      }
      if (!matchAny && matchingOffsets.size === 0) {
        break; // No intersection, stop search
      }
    }
    return matchingOffsets ? Array.from(matchingOffsets) : [];
  }

  /**
   * Searches for matches in a specific field.
   *
   * @param {string} field - Indexed field path.
   * @param {*} criteriaValue - A plain value, an array of alternative values,
   *   or an operator object. `null` is treated as a plain value.
   * @param {boolean} caseInsensitive - Compare strings ignoring case.
   * @returns {Set<number>} Matching slots (empty when the field has no index).
   */
  findFieldMatches(field, criteriaValue, caseInsensitive) {
    const matches = new Set();
    const index = this.indexes[field];
    if (!index) return matches;

    const addAll = slots => {
      for (const slot of slots) {
        matches.add(slot);
      }
    };

    // typeof null === 'object', so null must be excluded explicitly; otherwise
    // it would be handed to matchesOperator and Object.entries(null) throws.
    const isOperatorObject = criteriaValue !== null && typeof criteriaValue === 'object' && !Array.isArray(criteriaValue);
    if (isOperatorObject) {
      // Comparison operators
      for (const [value, slots] of index.values) {
        if (this.matchesOperator(value, criteriaValue, caseInsensitive)) {
          addAll(slots);
        }
      }
      return matches;
    }

    // Direct comparison
    const wanted = Array.isArray(criteriaValue) ? criteriaValue : [criteriaValue];
    for (const searchValue of wanted) {
      if (caseInsensitive && typeof searchValue === 'string') {
        // A Map lookup is case-sensitive, so scan the distinct values instead.
        const needle = searchValue.toLowerCase();
        for (const [value, slots] of index.values) {
          if (typeof value === 'string' && value.toLowerCase() === needle) {
            addAll(slots);
          }
        }
      } else {
        const slots = index.values.get(searchValue);
        if (slots) {
          addAll(slots);
        }
      }
    }
    return matches;
  }

  /**
   * Checks if a value matches the operators.
   *
   * Supported operators: '>', '>=', '<', '<=', '!=', 'in', 'nin', 'regex',
   * 'contains'. All listed operators must hold; unknown operator keys are
   * ignored.
   *
   * @param {*} value - Indexed value under test.
   * @param {Object} operators - Operator -> operand.
   * @param {boolean} caseInsensitive - Applies to 'regex' and 'contains' only.
   * @returns {boolean}
   */
  matchesOperator(value, operators, caseInsensitive) {
    for (const [operator, operatorValue] of Object.entries(operators)) {
      switch (operator) {
        case '>':
          if (value <= operatorValue) return false;
          break;
        case '>=':
          if (value < operatorValue) return false;
          break;
        case '<':
          if (value >= operatorValue) return false;
          break;
        case '<=':
          if (value > operatorValue) return false;
          break;
        case '!=':
          if (value === operatorValue) return false;
          break;
        case 'in':
          if (!Array.isArray(operatorValue) || !operatorValue.includes(value)) return false;
          break;
        case 'nin':
          if (Array.isArray(operatorValue) && operatorValue.includes(value)) return false;
          break;
        case 'regex': {
          const regex = new RegExp(operatorValue, caseInsensitive ? 'i' : '');
          if (!regex.test(String(value))) return false;
          break;
        }
        case 'contains': {
          const searchStr = String(operatorValue);
          const valueStr = String(value);
          const found = caseInsensitive ? valueStr.toLowerCase().includes(searchStr.toLowerCase()) : valueStr.includes(searchStr);
          if (!found) return false;
          break;
        }
      }
    }
    return true;
  }

  /**
   * Gets nested value from an object.
   *
   * @param {Object} obj - Source object.
   * @param {string} path - Dot-separated property path, e.g. "user.address.city".
   * @returns {*} The value, or `undefined` when any step is missing or an
   *   intermediate value is falsy.
   */
  getNestedValue(obj, path) {
    let current = obj;
    for (const key of path.split('.')) {
      if (!current || current[key] === undefined) return undefined;
      current = current[key];
    }
    return current;
  }

  /**
   * Recalculates all offsets after modifications.
   *
   * Each stored entry is read as a size and replaced by the running total of
   * the entries before it. Removed or never-assigned slots are kept as `null`
   * so they neither poison the running total nor look like live records.
   */
  recalculateOffsets() {
    let currentOffset = 0;
    const newOffsets = [];
    for (let i = 0; i < this.offsets.length; i++) {
      const size = this.offsets[i];
      if (size == null) {
        newOffsets[i] = null;
        continue;
      }
      newOffsets[i] = currentOffset;
      currentOffset += size;
    }
    this.offsets = newOffsets;
  }

  /**
   * Recalculates offsets after a file rewrite.
   * This method should be called after the file has been rewritten.
   *
   * Every readable record without a truthy `_deleted` flag gets the next slot;
   * its offset is the sum of `getByteLength(serialize(record))` of the records
   * counted before it. Replaces `offsets` and `recordCount`.
   *
   * @param {Object} fileHandler - Must provide readLine, deserialize,
   *   serialize and getByteLength.
   */
  async recalculateOffsetsFromFile(fileHandler) {
    const newOffsets = [];
    let currentOffset = 0;
    let recordIndex = 0;

    // Read the file line by line and recalculate offsets
    for await (const record of this.walkFile(fileHandler)) {
      if (!record || record._deleted) continue;
      newOffsets[recordIndex] = currentOffset;
      currentOffset += fileHandler.getByteLength(fileHandler.serialize(record));
      recordIndex++;
    }
    this.offsets = newOffsets;
    this.recordCount = recordIndex;
  }

  /**
   * Walks through the file to read all records.
   *
   * Repeatedly calls `fileHandler.readLine(offset)` until it yields no line
   * (`null` or `undefined`), advancing by the byte length of each line plus a
   * newline. Lines that fail to deserialize are skipped.
   *
   * @param {Object} fileHandler - Must provide readLine, deserialize and getByteLength.
   * @yields {*} Each deserialized record.
   */
  async *walkFile(fileHandler) {
    let offset = 0;
    while (true) {
      const line = await fileHandler.readLine(offset);
      if (line == null) break;

      let record;
      let readable = true;
      try {
        record = fileHandler.deserialize(line);
      } catch (error) {
        // Skip corrupted lines, but still advance past them below
        readable = false;
      }
      if (readable) {
        yield record;
      }
      offset += fileHandler.getByteLength(line + '\n');
    }
  }

  /**
   * Gets index statistics.
   *
   * @returns {{recordCount: number, indexCount: number, indexes: Object}}
   *   Per field: declared type, number of distinct values and total number of
   *   slot references.
   */
  getStats() {
    const stats = {
      recordCount: this.recordCount,
      indexCount: Object.keys(this.indexes).length,
      indexes: {}
    };
    for (const [field, index] of Object.entries(this.indexes)) {
      let totalReferences = 0;
      for (const slots of index.values.values()) {
        totalReferences += slots.size;
      }
      stats.indexes[field] = {
        type: index.type,
        uniqueValues: index.values.size,
        totalReferences
      };
    }
    return stats;
  }

  /**
   * Clears all indexes, including the index configuration itself.
   */
  clear() {
    this.indexes = {};
    this.offsets = [];
    this.recordCount = 0;
  }

  /**
   * Serializes indexes for persistence.
   *
   * Value Maps become plain objects (so every value is stringified as a
   * property key) and slot Sets become arrays. `offsets` is returned by
   * reference, not copied.
   *
   * @returns {{indexes: Object, offsets: Array, recordCount: number}}
   */
  serialize() {
    const serialized = {
      indexes: {},
      offsets: this.offsets,
      recordCount: this.recordCount
    };
    for (const [field, index] of Object.entries(this.indexes)) {
      const values = {};
      for (const [value, slots] of index.values) {
        values[value] = Array.from(slots);
      }
      serialized.indexes[field] = {
        type: index.type,
        values
      };
    }
    return serialized;
  }

  /**
   * Deserializes indexes from persistence.
   *
   * Does nothing when `data` or `data.indexes` is missing. Keys are converted
   * back from strings according to the index type: 'number' via Number(),
   * 'boolean' via comparison with 'true', anything else stays a string.
   *
   * @param {Object} data - Output of `serialize()` (possibly after a JSON round trip).
   */
  deserialize(data) {
    if (!data || !data.indexes) return;
    this.indexes = {};
    this.offsets = data.offsets || [];
    this.recordCount = data.recordCount || 0;

    for (const [field, indexData] of Object.entries(data.indexes)) {
      const type = indexData.type;
      const values = new Map();
      // Tolerate an index entry persisted without a `values` object.
      for (const [valueStr, offsetArr] of Object.entries(indexData.values || {})) {
        let key;
        if (type === 'number') {
          key = Number(valueStr);
        } else if (type === 'boolean') {
          key = valueStr === 'true';
        } else {
          key = valueStr;
        }
        values.set(key, new Set(offsetArr));
      }
      this.indexes[field] = {
        type,
        values
      };
    }
  }
}
|
|
353
|
+
var _default = exports.default = IndexManager;
|
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.default = void 0;
|
|
7
|
+
var _fs = require("fs");
|
|
8
|
+
/**
 * IntegrityChecker - JSONL file integrity validation.
 *
 * Checks consistency between the data file, the in-memory indexes and the
 * offsets array of an index manager, and can rebuild the indexes from the
 * data file.
 */
class IntegrityChecker {
  /**
   * @param {Object} fileHandler - Provides `filePath`, `exists()`,
   *   `getStats()` and `writeIndex()`.
   * @param {Object} indexManager - Provides `indexes`, `offsets`, `clear()`,
   *   `addRecord()`, `serialize()` and `getStats()`.
   */
  constructor(fileHandler, indexManager) {
    this.fileHandler = fileHandler;
    this.indexManager = indexManager;
  }

  /**
   * Validates the complete integrity of the database.
   *
   * @param {Object} [options]
   * @param {boolean} [options.checkData=true] - Run `validateDataFile`.
   * @param {boolean} [options.checkIndexes=true] - Run `validateIndexes`.
   * @param {boolean} [options.checkOffsets=true] - Run `validateOffsets`.
   * @param {boolean} [options.verbose=false] - Log progress to the console.
   * @returns {Promise<{isValid: boolean, errors: string[], warnings: string[], stats: Object}>}
   *   `isValid` is true only when no check reported an error.
   */
  async validateIntegrity(options = {}) {
    const {
      checkData = true,
      checkIndexes = true,
      checkOffsets = true,
      verbose = false
    } = options;
    const results = {
      isValid: true,
      errors: [],
      warnings: [],
      stats: {
        totalRecords: 0,
        validRecords: 0,
        corruptedRecords: 0,
        missingIndexes: 0,
        orphanedIndexes: 0
      }
    };
    if (verbose) {
      console.log('🔍 Starting integrity validation...');
    }

    // Check if file exists
    const fileExists = await this.fileHandler.exists();
    if (!fileExists) {
      results.errors.push('Data file does not exist');
      results.isValid = false;
      return results;
    }

    // Run the enabled checks in order: data, indexes, offsets
    const checks = [
      [checkData, () => this.validateDataFile(verbose)],
      [checkIndexes, () => this.validateIndexes(verbose)],
      [checkOffsets, () => this.validateOffsets(verbose)]
    ];
    for (const [enabled, run] of checks) {
      if (!enabled) continue;
      const partial = await run();
      results.errors.push(...partial.errors);
      results.warnings.push(...partial.warnings);
      if (partial.stats) {
        results.stats = {
          ...results.stats,
          ...partial.stats
        };
      }
    }

    // Determine if valid
    results.isValid = results.errors.length === 0;
    if (verbose) {
      console.log(`✅ Validation completed: ${results.isValid ? 'VALID' : 'INVALID'}`);
      console.log(`📊 Statistics:`, results.stats);
      if (results.errors.length > 0) {
        console.log(`❌ Errors found:`, results.errors);
      }
      if (results.warnings.length > 0) {
        console.log(`⚠️ Warnings:`, results.warnings);
      }
    }
    return results;
  }

  /**
   * Streams the data file line by line. (Private helper.)
   *
   * The file is read in 8 KiB chunks and split on the newline byte before
   * decoding, so multi-byte UTF-8 characters that straddle a chunk boundary
   * are decoded intact and a line may be longer than one chunk. The read
   * position is tracked separately from line offsets.
   *
   * @yields {{line: string, lineNumber: number, offset: number, terminated: boolean}}
   *   `lineNumber` is zero-based, `offset` is the byte position of the line's
   *   first byte in the file, and `terminated` is false only for trailing
   *   bytes after the last newline.
   */
  async *_readLines() {
    const fd = await _fs.promises.open(this.fileHandler.filePath, 'r');
    try {
      const chunk = Buffer.alloc(8192);
      let pending = Buffer.alloc(0); // bytes read but not yet emitted as a line
      let pendingOffset = 0; // file position of pending[0]
      let readPosition = 0; // file position of the next read
      let lineNumber = 0;

      while (true) {
        const { bytesRead } = await fd.read(chunk, 0, chunk.length, readPosition);
        if (bytesRead === 0) break;
        readPosition += bytesRead;

        // Bytes already in `pending` are known to contain no newline.
        const alreadyScanned = pending.length;
        // Buffer.concat copies, so reusing `chunk` for the next read is safe.
        pending = Buffer.concat([pending, chunk.subarray(0, bytesRead)]);

        let lineStart = 0;
        let newlineAt = pending.indexOf(0x0a, alreadyScanned);
        while (newlineAt !== -1) {
          yield {
            line: pending.toString('utf8', lineStart, newlineAt),
            lineNumber,
            offset: pendingOffset + lineStart,
            terminated: true
          };
          lineNumber++;
          lineStart = newlineAt + 1;
          newlineAt = pending.indexOf(0x0a, lineStart);
        }
        pending = pending.subarray(lineStart);
        pendingOffset += lineStart;
      }

      if (pending.length > 0) {
        yield {
          line: pending.toString('utf8'),
          lineNumber,
          offset: pendingOffset,
          terminated: false
        };
      }
    } finally {
      await fd.close();
    }
  }

  /**
   * Validates the JSONL data file.
   *
   * Every newline-terminated line counts towards `totalRecords`. Blank lines
   * produce a warning, lines that cannot be processed as a JSON record count
   * as corrupted and produce an error, and records with a truthy `_deleted`
   * flag are neither valid nor corrupted. Non-blank trailing text without a
   * final newline only produces a warning. I/O failures are reported as an
   * error entry instead of being thrown.
   *
   * @param {boolean} [verbose=false] - Log one console line per record.
   * @returns {Promise<{errors: string[], warnings: string[], stats: Object}>}
   */
  async validateDataFile(verbose = false) {
    const results = {
      errors: [],
      warnings: [],
      stats: {
        totalRecords: 0,
        validRecords: 0,
        corruptedRecords: 0
      }
    };
    try {
      for await (const { line, lineNumber, terminated } of this._readLines()) {
        if (!terminated) {
          // Last line doesn't end with \n: warn about it, do not parse it
          if (line.trim() !== '') {
            results.warnings.push(`Line ${lineNumber + 1}: File doesn't end with newline`);
          }
          break;
        }

        results.stats.totalRecords++;
        if (line.trim() === '') {
          results.warnings.push(`Line ${lineNumber + 1}: Empty line`);
          continue;
        }

        try {
          const record = JSON.parse(line);

          // Check if it's a deleted record
          if (record._deleted) {
            if (verbose) {
              console.log(`🗑️ Line ${lineNumber + 1}: Deleted record`);
            }
          } else {
            results.stats.validRecords++;
            if (verbose) {
              console.log(`✅ Line ${lineNumber + 1}: Valid record`);
            }
          }
        } catch (error) {
          results.stats.corruptedRecords++;
          results.errors.push(`Line ${lineNumber + 1}: Invalid JSON - ${error.message}`);
          if (verbose) {
            console.log(`❌ Line ${lineNumber + 1}: Corrupted JSON`);
          }
        }
      }
    } catch (error) {
      results.errors.push(`Error reading file: ${error.message}`);
    }
    return results;
  }

  /**
   * Validates index consistency.
   *
   * Reports an error for every index entry that points to a slot without a
   * live offset ("orphaned") and a warning for every live slot that no index
   * references. A slot is live when its offsets entry is neither `null` nor
   * missing.
   *
   * @param {boolean} [verbose=false] - Log each index being checked.
   * @returns {Promise<{errors: string[], warnings: string[], stats: Object}>}
   */
  async validateIndexes(verbose = false) {
    const results = {
      errors: [],
      warnings: [],
      stats: {
        missingIndexes: 0,
        orphanedIndexes: 0
      }
    };
    const indexData = this.indexManager.serialize();
    const offsets = this.indexManager.offsets;

    // Collect all valid offsets (`!= null` also skips holes of a sparse array)
    const validOffsets = new Set();
    for (let i = 0; i < offsets.length; i++) {
      if (offsets[i] != null) {
        validOffsets.add(i);
      }
    }

    // Check each index
    for (const [field, fieldIndexData] of Object.entries(indexData.indexes)) {
      if (verbose) {
        console.log(`🔍 Checking index: ${field}`);
      }
      for (const [value, offsetArray] of Object.entries(fieldIndexData.values)) {
        for (const offsetIndex of offsetArray) {
          if (!validOffsets.has(offsetIndex)) {
            results.stats.orphanedIndexes++;
            results.errors.push(`Orphaned index: ${field}=${value} points to non-existent offset ${offsetIndex}`);
          }
        }
      }
    }

    // Check if there are valid records without index. Collect every
    // referenced slot once instead of rescanning all indexes per record.
    const indexedOffsets = new Set();
    for (const index of Object.values(this.indexManager.indexes)) {
      for (const offsetSet of index.values.values()) {
        for (const offsetIndex of offsetSet) {
          indexedOffsets.add(offsetIndex);
        }
      }
    }
    for (const offsetIndex of validOffsets) {
      if (!indexedOffsets.has(offsetIndex)) {
        results.stats.missingIndexes++;
        results.warnings.push(`Record at offset ${offsetIndex} is not indexed`);
      }
    }
    return results;
  }

  /**
   * Validates offset consistency.
   *
   * Reports an error for every stored offset that is negative or not smaller
   * than the data file size reported by `fileHandler.getStats()`.
   *
   * @param {boolean} [verbose=false] - Accepted for symmetry; currently unused.
   * @returns {Promise<{errors: string[], warnings: string[]}>}
   */
  async validateOffsets(verbose = false) {
    const results = {
      errors: [],
      warnings: []
    };
    const stats = await this.fileHandler.getStats();
    const fileSize = stats.size;

    // Check if offsets are valid
    const offsets = this.indexManager.offsets;
    for (let i = 0; i < offsets.length; i++) {
      const offset = offsets[i];
      if (offset == null) continue; // removed or never-assigned slot
      if (offset < 0) {
        results.errors.push(`Offset ${i}: Negative value (${offset})`);
      } else if (offset >= fileSize) {
        results.errors.push(`Offset ${i}: Out of file bounds (${offset} >= ${fileSize})`);
      }
    }
    return results;
  }

  /**
   * Rebuilds indexes from the data file.
   *
   * The index manager is cleared, its previously configured fields and types
   * are restored with empty value maps, and every newline-terminated line
   * that parses to a record without a truthy `_deleted` flag is added under
   * its zero-based line number, with `_offset` set to the byte position of
   * the line in the file. Blank lines, unreadable lines and trailing text
   * without a final newline are ignored. The rebuilt index is then persisted
   * through `fileHandler.writeIndex`.
   *
   * @param {boolean} [verbose=false] - Log progress to the console.
   * @returns {Promise<boolean>} `true` on success.
   * @throws Any error raised while reading the file or writing the index.
   */
  async rebuildIndexes(verbose = false) {
    if (verbose) {
      console.log('🔧 Rebuilding indexes...');
    }

    // Store the configured indexes before clearing
    const configuredIndexes = this.indexManager.indexes;

    // Clear current indexes but preserve configuration
    this.indexManager.clear();

    // Restore the configured indexes
    for (const [field, indexConfig] of Object.entries(configuredIndexes)) {
      this.indexManager.indexes[field] = {
        type: indexConfig.type,
        values: new Map()
      };
    }

    try {
      for await (const { line, lineNumber, offset, terminated } of this._readLines()) {
        if (!terminated) break; // unterminated trailing text is not indexed
        if (line.trim() === '') continue;

        try {
          const record = JSON.parse(line);

          // Only index non-deleted records
          if (!record._deleted) {
            record._offset = offset;
            this.indexManager.addRecord(record, lineNumber);
            if (verbose) {
              console.log(`✅ Reindexed: line ${lineNumber + 1}`);
            }
          }
        } catch (error) {
          // Best effort: a line that cannot be indexed is skipped
          if (verbose) {
            console.log(`⚠️ Line ${lineNumber + 1}: Ignored (invalid JSON)`);
          }
        }
      }

      // Save rebuilt indexes
      await this.fileHandler.writeIndex(this.indexManager.serialize());
      if (verbose) {
        console.log('✅ Indexes rebuilt successfully');
      }
      return true;
    } catch (error) {
      if (verbose) {
        console.error('❌ Error rebuilding indexes:', error.message);
      }
      throw error;
    }
  }

  /**
   * Exports detailed statistics.
   *
   * Combines the file stats from the file handler, the index manager's stats
   * and a full (non-verbose) integrity validation.
   *
   * @returns {Promise<{file: Object, indexes: Object, integrity: Object, summary: Object}>}
   */
  async exportStats() {
    const stats = await this.fileHandler.getStats();
    const indexStats = this.indexManager.getStats();
    const integrityResults = await this.validateIntegrity({
      verbose: false
    });
    return {
      file: {
        path: this.fileHandler.filePath,
        size: stats.size,
        created: stats.created,
        modified: stats.modified
      },
      indexes: indexStats,
      integrity: integrityResults,
      summary: {
        totalRecords: indexStats.recordCount,
        fileSize: stats.size,
        isValid: integrityResults.isValid,
        errorCount: integrityResults.errors.length,
        warningCount: integrityResults.warnings.length
      }
    };
  }
}
|
|
364
|
+
var _default = exports.default = IntegrityChecker;
|