@toiroakr/lines-db 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/bin/cli.js +378 -415
- package/dist/index.cjs +195 -327
- package/dist/index.d.cts +64 -84
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +64 -84
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +197 -328
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
- package/src/cli.ts +226 -126
- package/src/database.ts +296 -52
- package/src/index.ts +2 -2
- package/src/jsonl-migration.ts +24 -56
- package/src/schema.ts +37 -32
- package/src/types.ts +21 -0
- package/src/validator.test.ts +0 -507
- package/src/validator.ts +0 -441
package/dist/index.cjs
CHANGED
|
@@ -261,8 +261,7 @@ var DirectoryScanner = class {
|
|
|
261
261
|
* Define a bidirectional schema with optional backward transformation
|
|
262
262
|
*
|
|
263
263
|
* @param schema - Standard Schema for validation
|
|
264
|
-
* @param
|
|
265
|
-
* Required when schema performs transformations
|
|
264
|
+
* @param options - SchemaOptions object. When Input and Output types differ, backward transformation is required
|
|
266
265
|
*
|
|
267
266
|
* @example
|
|
268
267
|
* // No transformation - backward not needed
|
|
@@ -271,10 +270,12 @@ var DirectoryScanner = class {
|
|
|
271
270
|
* );
|
|
272
271
|
*
|
|
273
272
|
* @example
|
|
274
|
-
* // With transformation - backward
|
|
273
|
+
* // With transformation - backward REQUIRED
|
|
275
274
|
* const schema = defineSchema(
|
|
276
275
|
* v.pipe(v.string(), v.transform(Number)),
|
|
277
|
-
*
|
|
276
|
+
* {
|
|
277
|
+
* backward: (num) => String(num) // backward: number → string (REQUIRED)
|
|
278
|
+
* }
|
|
278
279
|
* );
|
|
279
280
|
*
|
|
280
281
|
* @example
|
|
@@ -289,14 +290,14 @@ var DirectoryScanner = class {
|
|
|
289
290
|
* }
|
|
290
291
|
* );
|
|
291
292
|
*/
|
|
292
|
-
function defineSchema(schema,
|
|
293
|
+
function defineSchema(schema, ...args) {
|
|
294
|
+
const options = args[0];
|
|
293
295
|
const bidirectionalSchema = Object.create(schema);
|
|
294
|
-
if (
|
|
295
|
-
|
|
296
|
-
if (
|
|
297
|
-
if (
|
|
298
|
-
if (
|
|
299
|
-
if (optionsOrBackward.indexes) bidirectionalSchema.indexes = optionsOrBackward.indexes;
|
|
296
|
+
if (options) {
|
|
297
|
+
if (options.backward) bidirectionalSchema.backward = options.backward;
|
|
298
|
+
if (options.primaryKey) bidirectionalSchema.primaryKey = options.primaryKey;
|
|
299
|
+
if (options.foreignKeys) bidirectionalSchema.foreignKeys = options.foreignKeys;
|
|
300
|
+
if (options.indexes) bidirectionalSchema.indexes = options.indexes;
|
|
300
301
|
}
|
|
301
302
|
Object.defineProperty(bidirectionalSchema, "~standard", {
|
|
302
303
|
value: schema["~standard"],
|
|
@@ -329,27 +330,49 @@ var LinesDB = class LinesDB {
|
|
|
329
330
|
return new LinesDB(config, dbPath);
|
|
330
331
|
}
|
|
331
332
|
/**
|
|
332
|
-
* Initialize database by loading all JSONL files
|
|
333
|
+
* Initialize database by loading all JSONL files or a specific table
|
|
333
334
|
* Uses dependency resolution to ensure foreign key references are loaded in correct order
|
|
335
|
+
* @param options Optional configuration for initialization
|
|
336
|
+
* @param options.tableName Optional table name to initialize. If not provided, initializes all tables
|
|
337
|
+
* @param options.detailedValidate If true, performs detailed validation by inserting rows one by one to catch constraint violations
|
|
338
|
+
* @param options.transform Optional transform function to apply to rows before validation (only applied to the specified tableName)
|
|
339
|
+
* @returns ValidationResult containing validation status, errors, and warnings
|
|
334
340
|
*/
|
|
335
|
-
async initialize() {
|
|
341
|
+
async initialize(options) {
|
|
342
|
+
const allErrors = [];
|
|
343
|
+
const allWarnings = [];
|
|
344
|
+
const tableName = options?.tableName;
|
|
345
|
+
const detailedValidate = options?.detailedValidate ?? false;
|
|
346
|
+
const transform = options?.transform;
|
|
336
347
|
this.tables = await DirectoryScanner.scanDirectory(this.config.dataDir);
|
|
348
|
+
const tablesToLoad = tableName ? [tableName] : Array.from(this.tables.keys());
|
|
349
|
+
for (const tableNameToLoad of tablesToLoad) if (!this.tables.has(tableNameToLoad)) throw new Error(`Table '${tableNameToLoad}' not found in directory '${this.config.dataDir}'`);
|
|
337
350
|
const loadedTables = /* @__PURE__ */ new Set();
|
|
338
351
|
const loadingTables = /* @__PURE__ */ new Set();
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
this.validationSchemas.delete(tableName);
|
|
352
|
+
const attemptedTables = /* @__PURE__ */ new Set();
|
|
353
|
+
for (const tableNameToLoad of tablesToLoad) if (!attemptedTables.has(tableNameToLoad)) {
|
|
354
|
+
const tableTransform = tableNameToLoad === tableName ? transform : void 0;
|
|
355
|
+
const { errors, warnings } = await this.loadTableWithDependencies(tableNameToLoad, loadedTables, loadingTables, attemptedTables, detailedValidate, tableTransform);
|
|
356
|
+
allErrors.push(...errors);
|
|
357
|
+
allWarnings.push(...warnings);
|
|
346
358
|
}
|
|
359
|
+
return {
|
|
360
|
+
valid: allErrors.length === 0,
|
|
361
|
+
errors: allErrors,
|
|
362
|
+
warnings: allWarnings
|
|
363
|
+
};
|
|
347
364
|
}
|
|
348
365
|
/**
|
|
349
366
|
* Load a table and its dependencies recursively
|
|
350
367
|
*/
|
|
351
|
-
async loadTableWithDependencies(tableName, loadedTables, loadingTables) {
|
|
352
|
-
|
|
368
|
+
async loadTableWithDependencies(tableName, loadedTables, loadingTables, attemptedTables, detailedValidate, transform) {
|
|
369
|
+
const errors = [];
|
|
370
|
+
const warnings = [];
|
|
371
|
+
if (attemptedTables.has(tableName)) return {
|
|
372
|
+
errors,
|
|
373
|
+
warnings
|
|
374
|
+
};
|
|
375
|
+
attemptedTables.add(tableName);
|
|
353
376
|
if (loadingTables.has(tableName)) throw new Error(`Circular dependency detected for table '${tableName}'`);
|
|
354
377
|
const tableConfig = this.tables.get(tableName);
|
|
355
378
|
if (!tableConfig) throw new Error(`Table configuration not found for '${tableName}'`);
|
|
@@ -363,21 +386,35 @@ var LinesDB = class LinesDB {
|
|
|
363
386
|
} catch {}
|
|
364
387
|
if (foreignKeys && foreignKeys.length > 0) for (const fk of foreignKeys) {
|
|
365
388
|
const referencedTable = fk.references.table;
|
|
366
|
-
if (
|
|
367
|
-
|
|
389
|
+
if (referencedTable === tableName) continue;
|
|
390
|
+
if (!attemptedTables.has(referencedTable)) if (this.tables.has(referencedTable)) {
|
|
391
|
+
const depResult = await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables, attemptedTables, detailedValidate, void 0);
|
|
392
|
+
errors.push(...depResult.errors);
|
|
393
|
+
warnings.push(...depResult.warnings);
|
|
394
|
+
} else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
|
|
395
|
+
}
|
|
396
|
+
const { loaded, errors: loadErrors } = await this.loadTable(tableName, tableConfig, detailedValidate, transform);
|
|
397
|
+
errors.push(...loadErrors);
|
|
398
|
+
if (loaded) loadedTables.add(tableName);
|
|
399
|
+
else {
|
|
400
|
+
warnings.push(`Table '${tableName}' was not loaded (no data or skipped)`);
|
|
401
|
+
this.tables.delete(tableName);
|
|
368
402
|
}
|
|
369
|
-
if (await this.loadTable(tableName, tableConfig)) loadedTables.add(tableName);
|
|
370
|
-
else this.tables.delete(tableName);
|
|
371
403
|
} finally {
|
|
372
404
|
loadingTables.delete(tableName);
|
|
373
405
|
}
|
|
406
|
+
return {
|
|
407
|
+
errors,
|
|
408
|
+
warnings
|
|
409
|
+
};
|
|
374
410
|
}
|
|
375
411
|
/**
|
|
376
412
|
* Load a single table from JSONL file
|
|
377
|
-
* @returns
|
|
413
|
+
* @returns Object with loaded status and validation errors
|
|
378
414
|
*/
|
|
379
|
-
async loadTable(tableName, config) {
|
|
380
|
-
|
|
415
|
+
async loadTable(tableName, config, detailedValidate, transform) {
|
|
416
|
+
let data = await JsonlReader.read(config.jsonlPath);
|
|
417
|
+
if (transform) data = data.map((row) => transform(row));
|
|
381
418
|
let validationSchema = config.validationSchema;
|
|
382
419
|
const schemaMetadata = {};
|
|
383
420
|
if (!validationSchema) try {
|
|
@@ -411,18 +448,32 @@ var LinesDB = class LinesDB {
|
|
|
411
448
|
else throw error;
|
|
412
449
|
}
|
|
413
450
|
}
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
}
|
|
451
|
+
const validationErrorDetails = validationErrors.map((ve) => ({
|
|
452
|
+
file: config.jsonlPath,
|
|
453
|
+
tableName,
|
|
454
|
+
rowIndex: ve.rowIndex,
|
|
455
|
+
issues: ve.error.issues,
|
|
456
|
+
type: "schema"
|
|
457
|
+
}));
|
|
458
|
+
if (validationErrors.length > 0) return {
|
|
459
|
+
loaded: false,
|
|
460
|
+
errors: validationErrorDetails
|
|
461
|
+
};
|
|
421
462
|
let schema;
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
schema =
|
|
463
|
+
let inferredSchema;
|
|
464
|
+
if (validatedData.length > 0) inferredSchema = JsonlReader.inferSchema(tableName, validatedData);
|
|
465
|
+
if (config.schema) {
|
|
466
|
+
schema = config.schema;
|
|
467
|
+
if (inferredSchema) for (const inferredCol of inferredSchema.columns) {
|
|
468
|
+
const schemaCol = schema.columns.find((c) => c.name === inferredCol.name);
|
|
469
|
+
if (schemaCol && inferredCol.valueType && !schemaCol.valueType) schemaCol.valueType = inferredCol.valueType;
|
|
470
|
+
}
|
|
471
|
+
} else if (config.autoInferSchema !== false) {
|
|
472
|
+
if (validatedData.length === 0) return {
|
|
473
|
+
loaded: false,
|
|
474
|
+
errors: []
|
|
475
|
+
};
|
|
476
|
+
schema = inferredSchema;
|
|
426
477
|
} else throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
|
|
427
478
|
const biSchema = validationSchema;
|
|
428
479
|
const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
|
|
@@ -439,8 +490,17 @@ var LinesDB = class LinesDB {
|
|
|
439
490
|
if (indexes) schema.indexes = indexes;
|
|
440
491
|
this.schemas.set(tableName, schema);
|
|
441
492
|
this.createTable(schema);
|
|
442
|
-
|
|
443
|
-
|
|
493
|
+
if (detailedValidate) {
|
|
494
|
+
const insertErrors = this.insertDataWithDetailedValidation(tableName, schema, validatedData, config.jsonlPath);
|
|
495
|
+
if (insertErrors.length > 0) return {
|
|
496
|
+
loaded: false,
|
|
497
|
+
errors: insertErrors
|
|
498
|
+
};
|
|
499
|
+
} else this.insertData(tableName, schema, validatedData);
|
|
500
|
+
return {
|
|
501
|
+
loaded: true,
|
|
502
|
+
errors: []
|
|
503
|
+
};
|
|
444
504
|
}
|
|
445
505
|
/**
|
|
446
506
|
* Create table in SQLite with constraints and indexes
|
|
@@ -485,18 +545,85 @@ var LinesDB = class LinesDB {
|
|
|
485
545
|
return `"${identifier.replace(/"/g, "\"\"")}"`;
|
|
486
546
|
}
|
|
487
547
|
/**
|
|
488
|
-
* Insert data into table
|
|
548
|
+
* Insert data into table using batch insert (multiple rows per SQL)
|
|
549
|
+
* SQLite has a parameter limit (default 999), so we batch rows accordingly
|
|
550
|
+
* Throws exception if any constraint violation occurs
|
|
489
551
|
*/
|
|
490
552
|
insertData(tableName, schema, data) {
|
|
553
|
+
if (data.length === 0) return;
|
|
554
|
+
const columnNames = schema.columns.map((col) => col.name);
|
|
555
|
+
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
556
|
+
const columnCount = columnNames.length;
|
|
557
|
+
const maxBatchSize = Math.floor(900 / columnCount);
|
|
558
|
+
const batchSize = Math.max(1, Math.min(maxBatchSize, 100));
|
|
559
|
+
for (let i = 0; i < data.length; i += batchSize) {
|
|
560
|
+
const batch = data.slice(i, i + batchSize);
|
|
561
|
+
const rowPlaceholders = columnNames.map(() => "?").join(", ");
|
|
562
|
+
const valuesPlaceholders = batch.map(() => `(${rowPlaceholders})`).join(", ");
|
|
563
|
+
const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES ${valuesPlaceholders}`;
|
|
564
|
+
const values = [];
|
|
565
|
+
for (const row of batch) for (const col of columnNames) values.push(this.normalizeValue(row[col]));
|
|
566
|
+
this.db.prepare(sql).run(...values);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
/**
|
|
570
|
+
* Insert data into table one row at a time with detailed error reporting
|
|
571
|
+
* This is used for validation to catch constraint violations
|
|
572
|
+
*/
|
|
573
|
+
insertDataWithDetailedValidation(tableName, schema, data, filePath) {
|
|
574
|
+
const errors = [];
|
|
491
575
|
const columnNames = schema.columns.map((col) => col.name);
|
|
492
576
|
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
493
577
|
const placeholders = columnNames.map(() => "?").join(", ");
|
|
494
578
|
const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
495
579
|
const stmt = this.db.prepare(sql);
|
|
496
|
-
for (
|
|
497
|
-
const
|
|
498
|
-
|
|
580
|
+
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
581
|
+
const row = data[rowIndex];
|
|
582
|
+
try {
|
|
583
|
+
const values = columnNames.map((col) => this.normalizeValue(row[col]));
|
|
584
|
+
stmt.run(...values);
|
|
585
|
+
} catch (error) {
|
|
586
|
+
const constraintError = this.analyzeConstraintError(error, filePath, tableName, rowIndex, row, schema.foreignKeys || []);
|
|
587
|
+
if (constraintError) errors.push(constraintError);
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
return errors;
|
|
591
|
+
}
|
|
592
|
+
/**
|
|
593
|
+
* Analyze constraint error and extract detailed information
|
|
594
|
+
*/
|
|
595
|
+
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys) {
|
|
596
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
597
|
+
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
598
|
+
const fkValue = row[fk.column];
|
|
599
|
+
if (fkValue === null || fkValue === void 0) continue;
|
|
600
|
+
try {
|
|
601
|
+
const result = this.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [this.normalizeValue(fkValue)]);
|
|
602
|
+
if (result.length > 0 && result[0].count === 0) return {
|
|
603
|
+
file,
|
|
604
|
+
tableName,
|
|
605
|
+
rowIndex,
|
|
606
|
+
issues: [],
|
|
607
|
+
type: "foreignKey",
|
|
608
|
+
foreignKeyError: {
|
|
609
|
+
column: fk.column,
|
|
610
|
+
value: fkValue,
|
|
611
|
+
referencedTable: fk.references.table,
|
|
612
|
+
referencedColumn: fk.references.column
|
|
613
|
+
}
|
|
614
|
+
};
|
|
615
|
+
} catch (_) {}
|
|
499
616
|
}
|
|
617
|
+
return {
|
|
618
|
+
file,
|
|
619
|
+
tableName,
|
|
620
|
+
rowIndex,
|
|
621
|
+
issues: [{
|
|
622
|
+
message: errorMessage,
|
|
623
|
+
path: []
|
|
624
|
+
}],
|
|
625
|
+
type: "schema"
|
|
626
|
+
};
|
|
500
627
|
}
|
|
501
628
|
/**
|
|
502
629
|
* Execute a raw SQL query
|
|
@@ -908,9 +1035,13 @@ var LinesDB = class LinesDB {
|
|
|
908
1035
|
/**
|
|
909
1036
|
* Sync database changes back to JSONL files
|
|
910
1037
|
* Uses backward transformation when available
|
|
1038
|
+
* @param tableName Optional table name to sync. If not provided, syncs all loaded tables
|
|
911
1039
|
*/
|
|
912
|
-
async sync() {
|
|
913
|
-
|
|
1040
|
+
async sync(tableName) {
|
|
1041
|
+
if (tableName) {
|
|
1042
|
+
if (!this.schemas.has(tableName)) throw new Error(`Table '${tableName}' is not loaded`);
|
|
1043
|
+
await this.syncTable(tableName);
|
|
1044
|
+
} else for (const [name] of this.schemas) await this.syncTable(name);
|
|
914
1045
|
}
|
|
915
1046
|
/**
|
|
916
1047
|
* Execute a function within a transaction
|
|
@@ -1057,293 +1188,31 @@ function sanitizeIdentifier(value) {
|
|
|
1057
1188
|
return value.replace(/[^A-Za-z0-9_$]/g, "");
|
|
1058
1189
|
}
|
|
1059
1190
|
|
|
1060
|
-
//#endregion
|
|
1061
|
-
//#region src/validator.ts
|
|
1062
|
-
var Validator = class {
|
|
1063
|
-
path;
|
|
1064
|
-
projectRoot;
|
|
1065
|
-
constructor(options) {
|
|
1066
|
-
this.path = options.path;
|
|
1067
|
-
this.projectRoot = options.projectRoot || process.cwd();
|
|
1068
|
-
}
|
|
1069
|
-
/**
|
|
1070
|
-
* Validate JSONL file(s)
|
|
1071
|
-
*/
|
|
1072
|
-
async validate() {
|
|
1073
|
-
const fullPath = this.path.startsWith("/") ? this.path : (0, node_path.join)(this.projectRoot, this.path);
|
|
1074
|
-
const stats = await (0, node_fs_promises.stat)(fullPath);
|
|
1075
|
-
if (stats.isDirectory()) return this.validateDirectory(fullPath);
|
|
1076
|
-
else if (stats.isFile() && fullPath.endsWith(".jsonl")) return this.validateFile(fullPath);
|
|
1077
|
-
else throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
|
|
1078
|
-
}
|
|
1079
|
-
/**
|
|
1080
|
-
* Validate all JSONL files in a directory
|
|
1081
|
-
*/
|
|
1082
|
-
async validateDirectory(dirPath) {
|
|
1083
|
-
const jsonlFiles = (await (0, node_fs_promises.readdir)(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => (0, node_path.join)(dirPath, entry.name));
|
|
1084
|
-
if (jsonlFiles.length === 0) throw new Error(`No JSONL files found in directory: ${dirPath}`);
|
|
1085
|
-
const allErrors = [];
|
|
1086
|
-
const allWarnings = [];
|
|
1087
|
-
const filesWithSchema = [];
|
|
1088
|
-
for (const file of jsonlFiles) if (await SchemaLoader.hasSchema(file)) filesWithSchema.push(file);
|
|
1089
|
-
else {
|
|
1090
|
-
const tableName = (0, node_path.basename)(file, ".jsonl");
|
|
1091
|
-
allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
|
|
1092
|
-
}
|
|
1093
|
-
for (const file of filesWithSchema) {
|
|
1094
|
-
const result = await this.validateFile(file);
|
|
1095
|
-
allErrors.push(...result.errors);
|
|
1096
|
-
allWarnings.push(...result.warnings);
|
|
1097
|
-
}
|
|
1098
|
-
if (filesWithSchema.length > 0 && allErrors.length === 0) {
|
|
1099
|
-
const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
|
|
1100
|
-
allErrors.push(...dbErrors);
|
|
1101
|
-
}
|
|
1102
|
-
return {
|
|
1103
|
-
valid: allErrors.length === 0,
|
|
1104
|
-
errors: allErrors,
|
|
1105
|
-
warnings: allWarnings
|
|
1106
|
-
};
|
|
1107
|
-
}
|
|
1108
|
-
/**
|
|
1109
|
-
* Validate by loading data into database one row at a time
|
|
1110
|
-
* This catches constraint violations and extracts detailed error information
|
|
1111
|
-
*/
|
|
1112
|
-
async validateWithDatabase(dirPath, jsonlFiles) {
|
|
1113
|
-
const errors = [];
|
|
1114
|
-
try {
|
|
1115
|
-
const db = LinesDB.create({ dataDir: ":memory:" });
|
|
1116
|
-
for (const file of jsonlFiles) {
|
|
1117
|
-
const tableName = (0, node_path.basename)(file, ".jsonl");
|
|
1118
|
-
const data = await JsonlReader.read(file);
|
|
1119
|
-
let schema;
|
|
1120
|
-
let foreignKeys = [];
|
|
1121
|
-
let indexes = [];
|
|
1122
|
-
let primaryKey;
|
|
1123
|
-
try {
|
|
1124
|
-
schema = await SchemaLoader.loadSchema(file);
|
|
1125
|
-
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
1126
|
-
const schemaModule = await import(`${pathToFileURL$1(file.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
1127
|
-
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
1128
|
-
if (schemaExport?.foreignKeys) foreignKeys = schemaExport.foreignKeys;
|
|
1129
|
-
if (schemaExport?.indexes) indexes = schemaExport.indexes;
|
|
1130
|
-
if (schemaExport?.primaryKey) primaryKey = schemaExport.primaryKey;
|
|
1131
|
-
} catch (_error) {
|
|
1132
|
-
continue;
|
|
1133
|
-
}
|
|
1134
|
-
try {
|
|
1135
|
-
const tableSchema = this.createTableSchema(tableName, data, schema, foreignKeys, indexes, primaryKey);
|
|
1136
|
-
this.createTableInDb(db, tableSchema);
|
|
1137
|
-
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
1138
|
-
const row = data[rowIndex];
|
|
1139
|
-
try {
|
|
1140
|
-
this.insertRowIntoDb(db, tableName, tableSchema, row);
|
|
1141
|
-
} catch (error) {
|
|
1142
|
-
const constraintError = this.analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db);
|
|
1143
|
-
if (constraintError) errors.push(constraintError);
|
|
1144
|
-
}
|
|
1145
|
-
}
|
|
1146
|
-
} catch (_error) {
|
|
1147
|
-
continue;
|
|
1148
|
-
}
|
|
1149
|
-
}
|
|
1150
|
-
await db.close();
|
|
1151
|
-
} catch (error) {
|
|
1152
|
-
errors.push({
|
|
1153
|
-
file: dirPath,
|
|
1154
|
-
tableName: "database",
|
|
1155
|
-
rowIndex: 0,
|
|
1156
|
-
issues: [{
|
|
1157
|
-
message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
1158
|
-
path: []
|
|
1159
|
-
}],
|
|
1160
|
-
type: "schema"
|
|
1161
|
-
});
|
|
1162
|
-
}
|
|
1163
|
-
return errors;
|
|
1164
|
-
}
|
|
1165
|
-
/**
|
|
1166
|
-
* Create table schema from data and validation schema
|
|
1167
|
-
*/
|
|
1168
|
-
createTableSchema(tableName, data, validationSchema, foreignKeys, indexes, primaryKey) {
|
|
1169
|
-
if (data.length === 0) throw new Error(`No data found in ${tableName}`);
|
|
1170
|
-
const schema = JsonlReader.inferSchema(tableName, data);
|
|
1171
|
-
if (primaryKey) {
|
|
1172
|
-
const pkColumn = schema.columns.find((col) => col.name === primaryKey);
|
|
1173
|
-
if (pkColumn) pkColumn.primaryKey = true;
|
|
1174
|
-
} else if (!schema.columns.some((col) => col.primaryKey)) {
|
|
1175
|
-
const idColumn = schema.columns.find((c) => c.name === "id");
|
|
1176
|
-
if (idColumn) idColumn.primaryKey = true;
|
|
1177
|
-
}
|
|
1178
|
-
if (foreignKeys && foreignKeys.length > 0) schema.foreignKeys = foreignKeys;
|
|
1179
|
-
if (indexes && indexes.length > 0) schema.indexes = indexes;
|
|
1180
|
-
return schema;
|
|
1181
|
-
}
|
|
1182
|
-
/**
|
|
1183
|
-
* Create table in database
|
|
1184
|
-
*/
|
|
1185
|
-
createTableInDb(db, schema) {
|
|
1186
|
-
const columns = schema.columns.map((col) => {
|
|
1187
|
-
let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
|
|
1188
|
-
if (col.primaryKey) colDef += " PRIMARY KEY";
|
|
1189
|
-
return colDef;
|
|
1190
|
-
});
|
|
1191
|
-
if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) columns.push(`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`);
|
|
1192
|
-
const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(", ")})`;
|
|
1193
|
-
db.execute(sql);
|
|
1194
|
-
if (schema.indexes && schema.indexes.length > 0) for (const index of schema.indexes) {
|
|
1195
|
-
const indexName = index.name || `idx_${schema.name}_${index.columns.join("_")}`;
|
|
1196
|
-
const uniqueKeyword = index.unique ? "UNIQUE" : "";
|
|
1197
|
-
const indexColumns = index.columns.map((col) => this.quoteIdentifier(col)).join(", ");
|
|
1198
|
-
const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
|
|
1199
|
-
db.execute(indexSql);
|
|
1200
|
-
}
|
|
1201
|
-
}
|
|
1202
|
-
/**
|
|
1203
|
-
* Insert a row into database
|
|
1204
|
-
*/
|
|
1205
|
-
insertRowIntoDb(db, tableName, schema, row) {
|
|
1206
|
-
const columnNames = schema.columns.map((col) => col.name);
|
|
1207
|
-
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
1208
|
-
const placeholders = columnNames.map(() => "?").join(", ");
|
|
1209
|
-
const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
1210
|
-
const values = columnNames.map((col) => {
|
|
1211
|
-
const value = row[col];
|
|
1212
|
-
if (value === null || value === void 0) return null;
|
|
1213
|
-
if (typeof value === "object") return JSON.stringify(value);
|
|
1214
|
-
if (typeof value === "boolean") return value ? 1 : 0;
|
|
1215
|
-
return value;
|
|
1216
|
-
});
|
|
1217
|
-
db.execute(sql, values);
|
|
1218
|
-
}
|
|
1219
|
-
/**
|
|
1220
|
-
* Analyze constraint error and extract detailed information
|
|
1221
|
-
*/
|
|
1222
|
-
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db) {
|
|
1223
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1224
|
-
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
1225
|
-
const fkValue = row[fk.column];
|
|
1226
|
-
if (fkValue === null || fkValue === void 0) continue;
|
|
1227
|
-
try {
|
|
1228
|
-
const result = db.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [fkValue]);
|
|
1229
|
-
if (result.length > 0 && result[0].count === 0) return {
|
|
1230
|
-
file,
|
|
1231
|
-
tableName,
|
|
1232
|
-
rowIndex,
|
|
1233
|
-
issues: [],
|
|
1234
|
-
type: "foreignKey",
|
|
1235
|
-
foreignKeyError: {
|
|
1236
|
-
column: fk.column,
|
|
1237
|
-
value: fkValue,
|
|
1238
|
-
referencedTable: fk.references.table,
|
|
1239
|
-
referencedColumn: fk.references.column
|
|
1240
|
-
}
|
|
1241
|
-
};
|
|
1242
|
-
} catch (_) {}
|
|
1243
|
-
}
|
|
1244
|
-
return {
|
|
1245
|
-
file,
|
|
1246
|
-
tableName,
|
|
1247
|
-
rowIndex,
|
|
1248
|
-
issues: [{
|
|
1249
|
-
message: errorMessage,
|
|
1250
|
-
path: []
|
|
1251
|
-
}],
|
|
1252
|
-
type: "schema"
|
|
1253
|
-
};
|
|
1254
|
-
}
|
|
1255
|
-
/**
|
|
1256
|
-
* Quote SQL identifier
|
|
1257
|
-
*/
|
|
1258
|
-
quoteIdentifier(name) {
|
|
1259
|
-
return `"${name.replace(/"/g, "\"\"")}"`;
|
|
1260
|
-
}
|
|
1261
|
-
/**
|
|
1262
|
-
* Validate a single JSONL file
|
|
1263
|
-
*/
|
|
1264
|
-
async validateFile(filePath) {
|
|
1265
|
-
const tableName = (0, node_path.basename)(filePath, ".jsonl");
|
|
1266
|
-
const data = await JsonlReader.read(filePath);
|
|
1267
|
-
const schema = await SchemaLoader.loadSchema(filePath);
|
|
1268
|
-
const errors = [];
|
|
1269
|
-
for (let i = 0; i < data.length; i++) {
|
|
1270
|
-
const row = data[i];
|
|
1271
|
-
const result = schema["~standard"].validate(row);
|
|
1272
|
-
if (result instanceof Promise) throw new Error("Asynchronous validation is not supported.");
|
|
1273
|
-
if (result.issues && result.issues.length > 0) errors.push({
|
|
1274
|
-
file: filePath,
|
|
1275
|
-
tableName,
|
|
1276
|
-
rowIndex: i,
|
|
1277
|
-
issues: result.issues,
|
|
1278
|
-
type: "schema"
|
|
1279
|
-
});
|
|
1280
|
-
}
|
|
1281
|
-
if (errors.length === 0) {
|
|
1282
|
-
const dirPath = (0, node_path.dirname)(filePath);
|
|
1283
|
-
const allJsonlFiles = (await (0, node_fs_promises.readdir)(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => (0, node_path.join)(dirPath, entry.name));
|
|
1284
|
-
const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
|
|
1285
|
-
errors.push(...dbErrors.filter((e) => e.file === filePath));
|
|
1286
|
-
}
|
|
1287
|
-
return {
|
|
1288
|
-
valid: errors.length === 0,
|
|
1289
|
-
errors,
|
|
1290
|
-
warnings: []
|
|
1291
|
-
};
|
|
1292
|
-
}
|
|
1293
|
-
};
|
|
1294
|
-
|
|
1295
1191
|
//#endregion
|
|
1296
1192
|
//#region src/jsonl-migration.ts
|
|
1297
1193
|
/**
|
|
1298
1194
|
* Validate a table by temporarily supplying in-memory rows while reusing the existing LinesDB validation pipeline.
|
|
1299
|
-
* If validation fails,
|
|
1195
|
+
* If validation fails, throws an error with validation details.
|
|
1300
1196
|
*/
|
|
1301
1197
|
async function ensureTableRowsValid(options) {
|
|
1302
|
-
console.log("[ensureTableRowsValid] START");
|
|
1303
|
-
console.log("[ensureTableRowsValid] dataDir:", options.dataDir);
|
|
1304
|
-
console.log("[ensureTableRowsValid] tableName:", options.tableName);
|
|
1305
|
-
console.log("[ensureTableRowsValid] rows count:", options.rows.length);
|
|
1306
1198
|
const tablePath = (0, node_path.join)(options.dataDir, `${options.tableName}.jsonl`);
|
|
1307
1199
|
const overrides = new Map([[tablePath, options.rows]]);
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
}
|
|
1320
|
-
};
|
|
1321
|
-
try {
|
|
1322
|
-
console.log("[ensureTableRowsValid] Calling JsonlReader.withOverrides");
|
|
1323
|
-
await JsonlReader.withOverrides(overrides, async () => {
|
|
1324
|
-
console.log("[ensureTableRowsValid] Inside withOverrides callback");
|
|
1325
|
-
const db = LinesDB.create({ dataDir: options.dataDir });
|
|
1326
|
-
console.log("[ensureTableRowsValid] LinesDB created");
|
|
1327
|
-
try {
|
|
1328
|
-
console.log("[ensureTableRowsValid] Calling db.initialize()");
|
|
1329
|
-
await db.initialize();
|
|
1330
|
-
console.log("[ensureTableRowsValid] db.initialize() completed");
|
|
1331
|
-
} finally {
|
|
1332
|
-
console.log("[ensureTableRowsValid] Calling db.close()");
|
|
1333
|
-
await db.close();
|
|
1200
|
+
await JsonlReader.withOverrides(overrides, async () => {
|
|
1201
|
+
const db = LinesDB.create({ dataDir: options.dataDir });
|
|
1202
|
+
try {
|
|
1203
|
+
const result = await db.initialize({ tableName: options.tableName });
|
|
1204
|
+
if (!result.valid) {
|
|
1205
|
+
const errorCount = result.errors.length;
|
|
1206
|
+
const errorDetails = result.errors.map((e) => {
|
|
1207
|
+
const issueMessages = e.issues.map((issue) => issue.message).join(", ");
|
|
1208
|
+
return ` Row ${e.rowIndex}: ${issueMessages}`;
|
|
1209
|
+
}).join("\n");
|
|
1210
|
+
throw new Error(`Validation failed for table '${options.tableName}' (${errorCount} error(s)):\n${errorDetails}`);
|
|
1334
1211
|
}
|
|
1335
|
-
}
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
}
|
|
1340
|
-
console.log("[ensureTableRowsValid] Warnings captured:", warnMessages.length);
|
|
1341
|
-
console.log("[ensureTableRowsValid] capturedError:", capturedError ? "YES" : "NO");
|
|
1342
|
-
if (capturedError) {
|
|
1343
|
-
console.log("[ensureTableRowsValid] Throwing captured error");
|
|
1344
|
-
throw capturedError;
|
|
1345
|
-
}
|
|
1346
|
-
console.log("[ensureTableRowsValid] END (success)");
|
|
1212
|
+
} finally {
|
|
1213
|
+
await db.close();
|
|
1214
|
+
}
|
|
1215
|
+
});
|
|
1347
1216
|
}
|
|
1348
1217
|
|
|
1349
1218
|
//#endregion
|
|
@@ -1354,7 +1223,6 @@ exports.LinesDB = LinesDB;
|
|
|
1354
1223
|
exports.RUNTIME = RUNTIME;
|
|
1355
1224
|
exports.SchemaLoader = SchemaLoader;
|
|
1356
1225
|
exports.TypeGenerator = TypeGenerator;
|
|
1357
|
-
exports.Validator = Validator;
|
|
1358
1226
|
exports.defineSchema = defineSchema;
|
|
1359
1227
|
exports.detectRuntime = detectRuntime;
|
|
1360
1228
|
exports.ensureTableRowsValid = ensureTableRowsValid;
|