@toiroakr/lines-db 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/bin/cli.js +250 -134
- package/dist/index.cjs +169 -53
- package/dist/index.d.cts +31 -10
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +31 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +169 -53
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/database.ts +32 -15
- package/src/schema.ts +6 -6
- package/src/types.ts +2 -2
- package/src/validator.test.ts +140 -0
- package/src/validator.ts +272 -57
package/dist/index.js
CHANGED
|
@@ -261,9 +261,9 @@ var DirectoryScanner = class {
|
|
|
261
261
|
* const schema = defineSchema(
|
|
262
262
|
* v.object({ id: v.number(), customerId: v.number() }),
|
|
263
263
|
* {
|
|
264
|
-
* primaryKey:
|
|
264
|
+
* primaryKey: 'id',
|
|
265
265
|
* foreignKeys: [
|
|
266
|
-
* {
|
|
266
|
+
* { column: 'customerId', references: { table: 'users', column: 'id' } }
|
|
267
267
|
* ]
|
|
268
268
|
* }
|
|
269
269
|
* );
|
|
@@ -337,7 +337,8 @@ var LinesDB = class LinesDB {
|
|
|
337
337
|
let foreignKeys;
|
|
338
338
|
try {
|
|
339
339
|
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
340
|
-
|
|
340
|
+
const schemaModule = await import(`${pathToFileURL$1(tableConfig.jsonlPath.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
341
|
+
foreignKeys = (schemaModule.schema || schemaModule.default)?.foreignKeys || schemaModule.foreignKeys;
|
|
341
342
|
} catch {}
|
|
342
343
|
if (foreignKeys && foreignKeys.length > 0) for (const fk of foreignKeys) {
|
|
343
344
|
const referencedTable = fk.references.table;
|
|
@@ -364,9 +365,13 @@ var LinesDB = class LinesDB {
|
|
|
364
365
|
if (!config.validationSchema) try {
|
|
365
366
|
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
366
367
|
const schemaModule = await import(`${pathToFileURL$1(config.jsonlPath.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
367
|
-
|
|
368
|
-
if (
|
|
369
|
-
if (schemaModule.
|
|
368
|
+
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
369
|
+
if (schemaExport?.primaryKey) schemaMetadata.primaryKey = schemaExport.primaryKey;
|
|
370
|
+
else if (schemaModule.primaryKey) schemaMetadata.primaryKey = schemaModule.primaryKey;
|
|
371
|
+
if (schemaExport?.foreignKeys) schemaMetadata.foreignKeys = schemaExport.foreignKeys;
|
|
372
|
+
else if (schemaModule.foreignKeys) schemaMetadata.foreignKeys = schemaModule.foreignKeys;
|
|
373
|
+
if (schemaExport?.indexes) schemaMetadata.indexes = schemaExport.indexes;
|
|
374
|
+
else if (schemaModule.indexes) schemaMetadata.indexes = schemaModule.indexes;
|
|
370
375
|
} catch (_error) {}
|
|
371
376
|
this.validationSchemas.set(tableName, validationSchema);
|
|
372
377
|
const validationErrors = [];
|
|
@@ -402,9 +407,12 @@ var LinesDB = class LinesDB {
|
|
|
402
407
|
const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
|
|
403
408
|
const foreignKeys = biSchema?.foreignKeys || schemaMetadata.foreignKeys;
|
|
404
409
|
const indexes = biSchema?.indexes || schemaMetadata.indexes;
|
|
405
|
-
if (primaryKey && !schema.columns.some((col) => col.primaryKey))
|
|
406
|
-
const col = schema.columns.find((c) => c.name ===
|
|
410
|
+
if (primaryKey && !schema.columns.some((col) => col.primaryKey)) {
|
|
411
|
+
const col = schema.columns.find((c) => c.name === primaryKey);
|
|
407
412
|
if (col) col.primaryKey = true;
|
|
413
|
+
} else if (!primaryKey && !schema.columns.some((col) => col.primaryKey)) {
|
|
414
|
+
const idColumn = schema.columns.find((c) => c.name === "id");
|
|
415
|
+
if (idColumn) idColumn.primaryKey = true;
|
|
408
416
|
}
|
|
409
417
|
if (foreignKeys) schema.foreignKeys = foreignKeys;
|
|
410
418
|
if (indexes) schema.indexes = indexes;
|
|
@@ -428,7 +436,7 @@ var LinesDB = class LinesDB {
|
|
|
428
436
|
});
|
|
429
437
|
const foreignKeyDefs = [];
|
|
430
438
|
if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) {
|
|
431
|
-
const fkParts = [`FOREIGN KEY (${
|
|
439
|
+
const fkParts = [`FOREIGN KEY (${this.quoteIdentifier(fk.column)})`, `REFERENCES ${this.quoteTableName(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`];
|
|
432
440
|
if (fk.onDelete) fkParts.push(`ON DELETE ${fk.onDelete}`);
|
|
433
441
|
if (fk.onUpdate) fkParts.push(`ON UPDATE ${fk.onUpdate}`);
|
|
434
442
|
foreignKeyDefs.push(fkParts.join(" "));
|
|
@@ -1066,9 +1074,9 @@ var Validator = class {
|
|
|
1066
1074
|
allErrors.push(...result.errors);
|
|
1067
1075
|
allWarnings.push(...result.warnings);
|
|
1068
1076
|
}
|
|
1069
|
-
if (filesWithSchema.length > 0) {
|
|
1070
|
-
const
|
|
1071
|
-
allErrors.push(...
|
|
1077
|
+
if (filesWithSchema.length > 0 && allErrors.length === 0) {
|
|
1078
|
+
const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
|
|
1079
|
+
allErrors.push(...dbErrors);
|
|
1072
1080
|
}
|
|
1073
1081
|
return {
|
|
1074
1082
|
valid: allErrors.length === 0,
|
|
@@ -1077,57 +1085,159 @@ var Validator = class {
|
|
|
1077
1085
|
};
|
|
1078
1086
|
}
|
|
1079
1087
|
/**
|
|
1080
|
-
* Validate
|
|
1088
|
+
* Validate by loading data into database one row at a time
|
|
1089
|
+
* This catches constraint violations and extracts detailed error information
|
|
1081
1090
|
*/
|
|
1082
|
-
async
|
|
1091
|
+
async validateWithDatabase(dirPath, jsonlFiles) {
|
|
1083
1092
|
const errors = [];
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
for (const refRow of referencedData) {
|
|
1104
|
-
const keyValues = fk.references.columns.map((col) => refRow[col]);
|
|
1105
|
-
const compositeKey = JSON.stringify(keyValues);
|
|
1106
|
-
referencedValues.add(compositeKey);
|
|
1093
|
+
try {
|
|
1094
|
+
const db = LinesDB.create({ dataDir: ":memory:" });
|
|
1095
|
+
for (const file of jsonlFiles) {
|
|
1096
|
+
const tableName = basename(file, ".jsonl");
|
|
1097
|
+
const data = await JsonlReader.read(file);
|
|
1098
|
+
let schema;
|
|
1099
|
+
let foreignKeys = [];
|
|
1100
|
+
let indexes = [];
|
|
1101
|
+
let primaryKey;
|
|
1102
|
+
try {
|
|
1103
|
+
schema = await SchemaLoader.loadSchema(file);
|
|
1104
|
+
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
1105
|
+
const schemaModule = await import(`${pathToFileURL$1(file.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
1106
|
+
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
1107
|
+
if (schemaExport?.foreignKeys) foreignKeys = schemaExport.foreignKeys;
|
|
1108
|
+
if (schemaExport?.indexes) indexes = schemaExport.indexes;
|
|
1109
|
+
if (schemaExport?.primaryKey) primaryKey = schemaExport.primaryKey;
|
|
1110
|
+
} catch (_error) {
|
|
1111
|
+
continue;
|
|
1107
1112
|
}
|
|
1108
|
-
|
|
1109
|
-
const
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
foreignKeyError: {
|
|
1119
|
-
column: fk.columns.join(", "),
|
|
1120
|
-
value: foreignKeyValues.length === 1 ? foreignKeyValues[0] : foreignKeyValues,
|
|
1121
|
-
referencedTable,
|
|
1122
|
-
referencedColumn: fk.references.columns.join(", ")
|
|
1113
|
+
try {
|
|
1114
|
+
const tableSchema = this.createTableSchema(tableName, data, schema, foreignKeys, indexes, primaryKey);
|
|
1115
|
+
this.createTableInDb(db, tableSchema);
|
|
1116
|
+
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
1117
|
+
const row = data[rowIndex];
|
|
1118
|
+
try {
|
|
1119
|
+
this.insertRowIntoDb(db, tableName, tableSchema, row);
|
|
1120
|
+
} catch (error) {
|
|
1121
|
+
const constraintError = this.analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db);
|
|
1122
|
+
if (constraintError) errors.push(constraintError);
|
|
1123
1123
|
}
|
|
1124
|
-
}
|
|
1124
|
+
}
|
|
1125
|
+
} catch (_error) {
|
|
1126
|
+
continue;
|
|
1125
1127
|
}
|
|
1126
1128
|
}
|
|
1129
|
+
await db.close();
|
|
1130
|
+
} catch (error) {
|
|
1131
|
+
errors.push({
|
|
1132
|
+
file: dirPath,
|
|
1133
|
+
tableName: "database",
|
|
1134
|
+
rowIndex: 0,
|
|
1135
|
+
issues: [{
|
|
1136
|
+
message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
1137
|
+
path: []
|
|
1138
|
+
}],
|
|
1139
|
+
type: "schema"
|
|
1140
|
+
});
|
|
1127
1141
|
}
|
|
1128
1142
|
return errors;
|
|
1129
1143
|
}
|
|
1130
1144
|
/**
|
|
1145
|
+
* Create table schema from data and validation schema
|
|
1146
|
+
*/
|
|
1147
|
+
createTableSchema(tableName, data, validationSchema, foreignKeys, indexes, primaryKey) {
|
|
1148
|
+
if (data.length === 0) throw new Error(`No data found in ${tableName}`);
|
|
1149
|
+
const schema = JsonlReader.inferSchema(tableName, data);
|
|
1150
|
+
if (primaryKey) {
|
|
1151
|
+
const pkColumn = schema.columns.find((col) => col.name === primaryKey);
|
|
1152
|
+
if (pkColumn) pkColumn.primaryKey = true;
|
|
1153
|
+
} else if (!schema.columns.some((col) => col.primaryKey)) {
|
|
1154
|
+
const idColumn = schema.columns.find((c) => c.name === "id");
|
|
1155
|
+
if (idColumn) idColumn.primaryKey = true;
|
|
1156
|
+
}
|
|
1157
|
+
if (foreignKeys && foreignKeys.length > 0) schema.foreignKeys = foreignKeys;
|
|
1158
|
+
if (indexes && indexes.length > 0) schema.indexes = indexes;
|
|
1159
|
+
return schema;
|
|
1160
|
+
}
|
|
1161
|
+
/**
|
|
1162
|
+
* Create table in database
|
|
1163
|
+
*/
|
|
1164
|
+
createTableInDb(db, schema) {
|
|
1165
|
+
const columns = schema.columns.map((col) => {
|
|
1166
|
+
let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
|
|
1167
|
+
if (col.primaryKey) colDef += " PRIMARY KEY";
|
|
1168
|
+
return colDef;
|
|
1169
|
+
});
|
|
1170
|
+
if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) columns.push(`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`);
|
|
1171
|
+
const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(", ")})`;
|
|
1172
|
+
db.execute(sql);
|
|
1173
|
+
if (schema.indexes && schema.indexes.length > 0) for (const index of schema.indexes) {
|
|
1174
|
+
const indexName = index.name || `idx_${schema.name}_${index.columns.join("_")}`;
|
|
1175
|
+
const uniqueKeyword = index.unique ? "UNIQUE" : "";
|
|
1176
|
+
const indexColumns = index.columns.map((col) => this.quoteIdentifier(col)).join(", ");
|
|
1177
|
+
const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
|
|
1178
|
+
db.execute(indexSql);
|
|
1179
|
+
}
|
|
1180
|
+
}
|
|
1181
|
+
/**
|
|
1182
|
+
* Insert a row into database
|
|
1183
|
+
*/
|
|
1184
|
+
insertRowIntoDb(db, tableName, schema, row) {
|
|
1185
|
+
const columnNames = schema.columns.map((col) => col.name);
|
|
1186
|
+
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
1187
|
+
const placeholders = columnNames.map(() => "?").join(", ");
|
|
1188
|
+
const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
1189
|
+
const values = columnNames.map((col) => {
|
|
1190
|
+
const value = row[col];
|
|
1191
|
+
if (value === null || value === void 0) return null;
|
|
1192
|
+
if (typeof value === "object") return JSON.stringify(value);
|
|
1193
|
+
if (typeof value === "boolean") return value ? 1 : 0;
|
|
1194
|
+
return value;
|
|
1195
|
+
});
|
|
1196
|
+
db.execute(sql, values);
|
|
1197
|
+
}
|
|
1198
|
+
/**
|
|
1199
|
+
* Analyze constraint error and extract detailed information
|
|
1200
|
+
*/
|
|
1201
|
+
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db) {
|
|
1202
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1203
|
+
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
1204
|
+
const fkValue = row[fk.column];
|
|
1205
|
+
if (fkValue === null || fkValue === void 0) continue;
|
|
1206
|
+
try {
|
|
1207
|
+
const result = db.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [fkValue]);
|
|
1208
|
+
if (result.length > 0 && result[0].count === 0) return {
|
|
1209
|
+
file,
|
|
1210
|
+
tableName,
|
|
1211
|
+
rowIndex,
|
|
1212
|
+
issues: [],
|
|
1213
|
+
type: "foreignKey",
|
|
1214
|
+
foreignKeyError: {
|
|
1215
|
+
column: fk.column,
|
|
1216
|
+
value: fkValue,
|
|
1217
|
+
referencedTable: fk.references.table,
|
|
1218
|
+
referencedColumn: fk.references.column
|
|
1219
|
+
}
|
|
1220
|
+
};
|
|
1221
|
+
} catch (_) {}
|
|
1222
|
+
}
|
|
1223
|
+
return {
|
|
1224
|
+
file,
|
|
1225
|
+
tableName,
|
|
1226
|
+
rowIndex,
|
|
1227
|
+
issues: [{
|
|
1228
|
+
message: errorMessage,
|
|
1229
|
+
path: []
|
|
1230
|
+
}],
|
|
1231
|
+
type: "schema"
|
|
1232
|
+
};
|
|
1233
|
+
}
|
|
1234
|
+
/**
|
|
1235
|
+
* Quote SQL identifier
|
|
1236
|
+
*/
|
|
1237
|
+
quoteIdentifier(name) {
|
|
1238
|
+
return `"${name.replace(/"/g, "\"\"")}"`;
|
|
1239
|
+
}
|
|
1240
|
+
/**
|
|
1131
1241
|
* Validate a single JSONL file
|
|
1132
1242
|
*/
|
|
1133
1243
|
async validateFile(filePath) {
|
|
@@ -1147,6 +1257,12 @@ var Validator = class {
|
|
|
1147
1257
|
type: "schema"
|
|
1148
1258
|
});
|
|
1149
1259
|
}
|
|
1260
|
+
if (errors.length === 0) {
|
|
1261
|
+
const dirPath = dirname(filePath);
|
|
1262
|
+
const allJsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
|
|
1263
|
+
const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
|
|
1264
|
+
errors.push(...dbErrors.filter((e) => e.file === filePath));
|
|
1265
|
+
}
|
|
1150
1266
|
return {
|
|
1151
1267
|
valid: errors.length === 0,
|
|
1152
1268
|
errors,
|