@toiroakr/lines-db 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/bin/cli.js +136 -30
- package/dist/index.cjs +136 -30
- package/dist/index.d.cts +22 -2
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +22 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +136 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/validator.test.ts +140 -0
- package/src/validator.ts +259 -42
package/dist/index.js
CHANGED
|
@@ -1085,37 +1085,48 @@ var Validator = class {
|
|
|
1085
1085
|
};
|
|
1086
1086
|
}
|
|
1087
1087
|
/**
|
|
1088
|
-
* Validate by loading data into
|
|
1089
|
-
* This catches constraint violations
|
|
1088
|
+
* Validate by loading data into database one row at a time
|
|
1089
|
+
* This catches constraint violations and extracts detailed error information
|
|
1090
1090
|
*/
|
|
1091
1091
|
async validateWithDatabase(dirPath, jsonlFiles) {
|
|
1092
1092
|
const errors = [];
|
|
1093
|
-
const warnMessages = [];
|
|
1094
|
-
const originalWarn = console.warn;
|
|
1095
|
-
console.warn = (...args) => {
|
|
1096
|
-
const message = args.map((arg) => String(arg)).join(" ");
|
|
1097
|
-
warnMessages.push(message);
|
|
1098
|
-
originalWarn(...args);
|
|
1099
|
-
};
|
|
1100
1093
|
try {
|
|
1101
|
-
const db = LinesDB.create({ dataDir:
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
})
|
|
1094
|
+
const db = LinesDB.create({ dataDir: ":memory:" });
|
|
1095
|
+
for (const file of jsonlFiles) {
|
|
1096
|
+
const tableName = basename(file, ".jsonl");
|
|
1097
|
+
const data = await JsonlReader.read(file);
|
|
1098
|
+
let schema;
|
|
1099
|
+
let foreignKeys = [];
|
|
1100
|
+
let indexes = [];
|
|
1101
|
+
let primaryKey;
|
|
1102
|
+
try {
|
|
1103
|
+
schema = await SchemaLoader.loadSchema(file);
|
|
1104
|
+
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
1105
|
+
const schemaModule = await import(`${pathToFileURL$1(file.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
1106
|
+
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
1107
|
+
if (schemaExport?.foreignKeys) foreignKeys = schemaExport.foreignKeys;
|
|
1108
|
+
if (schemaExport?.indexes) indexes = schemaExport.indexes;
|
|
1109
|
+
if (schemaExport?.primaryKey) primaryKey = schemaExport.primaryKey;
|
|
1110
|
+
} catch (_error) {
|
|
1111
|
+
continue;
|
|
1112
|
+
}
|
|
1113
|
+
try {
|
|
1114
|
+
const tableSchema = this.createTableSchema(tableName, data, schema, foreignKeys, indexes, primaryKey);
|
|
1115
|
+
this.createTableInDb(db, tableSchema);
|
|
1116
|
+
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
1117
|
+
const row = data[rowIndex];
|
|
1118
|
+
try {
|
|
1119
|
+
this.insertRowIntoDb(db, tableName, tableSchema, row);
|
|
1120
|
+
} catch (error) {
|
|
1121
|
+
const constraintError = this.analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db);
|
|
1122
|
+
if (constraintError) errors.push(constraintError);
|
|
1123
|
+
}
|
|
1124
|
+
}
|
|
1125
|
+
} catch (_error) {
|
|
1126
|
+
continue;
|
|
1127
|
+
}
|
|
1118
1128
|
}
|
|
1129
|
+
await db.close();
|
|
1119
1130
|
} catch (error) {
|
|
1120
1131
|
errors.push({
|
|
1121
1132
|
file: dirPath,
|
|
@@ -1127,12 +1138,106 @@ var Validator = class {
|
|
|
1127
1138
|
}],
|
|
1128
1139
|
type: "schema"
|
|
1129
1140
|
});
|
|
1130
|
-
} finally {
|
|
1131
|
-
console.warn = originalWarn;
|
|
1132
1141
|
}
|
|
1133
1142
|
return errors;
|
|
1134
1143
|
}
|
|
1135
1144
|
/**
|
|
1145
|
+
* Create table schema from data and validation schema
|
|
1146
|
+
*/
|
|
1147
|
+
createTableSchema(tableName, data, validationSchema, foreignKeys, indexes, primaryKey) {
|
|
1148
|
+
if (data.length === 0) throw new Error(`No data found in ${tableName}`);
|
|
1149
|
+
const schema = JsonlReader.inferSchema(tableName, data);
|
|
1150
|
+
if (primaryKey) {
|
|
1151
|
+
const pkColumn = schema.columns.find((col) => col.name === primaryKey);
|
|
1152
|
+
if (pkColumn) pkColumn.primaryKey = true;
|
|
1153
|
+
} else if (!schema.columns.some((col) => col.primaryKey)) {
|
|
1154
|
+
const idColumn = schema.columns.find((c) => c.name === "id");
|
|
1155
|
+
if (idColumn) idColumn.primaryKey = true;
|
|
1156
|
+
}
|
|
1157
|
+
if (foreignKeys && foreignKeys.length > 0) schema.foreignKeys = foreignKeys;
|
|
1158
|
+
if (indexes && indexes.length > 0) schema.indexes = indexes;
|
|
1159
|
+
return schema;
|
|
1160
|
+
}
|
|
1161
|
+
/**
|
|
1162
|
+
* Create table in database
|
|
1163
|
+
*/
|
|
1164
|
+
createTableInDb(db, schema) {
|
|
1165
|
+
const columns = schema.columns.map((col) => {
|
|
1166
|
+
let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
|
|
1167
|
+
if (col.primaryKey) colDef += " PRIMARY KEY";
|
|
1168
|
+
return colDef;
|
|
1169
|
+
});
|
|
1170
|
+
if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) columns.push(`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`);
|
|
1171
|
+
const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(", ")})`;
|
|
1172
|
+
db.execute(sql);
|
|
1173
|
+
if (schema.indexes && schema.indexes.length > 0) for (const index of schema.indexes) {
|
|
1174
|
+
const indexName = index.name || `idx_${schema.name}_${index.columns.join("_")}`;
|
|
1175
|
+
const uniqueKeyword = index.unique ? "UNIQUE" : "";
|
|
1176
|
+
const indexColumns = index.columns.map((col) => this.quoteIdentifier(col)).join(", ");
|
|
1177
|
+
const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
|
|
1178
|
+
db.execute(indexSql);
|
|
1179
|
+
}
|
|
1180
|
+
}
|
|
1181
|
+
/**
|
|
1182
|
+
* Insert a row into database
|
|
1183
|
+
*/
|
|
1184
|
+
insertRowIntoDb(db, tableName, schema, row) {
|
|
1185
|
+
const columnNames = schema.columns.map((col) => col.name);
|
|
1186
|
+
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
1187
|
+
const placeholders = columnNames.map(() => "?").join(", ");
|
|
1188
|
+
const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
1189
|
+
const values = columnNames.map((col) => {
|
|
1190
|
+
const value = row[col];
|
|
1191
|
+
if (value === null || value === void 0) return null;
|
|
1192
|
+
if (typeof value === "object") return JSON.stringify(value);
|
|
1193
|
+
if (typeof value === "boolean") return value ? 1 : 0;
|
|
1194
|
+
return value;
|
|
1195
|
+
});
|
|
1196
|
+
db.execute(sql, values);
|
|
1197
|
+
}
|
|
1198
|
+
/**
|
|
1199
|
+
* Analyze constraint error and extract detailed information
|
|
1200
|
+
*/
|
|
1201
|
+
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db) {
|
|
1202
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1203
|
+
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
1204
|
+
const fkValue = row[fk.column];
|
|
1205
|
+
if (fkValue === null || fkValue === void 0) continue;
|
|
1206
|
+
try {
|
|
1207
|
+
const result = db.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [fkValue]);
|
|
1208
|
+
if (result.length > 0 && result[0].count === 0) return {
|
|
1209
|
+
file,
|
|
1210
|
+
tableName,
|
|
1211
|
+
rowIndex,
|
|
1212
|
+
issues: [],
|
|
1213
|
+
type: "foreignKey",
|
|
1214
|
+
foreignKeyError: {
|
|
1215
|
+
column: fk.column,
|
|
1216
|
+
value: fkValue,
|
|
1217
|
+
referencedTable: fk.references.table,
|
|
1218
|
+
referencedColumn: fk.references.column
|
|
1219
|
+
}
|
|
1220
|
+
};
|
|
1221
|
+
} catch (_) {}
|
|
1222
|
+
}
|
|
1223
|
+
return {
|
|
1224
|
+
file,
|
|
1225
|
+
tableName,
|
|
1226
|
+
rowIndex,
|
|
1227
|
+
issues: [{
|
|
1228
|
+
message: errorMessage,
|
|
1229
|
+
path: []
|
|
1230
|
+
}],
|
|
1231
|
+
type: "schema"
|
|
1232
|
+
};
|
|
1233
|
+
}
|
|
1234
|
+
/**
|
|
1235
|
+
* Quote SQL identifier
|
|
1236
|
+
*/
|
|
1237
|
+
quoteIdentifier(name) {
|
|
1238
|
+
return `"${name.replace(/"/g, "\"\"")}"`;
|
|
1239
|
+
}
|
|
1240
|
+
/**
|
|
1136
1241
|
* Validate a single JSONL file
|
|
1137
1242
|
*/
|
|
1138
1243
|
async validateFile(filePath) {
|
|
@@ -1154,8 +1259,9 @@ var Validator = class {
|
|
|
1154
1259
|
}
|
|
1155
1260
|
if (errors.length === 0) {
|
|
1156
1261
|
const dirPath = dirname(filePath);
|
|
1157
|
-
const
|
|
1158
|
-
|
|
1262
|
+
const allJsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
|
|
1263
|
+
const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
|
|
1264
|
+
errors.push(...dbErrors.filter((e) => e.file === filePath));
|
|
1159
1265
|
}
|
|
1160
1266
|
return {
|
|
1161
1267
|
valid: errors.length === 0,
|